Relax vertex stride requirement

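Relax validation of the last stride of a vertex buffer according to the
updated spec: the last vertex/instance only needs
max(attribute.offset + sizeof(attribute.format)) bytes instead of a full
arrayStride.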

Bug: dawn:1287
Change-Id: I7a58401933b48c5cb121ba73c592575ada3e7151
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/83203
Auto-Submit: Shrek Shao <shrekshao@google.com>
Reviewed-by: Austin Eng <enga@chromium.org>
Commit-Queue: Austin Eng <enga@chromium.org>
diff --git a/src/dawn/native/CommandBufferStateTracker.cpp b/src/dawn/native/CommandBufferStateTracker.cpp
index 370139e..ee164c7 100644
--- a/src/dawn/native/CommandBufferStateTracker.cpp
+++ b/src/dawn/native/CommandBufferStateTracker.cpp
@@ -104,17 +104,24 @@
                                 bufferSize, static_cast<uint8_t>(usedSlotVertex),
                                 vertexBuffer.usedBytesInStride);
             } else {
-                uint64_t requiredSize =
-                    (static_cast<uint64_t>(firstVertex) + vertexCount) * arrayStride;
-                // firstVertex and vertexCount are in uint32_t, and arrayStride must not
-                // be larger than kMaxVertexBufferArrayStride, which is currently 2048. So by
-                // doing checks in uint64_t we avoid overflows.
-                DAWN_INVALID_IF(
-                    requiredSize > bufferSize,
-                    "Vertex range (first: %u, count: %u) requires a larger buffer (%u) than the "
-                    "bound buffer size (%u) of the vertex buffer at slot %u with stride (%u).",
-                    firstVertex, vertexCount, requiredSize, bufferSize,
-                    static_cast<uint8_t>(usedSlotVertex), arrayStride);
+                uint64_t strideCount = static_cast<uint64_t>(firstVertex) + vertexCount;
+                if (strideCount != 0u) {
+                    uint64_t requiredSize =
+                        (strideCount - 1u) * arrayStride + vertexBuffer.lastStride;
+                    // firstVertex and vertexCount are in uint32_t,
+                    // arrayStride must not be larger than kMaxVertexBufferArrayStride, which is
+                    // currently 2048, and vertexBuffer.lastStride = max(attribute.offset +
+                    // sizeof(attribute.format)) with attribute.offset being no larger than
+                    // kMaxVertexBufferArrayStride, so by doing checks in uint64_t we avoid
+                    // overflows.
+                    DAWN_INVALID_IF(
+                        requiredSize > bufferSize,
+                        "Vertex range (first: %u, count: %u) requires a larger buffer (%u) than "
+                        "the "
+                        "bound buffer size (%u) of the vertex buffer at slot %u with stride %u.",
+                        firstVertex, vertexCount, requiredSize, bufferSize,
+                        static_cast<uint8_t>(usedSlotVertex), arrayStride);
+                }
             }
         }
 
@@ -142,17 +149,24 @@
                                 bufferSize, static_cast<uint8_t>(usedSlotInstance),
                                 vertexBuffer.usedBytesInStride);
             } else {
-                uint64_t requiredSize =
-                    (static_cast<uint64_t>(firstInstance) + instanceCount) * arrayStride;
-                // firstInstance and instanceCount are in uint32_t, and arrayStride must
-                // not be larger than kMaxVertexBufferArrayStride, which is currently 2048.
-                // So by doing checks in uint64_t we avoid overflows.
-                DAWN_INVALID_IF(
-                    requiredSize > bufferSize,
-                    "Instance range (first: %u, count: %u) requires a larger buffer (%u) than the "
-                    "bound buffer size (%u) of the vertex buffer at slot %u with stride (%u).",
-                    firstInstance, instanceCount, requiredSize, bufferSize,
-                    static_cast<uint8_t>(usedSlotInstance), arrayStride);
+                uint64_t strideCount = static_cast<uint64_t>(firstInstance) + instanceCount;
+                if (strideCount != 0u) {
+                    uint64_t requiredSize =
+                        (strideCount - 1u) * arrayStride + vertexBuffer.lastStride;
+                    // firstInstance and instanceCount are in uint32_t,
+                    // arrayStride must not be larger than kMaxVertexBufferArrayStride, which is
+                    // currently 2048, and vertexBuffer.lastStride = max(attribute.offset +
+                    // sizeof(attribute.format)) with attribute.offset being no larger than
+                    // kMaxVertexBufferArrayStride, so by doing checks in uint64_t we avoid
+                    // overflows.
+                    DAWN_INVALID_IF(
+                        requiredSize > bufferSize,
+                        "Instance range (first: %u, count: %u) requires a larger buffer (%u) than "
+                        "the "
+                        "bound buffer size (%u) of the vertex buffer at slot %u with stride %u.",
+                        firstInstance, instanceCount, requiredSize, bufferSize,
+                        static_cast<uint8_t>(usedSlotInstance), arrayStride);
+                }
             }
         }
 
diff --git a/src/dawn/native/RenderPipeline.cpp b/src/dawn/native/RenderPipeline.cpp
index b61b192e..47d3d20 100644
--- a/src/dawn/native/RenderPipeline.cpp
+++ b/src/dawn/native/RenderPipeline.cpp
@@ -610,6 +610,7 @@
             mVertexBufferInfos[typedSlot].arrayStride = buffers[slot].arrayStride;
             mVertexBufferInfos[typedSlot].stepMode = buffers[slot].stepMode;
             mVertexBufferInfos[typedSlot].usedBytesInStride = 0;
+            mVertexBufferInfos[typedSlot].lastStride = 0;
             switch (buffers[slot].stepMode) {
                 case wgpu::VertexStepMode::Vertex:
                     mVertexBufferSlotsUsedAsVertexBuffer.set(typedSlot);
@@ -634,12 +635,16 @@
                 // maxVertexBufferArrayStride (2048), which is promised by the GPUVertexBufferLayout
                 // validation of creating render pipeline. Therefore, calculating in uint16_t will
                 // cause no overflow.
+                uint32_t formatByteSize =
+                    GetVertexFormatInfo(buffers[slot].attributes[i].format).byteSize;
                 DAWN_ASSERT(buffers[slot].attributes[i].offset <= 2048);
                 uint16_t accessBoundary =
-                    uint16_t(buffers[slot].attributes[i].offset) +
-                    uint16_t(GetVertexFormatInfo(buffers[slot].attributes[i].format).byteSize);
+                    uint16_t(buffers[slot].attributes[i].offset) + uint16_t(formatByteSize);
                 mVertexBufferInfos[typedSlot].usedBytesInStride =
                     std::max(mVertexBufferInfos[typedSlot].usedBytesInStride, accessBoundary);
+                mVertexBufferInfos[typedSlot].lastStride =
+                    std::max(mVertexBufferInfos[typedSlot].lastStride,
+                             mAttributeInfos[location].offset + formatByteSize);
             }
         }
 
diff --git a/src/dawn/native/RenderPipeline.h b/src/dawn/native/RenderPipeline.h
index da6cdd2..429f2a9 100644
--- a/src/dawn/native/RenderPipeline.h
+++ b/src/dawn/native/RenderPipeline.h
@@ -54,6 +54,9 @@
         uint64_t arrayStride;
         wgpu::VertexStepMode stepMode;
         uint16_t usedBytesInStride;
+        // As indicated in the spec, the lastStride is max(attribute.offset +
+        // sizeof(attribute.format)) for each attribute in the buffer[slot]
+        uint64_t lastStride;
     };
 
     class RenderPipelineBase : public PipelineBase {
diff --git a/src/dawn/tests/unittests/validation/VertexBufferValidationTests.cpp b/src/dawn/tests/unittests/validation/VertexBufferValidationTests.cpp
index 35e7889..d26b5d5 100644
--- a/src/dawn/tests/unittests/validation/VertexBufferValidationTests.cpp
+++ b/src/dawn/tests/unittests/validation/VertexBufferValidationTests.cpp
@@ -25,6 +25,11 @@
     void SetUp() override {
         ValidationTest::SetUp();
 
+        // dummy vertex shader module
+        vsModule = utils::CreateShaderModule(device, R"(
+            @stage(vertex) fn main() -> @builtin(position) vec4<f32> {
+                return vec4<f32>(0.0, 0.0, 0.0, 0.0);
+            })");
         fsModule = utils::CreateShaderModule(device, R"(
             @stage(fragment) fn main() -> @location(0) vec4<f32> {
                 return vec4<f32>(0.0, 1.0, 0.0, 1.0);
@@ -67,6 +72,18 @@
     }
 
     wgpu::RenderPipeline MakeRenderPipeline(const wgpu::ShaderModule& vsModule,
+                                            const utils::ComboVertexState& state) {
+        utils::ComboRenderPipelineDescriptor descriptor;
+        descriptor.vertex.module = vsModule;
+        descriptor.cFragment.module = fsModule;
+
+        descriptor.vertex.bufferCount = state.vertexBufferCount;
+        descriptor.vertex.buffers = &state.cVertexBuffers[0];
+
+        return device.CreateRenderPipeline(&descriptor);
+    }
+
+    wgpu::RenderPipeline MakeRenderPipeline(const wgpu::ShaderModule& vsModule,
                                             unsigned int bufferCount) {
         utils::ComboRenderPipelineDescriptor descriptor;
         descriptor.vertex.module = vsModule;
@@ -83,6 +100,7 @@
         return device.CreateRenderPipeline(&descriptor);
     }
 
+    wgpu::ShaderModule vsModule;
     wgpu::ShaderModule fsModule;
 };
 
@@ -354,3 +372,475 @@
         ASSERT_DEVICE_ERROR(encoder.Finish());
     }
 }
+
+// Check the relaxed last-stride size requirement on vertex-step-mode buffers for Draw.
+TEST_F(VertexBufferValidationTest, DrawStrideLimitsVertex) {
+    DummyRenderPass renderPass(device);
+
+    // Create a buffer of size 28, containing 4 float32 elements, array stride size = 8
+    // The last element doesn't have the full stride size
+    wgpu::BufferDescriptor descriptor;
+    descriptor.size = 28;
+    descriptor.usage = wgpu::BufferUsage::Vertex;
+    wgpu::Buffer vertexBuffer = device.CreateBuffer(&descriptor);
+
+    // Vertex attribute offset is 0
+    wgpu::RenderPipeline pipeline1;
+    {
+        utils::ComboVertexState state;
+        state.vertexBufferCount = 1;
+        state.cVertexBuffers[0].arrayStride = 8;
+        state.cVertexBuffers[0].stepMode = wgpu::VertexStepMode::Vertex;
+        state.cVertexBuffers[0].attributeCount = 1;
+        state.cAttributes[0].offset = 0;
+
+        pipeline1 = MakeRenderPipeline(vsModule, state);
+    }
+
+    // Vertex attribute offset is 4
+    wgpu::RenderPipeline pipeline2;
+    {
+        utils::ComboVertexState state;
+        state.vertexBufferCount = 1;
+        state.cVertexBuffers[0].arrayStride = 8;
+        state.cVertexBuffers[0].stepMode = wgpu::VertexStepMode::Vertex;
+        state.cVertexBuffers[0].attributeCount = 1;
+        state.cAttributes[0].offset = 4;
+
+        pipeline2 = MakeRenderPipeline(vsModule, state);
+    }
+
+    // Control case: draw 3 elements, 3 * 8 = 24 <= 28, is valid anyway
+    wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+    {
+        wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&renderPass);
+        pass.SetPipeline(pipeline1);
+        pass.SetVertexBuffer(0, vertexBuffer);
+        pass.Draw(3);
+        pass.End();
+    }
+    encoder.Finish();
+
+    // Valid: draw 3 elements with firstVertex == 1, (2 + 1) * 8 + 4 = 28 <= 28
+    encoder = device.CreateCommandEncoder();
+    {
+        wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&renderPass);
+        pass.SetPipeline(pipeline1);
+        pass.SetVertexBuffer(0, vertexBuffer);
+        pass.Draw(3, 0, 1, 0);
+        pass.End();
+    }
+    encoder.Finish();
+
+    // Valid: draw 3 elements with offset == 4, 4 + 2 * 8 + 4 = 24 <= 28
+    encoder = device.CreateCommandEncoder();
+    {
+        wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&renderPass);
+        pass.SetPipeline(pipeline2);
+        pass.SetVertexBuffer(0, vertexBuffer);
+        pass.Draw(3);
+        pass.End();
+    }
+    encoder.Finish();
+
+    // Valid: draw 4 elements, 4 * 8 = 32 > 28
+    // But the last element does not require to have the full stride size
+    // So 3 * 8 + 4 = 28 <= 28
+    encoder = device.CreateCommandEncoder();
+    {
+        wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&renderPass);
+        pass.SetPipeline(pipeline1);
+        pass.SetVertexBuffer(0, vertexBuffer);
+        pass.Draw(4);
+        pass.End();
+    }
+    encoder.Finish();
+
+    // Invalid: draw 4 elements with firstVertex == 1
+    // It requires a buffer with size of (3 + 1) * 8 + 4 = 36 > 28
+    encoder = device.CreateCommandEncoder();
+    {
+        wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&renderPass);
+        pass.SetPipeline(pipeline1);
+        pass.SetVertexBuffer(0, vertexBuffer);
+        pass.Draw(4, 0, 1, 0);
+        pass.End();
+    }
+    ASSERT_DEVICE_ERROR(encoder.Finish());
+
+    // Invalid: draw 4 elements with offset == 4
+    // It requires a buffer with size of 4 + 3 * 8 + 4 = 32 > 28
+    encoder = device.CreateCommandEncoder();
+    {
+        wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&renderPass);
+        pass.SetPipeline(pipeline2);
+        pass.SetVertexBuffer(0, vertexBuffer);
+        pass.Draw(4);
+        pass.End();
+    }
+    ASSERT_DEVICE_ERROR(encoder.Finish());
+
+    // Valid: stride count == 0 (vertexCount == 0, firstVertex == 0)
+    encoder = device.CreateCommandEncoder();
+    {
+        wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&renderPass);
+        pass.SetPipeline(pipeline2);
+        pass.SetVertexBuffer(0, vertexBuffer);
+        pass.Draw(0);
+        pass.End();
+    }
+    encoder.Finish();
+
+    // Invalid: stride count == 4 (vertexCount == 0, firstVertex == 4)
+    // It requires a buffer with size of 4 + 3 * 8 + 4 = 32 > 28
+    encoder = device.CreateCommandEncoder();
+    {
+        wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&renderPass);
+        pass.SetPipeline(pipeline2);
+        pass.SetVertexBuffer(0, vertexBuffer);
+        pass.Draw(0, 0, 4);
+        pass.End();
+    }
+    ASSERT_DEVICE_ERROR(encoder.Finish());
+}
+
+// Check the relaxed last-stride size requirement on instance-step-mode buffers for Draw.
+TEST_F(VertexBufferValidationTest, DrawStrideLimitsInstance) {
+    DummyRenderPass renderPass(device);
+
+    // Create a buffer of size 28, containing 4 float32 elements, array stride size = 8
+    // The last element doesn't have the full stride size
+    wgpu::BufferDescriptor descriptor;
+    descriptor.size = 28;
+    descriptor.usage = wgpu::BufferUsage::Vertex;
+    wgpu::Buffer vertexBuffer = device.CreateBuffer(&descriptor);
+
+    // Vertex attribute offset is 0
+    wgpu::RenderPipeline pipeline1;
+    {
+        utils::ComboVertexState state;
+        state.vertexBufferCount = 1;
+        state.cVertexBuffers[0].arrayStride = 8;
+        state.cVertexBuffers[0].stepMode = wgpu::VertexStepMode::Instance;
+        state.cVertexBuffers[0].attributeCount = 1;
+        state.cAttributes[0].offset = 0;
+
+        pipeline1 = MakeRenderPipeline(vsModule, state);
+    }
+
+    // Vertex attribute offset is 4
+    wgpu::RenderPipeline pipeline2;
+    {
+        utils::ComboVertexState state;
+        state.vertexBufferCount = 1;
+        state.cVertexBuffers[0].arrayStride = 8;
+        state.cVertexBuffers[0].stepMode = wgpu::VertexStepMode::Instance;
+        state.cVertexBuffers[0].attributeCount = 1;
+        state.cAttributes[0].offset = 4;
+
+        pipeline2 = MakeRenderPipeline(vsModule, state);
+    }
+
+    // Control case: draw 3 instances, 3 * 8 = 24 <= 28, is valid anyway
+    wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+    {
+        wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&renderPass);
+        pass.SetPipeline(pipeline1);
+        pass.SetVertexBuffer(0, vertexBuffer);
+        pass.Draw(1, 3);
+        pass.End();
+    }
+    encoder.Finish();
+
+    // Valid: draw 3 instances with firstInstance == 1, (2 + 1) * 8 + 4 = 28 <= 28
+    encoder = device.CreateCommandEncoder();
+    {
+        wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&renderPass);
+        pass.SetPipeline(pipeline1);
+        pass.SetVertexBuffer(0, vertexBuffer);
+        pass.Draw(1, 3, 0, 1);
+        pass.End();
+    }
+    encoder.Finish();
+
+    // Valid: draw 3 instances with offset == 4, 4 + 2 * 8 + 4 = 24 <= 28
+    encoder = device.CreateCommandEncoder();
+    {
+        wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&renderPass);
+        pass.SetPipeline(pipeline2);
+        pass.SetVertexBuffer(0, vertexBuffer);
+        pass.Draw(1, 3);
+        pass.End();
+    }
+    encoder.Finish();
+
+    // Valid: draw 4 instances, 4 * 8 = 32 > 28
+    // But the last element does not require to have the full stride size
+    // So 3 * 8 + 4 = 28 <= 28
+    encoder = device.CreateCommandEncoder();
+    {
+        wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&renderPass);
+        pass.SetPipeline(pipeline1);
+        pass.SetVertexBuffer(0, vertexBuffer);
+        pass.Draw(1, 4);
+        pass.End();
+    }
+    encoder.Finish();
+
+    // Invalid: draw 4 instances with firstInstance == 1
+    // It requires a buffer with size of (3 + 1) * 8 + 4 = 36 > 28
+    encoder = device.CreateCommandEncoder();
+    {
+        wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&renderPass);
+        pass.SetPipeline(pipeline1);
+        pass.SetVertexBuffer(0, vertexBuffer);
+        pass.Draw(1, 4, 0, 1);
+        pass.End();
+    }
+    ASSERT_DEVICE_ERROR(encoder.Finish());
+
+    // Invalid: draw 4 instances with offset == 4
+    // It requires a buffer with size of 4 + 3 * 8 + 4 = 32 > 28
+    encoder = device.CreateCommandEncoder();
+    {
+        wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&renderPass);
+        pass.SetPipeline(pipeline2);
+        pass.SetVertexBuffer(0, vertexBuffer);
+        pass.Draw(1, 4);
+        pass.End();
+    }
+    ASSERT_DEVICE_ERROR(encoder.Finish());
+
+    // Valid: stride count == 0 (instanceCount == 0, firstInstance == 0)
+    encoder = device.CreateCommandEncoder();
+    {
+        wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&renderPass);
+        pass.SetPipeline(pipeline2);
+        pass.SetVertexBuffer(0, vertexBuffer);
+        pass.Draw(1, 0);
+        pass.End();
+    }
+    encoder.Finish();
+
+    // Invalid: stride count == 4 (instanceCount == 0, firstInstance == 4)
+    // It requires a buffer with size of 4 + 3 * 8 + 4 = 32 > 28
+    encoder = device.CreateCommandEncoder();
+    {
+        wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&renderPass);
+        pass.SetPipeline(pipeline2);
+        pass.SetVertexBuffer(0, vertexBuffer);
+        pass.Draw(1, 0, 0, 4);
+        pass.End();
+    }
+    ASSERT_DEVICE_ERROR(encoder.Finish());
+}
+
+// Check the relaxed last-stride size requirement on instance-step-mode buffers for DrawIndexed.
+TEST_F(VertexBufferValidationTest, DrawIndexedStrideLimitsInstance) {
+    DummyRenderPass renderPass(device);
+
+    // Create a buffer of size 28, containing 4 float32 elements, array stride size = 8
+    // The last element doesn't have the full stride size
+    wgpu::BufferDescriptor descriptor;
+    descriptor.size = 28;
+    descriptor.usage = wgpu::BufferUsage::Vertex;
+    wgpu::Buffer vertexBuffer = device.CreateBuffer(&descriptor);
+
+    wgpu::Buffer indexBuffer =
+        utils::CreateBufferFromData<uint32_t>(device, wgpu::BufferUsage::Index, {0, 1, 2});
+
+    // Vertex attribute offset is 0
+    wgpu::RenderPipeline pipeline1;
+    {
+        utils::ComboVertexState state;
+        state.vertexBufferCount = 1;
+        state.cVertexBuffers[0].arrayStride = 8;
+        state.cVertexBuffers[0].stepMode = wgpu::VertexStepMode::Instance;
+        state.cVertexBuffers[0].attributeCount = 1;
+        state.cAttributes[0].offset = 0;
+
+        pipeline1 = MakeRenderPipeline(vsModule, state);
+    }
+
+    // Vertex attribute offset is 4
+    wgpu::RenderPipeline pipeline2;
+    {
+        utils::ComboVertexState state;
+        state.vertexBufferCount = 1;
+        state.cVertexBuffers[0].arrayStride = 8;
+        state.cVertexBuffers[0].stepMode = wgpu::VertexStepMode::Instance;
+        state.cVertexBuffers[0].attributeCount = 1;
+        state.cAttributes[0].offset = 4;
+
+        pipeline2 = MakeRenderPipeline(vsModule, state);
+    }
+
+    // Control case: draw 3 instances, 3 * 8 = 24 <= 28, is valid anyway
+    wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+    {
+        wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&renderPass);
+        pass.SetPipeline(pipeline1);
+        pass.SetVertexBuffer(0, vertexBuffer);
+        pass.SetIndexBuffer(indexBuffer, wgpu::IndexFormat::Uint32);
+        pass.DrawIndexed(3, 3);
+        pass.End();
+    }
+    encoder.Finish();
+
+    // Valid: draw 3 instances with firstInstance == 1, (2 + 1) * 8 + 4 = 28 <= 28
+    encoder = device.CreateCommandEncoder();
+    {
+        wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&renderPass);
+        pass.SetPipeline(pipeline1);
+        pass.SetVertexBuffer(0, vertexBuffer);
+        pass.SetIndexBuffer(indexBuffer, wgpu::IndexFormat::Uint32);
+        pass.DrawIndexed(3, 3, 0, 0, 1);
+        pass.End();
+    }
+    encoder.Finish();
+
+    // Valid: draw 3 instances with offset == 4, 4 + 2 * 8 + 4 = 24 <= 28
+    encoder = device.CreateCommandEncoder();
+    {
+        wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&renderPass);
+        pass.SetPipeline(pipeline2);
+        pass.SetVertexBuffer(0, vertexBuffer);
+        pass.SetIndexBuffer(indexBuffer, wgpu::IndexFormat::Uint32);
+        pass.DrawIndexed(3, 3);
+        pass.End();
+    }
+    encoder.Finish();
+
+    // Valid: draw 4 instances, 4 * 8 = 32 > 28, but the last element does not require the full
+    // stride size, so 3 * 8 + 4 = 28 <= 28
+    encoder = device.CreateCommandEncoder();
+    {
+        wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&renderPass);
+        pass.SetPipeline(pipeline1);
+        pass.SetVertexBuffer(0, vertexBuffer);
+        pass.SetIndexBuffer(indexBuffer, wgpu::IndexFormat::Uint32);
+        pass.DrawIndexed(3, 4);
+        pass.End();
+    }
+    encoder.Finish();
+
+    // Invalid: draw 4 instances with firstInstance == 1
+    // It requires a buffer with size of (3 + 1) * 8 + 4 = 36 > 28
+    encoder = device.CreateCommandEncoder();
+    {
+        wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&renderPass);
+        pass.SetPipeline(pipeline1);
+        pass.SetVertexBuffer(0, vertexBuffer);
+        pass.SetIndexBuffer(indexBuffer, wgpu::IndexFormat::Uint32);
+        pass.DrawIndexed(3, 4, 0, 0, 1);
+        pass.End();
+    }
+    ASSERT_DEVICE_ERROR(encoder.Finish());
+
+    // Invalid: draw 4 instances with offset == 4
+    // It requires a buffer with size of 4 + 3 * 8 + 4 = 32 > 28
+    encoder = device.CreateCommandEncoder();
+    {
+        wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&renderPass);
+        pass.SetPipeline(pipeline2);
+        pass.SetVertexBuffer(0, vertexBuffer);
+        pass.SetIndexBuffer(indexBuffer, wgpu::IndexFormat::Uint32);
+        pass.DrawIndexed(3, 4);
+        pass.End();
+    }
+    ASSERT_DEVICE_ERROR(encoder.Finish());
+
+    // Valid: stride count == 0 (instanceCount == 0, firstInstance == 0)
+    encoder = device.CreateCommandEncoder();
+    {
+        wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&renderPass);
+        pass.SetPipeline(pipeline2);
+        pass.SetVertexBuffer(0, vertexBuffer);
+        pass.SetIndexBuffer(indexBuffer, wgpu::IndexFormat::Uint32);
+        pass.DrawIndexed(3, 0);
+        pass.End();
+    }
+    encoder.Finish();
+
+    // Invalid: stride count == 4 (instanceCount == 0, firstInstance == 4)
+    // It requires a buffer with size of 4 + 3 * 8 + 4 = 32 > 28
+    encoder = device.CreateCommandEncoder();
+    {
+        wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&renderPass);
+        pass.SetPipeline(pipeline2);
+        pass.SetVertexBuffer(0, vertexBuffer);
+        pass.SetIndexBuffer(indexBuffer, wgpu::IndexFormat::Uint32);
+        pass.DrawIndexed(3, 0, 0, 0, 4);
+        pass.End();
+    }
+    ASSERT_DEVICE_ERROR(encoder.Finish());
+}
+
+// Check last stride is computed correctly for vertex buffer with multiple attributes.
+TEST_F(VertexBufferValidationTest, DrawStrideLimitsVertexMultipleAttributes) {
+    DummyRenderPass renderPass(device);
+
+    // Create a buffer of size 44, array stride size = 12
+    wgpu::BufferDescriptor descriptor;
+    descriptor.size = 44;
+    descriptor.usage = wgpu::BufferUsage::Vertex;
+    wgpu::Buffer vertexBuffer = device.CreateBuffer(&descriptor);
+
+    // lastStride = attribute[1].offset + sizeof(attribute[1].format) = 4 + 4 = 8
+    wgpu::RenderPipeline pipeline1;
+    {
+        utils::ComboVertexState state;
+        state.vertexBufferCount = 1;
+        state.cVertexBuffers[0].arrayStride = 12;
+        state.cVertexBuffers[0].stepMode = wgpu::VertexStepMode::Vertex;
+        state.cVertexBuffers[0].attributeCount = 2;
+        state.cAttributes[0].format = wgpu::VertexFormat::Float32;
+        state.cAttributes[0].offset = 0;
+        state.cAttributes[0].shaderLocation = 0;
+        state.cAttributes[1].format = wgpu::VertexFormat::Float32;
+        state.cAttributes[1].offset = 4;
+        state.cAttributes[1].shaderLocation = 1;
+
+        pipeline1 = MakeRenderPipeline(vsModule, state);
+    }
+
+    // lastStride = attribute[1].offset + sizeof(attribute[1].format) = 4 + 8 = 12
+    wgpu::RenderPipeline pipeline2;
+    {
+        utils::ComboVertexState state;
+        state.vertexBufferCount = 1;
+        state.cVertexBuffers[0].arrayStride = 12;
+        state.cVertexBuffers[0].stepMode = wgpu::VertexStepMode::Vertex;
+        state.cVertexBuffers[0].attributeCount = 2;
+        state.cAttributes[0].format = wgpu::VertexFormat::Float32;
+        state.cAttributes[0].offset = 0;
+        state.cAttributes[0].shaderLocation = 0;
+        state.cAttributes[1].format = wgpu::VertexFormat::Float32x2;
+        state.cAttributes[1].offset = 4;
+        state.cAttributes[1].shaderLocation = 1;
+
+        pipeline2 = MakeRenderPipeline(vsModule, state);
+    }
+
+    // Valid: draw 4 elements with pipeline1, last stride is 8, 3 * 12 + 8 = 44 <= 44
+    wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+    {
+        wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&renderPass);
+        pass.SetPipeline(pipeline1);
+        pass.SetVertexBuffer(0, vertexBuffer);
+        pass.Draw(4);
+        pass.End();
+    }
+    encoder.Finish();
+
+    // Invalid: draw 4 elements with pipeline2, last stride is 12, 3 * 12 + 12 = 48 > 44
+    encoder = device.CreateCommandEncoder();
+    {
+        wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&renderPass);
+        pass.SetPipeline(pipeline2);
+        pass.SetVertexBuffer(0, vertexBuffer);
+        pass.Draw(4);
+        pass.End();
+    }
+    ASSERT_DEVICE_ERROR(encoder.Finish());
+}
diff --git a/src/dawn/tests/unittests/validation/VertexStateValidationTests.cpp b/src/dawn/tests/unittests/validation/VertexStateValidationTests.cpp
index eeb2f69..e522cb3 100644
--- a/src/dawn/tests/unittests/validation/VertexStateValidationTests.cpp
+++ b/src/dawn/tests/unittests/validation/VertexStateValidationTests.cpp
@@ -85,7 +85,6 @@
 }
 
 // Check validation that pipeline vertex buffers are backed by attributes in the vertex input
-// Check validation that pipeline vertex buffers are backed by attributes in the vertex input
 TEST_F(VertexStateTest, PipelineCompatibility) {
     utils::ComboVertexState state;
     state.vertexBufferCount = 1;