Adding VB OOB validation for zero array stride

In this patch OOB validation in draw and drawIndexed is added for vertex
buffer with zero array stride. For such case, both vertex step and
instance step mode buffer can be validated for both draw and drawIndexed,
as we don't care about actual vertex count and instance count for buffer
with zero array stride.
Related unit test is also added. Also fix the DrawIndexedVertexBufferOOB
unit test.

Bug: dawn:1065
Change-Id: Id302dc0c443dec965347c8ae9f3f4d73aeddc38c
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/62200
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
Reviewed-by: Austin Eng <enga@chromium.org>
Commit-Queue: Zhaoming Jiang <zhaoming.jiang@intel.com>
diff --git a/src/dawn_native/CommandBufferStateTracker.cpp b/src/dawn_native/CommandBufferStateTracker.cpp
index 9afd14b..f3a6b47 100644
--- a/src/dawn_native/CommandBufferStateTracker.cpp
+++ b/src/dawn_native/CommandBufferStateTracker.cpp
@@ -88,11 +88,17 @@
                 mLastRenderPipeline->GetVertexBuffer(usedSlotVertex);
             uint64_t arrayStride = vertexBuffer.arrayStride;
             uint64_t bufferSize = mVertexBufferSizes[usedSlotVertex];
-            // firstVertex and vertexCount are in uint32_t, and arrayStride must not
-            // be larger than kMaxVertexBufferArrayStride, which is currently 2048. So by
-            // doing checks in uint64_t we avoid overflows.
-            if ((static_cast<uint64_t>(firstVertex) + vertexCount) * arrayStride > bufferSize) {
-                return DAWN_VALIDATION_ERROR("Vertex buffer out of bound");
+            if (arrayStride == 0) {
+                if (vertexBuffer.usedBytesInStride > bufferSize) {
+                    return DAWN_VALIDATION_ERROR("Vertex buffer out of bound");
+                }
+            } else {
+                // firstVertex and vertexCount are in uint32_t, and arrayStride must not
+                // be larger than kMaxVertexBufferArrayStride, which is currently 2048. So by
+                // doing checks in uint64_t we avoid overflows.
+                if ((static_cast<uint64_t>(firstVertex) + vertexCount) * arrayStride > bufferSize) {
+                    return DAWN_VALIDATION_ERROR("Vertex buffer out of bound");
+                }
             }
         }
 
@@ -111,11 +117,18 @@
                 mLastRenderPipeline->GetVertexBuffer(usedSlotInstance);
             uint64_t arrayStride = vertexBuffer.arrayStride;
             uint64_t bufferSize = mVertexBufferSizes[usedSlotInstance];
-            // firstInstance and instanceCount are in uint32_t, and arrayStride must
-            // not be larger than kMaxVertexBufferArrayStride, which is currently 2048.
-            // So by doing checks in uint64_t we avoid overflows.
-            if ((static_cast<uint64_t>(firstInstance) + instanceCount) * arrayStride > bufferSize) {
-                return DAWN_VALIDATION_ERROR("Vertex buffer out of bound");
+            if (arrayStride == 0) {
+                if (vertexBuffer.usedBytesInStride > bufferSize) {
+                    return DAWN_VALIDATION_ERROR("Vertex buffer out of bound");
+                }
+            } else {
+                // firstInstance and instanceCount are in uint32_t, and arrayStride must
+                // not be larger than kMaxVertexBufferArrayStride, which is currently 2048.
+                // So by doing checks in uint64_t we avoid overflows.
+                if ((static_cast<uint64_t>(firstInstance) + instanceCount) * arrayStride >
+                    bufferSize) {
+                    return DAWN_VALIDATION_ERROR("Vertex buffer out of bound");
+                }
             }
         }
 
diff --git a/src/dawn_native/RenderEncoderBase.cpp b/src/dawn_native/RenderEncoderBase.cpp
index a22a2df..e98c45a 100644
--- a/src/dawn_native/RenderEncoderBase.cpp
+++ b/src/dawn_native/RenderEncoderBase.cpp
@@ -102,6 +102,10 @@
 
                 DAWN_TRY(mCommandBufferState.ValidateIndexBufferInRange(indexCount, firstIndex));
 
+                // Although we don't know actual vertex access range in CPU, we still call the
+                // ValidateBufferInRangeForVertexBuffer in order to deal with those vertex step mode
+                // vertex buffer with an array stride of zero.
+                DAWN_TRY(mCommandBufferState.ValidateBufferInRangeForVertexBuffer(0, 0));
                 DAWN_TRY(mCommandBufferState.ValidateBufferInRangeForInstanceBuffer(instanceCount,
                                                                                     firstInstance));
             }
diff --git a/src/dawn_native/RenderPipeline.cpp b/src/dawn_native/RenderPipeline.cpp
index c1c3335..decde9c 100644
--- a/src/dawn_native/RenderPipeline.cpp
+++ b/src/dawn_native/RenderPipeline.cpp
@@ -490,6 +490,7 @@
             mVertexBufferSlotsUsed.set(typedSlot);
             mVertexBufferInfos[typedSlot].arrayStride = buffers[slot].arrayStride;
             mVertexBufferInfos[typedSlot].stepMode = buffers[slot].stepMode;
+            mVertexBufferInfos[typedSlot].usedBytesInStride = 0;
             switch (buffers[slot].stepMode) {
                 case wgpu::VertexStepMode::Vertex:
                     mVertexBufferSlotsUsedAsVertexBuffer.set(typedSlot);
@@ -509,6 +510,17 @@
                 mAttributeInfos[location].vertexBufferSlot = typedSlot;
                 mAttributeInfos[location].offset = buffers[slot].attributes[i].offset;
                 mAttributeInfos[location].format = buffers[slot].attributes[i].format;
+                // Compute the access boundary of this attribute by adding attribute format size to
+                // attribute offset. Although offset is in uint64_t, such sum must be no larger than
+                // maxVertexBufferArrayStride (2048), which is promised by the GPUVertexBufferLayout
+                // validation of creating render pipeline. Therefore, calculating in uint16_t will
+                // cause no overflow.
+                DAWN_ASSERT(buffers[slot].attributes[i].offset <= 2048);
+                uint16_t accessBoundary =
+                    uint16_t(buffers[slot].attributes[i].offset) +
+                    uint16_t(GetVertexFormatInfo(buffers[slot].attributes[i].format).byteSize);
+                mVertexBufferInfos[typedSlot].usedBytesInStride =
+                    std::max(mVertexBufferInfos[typedSlot].usedBytesInStride, accessBoundary);
             }
         }
 
diff --git a/src/dawn_native/RenderPipeline.h b/src/dawn_native/RenderPipeline.h
index a379cd6..0aebfc2 100644
--- a/src/dawn_native/RenderPipeline.h
+++ b/src/dawn_native/RenderPipeline.h
@@ -50,6 +50,7 @@
     struct VertexBufferInfo {
         uint64_t arrayStride;
         wgpu::VertexStepMode stepMode;
+        uint16_t usedBytesInStride;
     };
 
     class RenderPipelineBase : public PipelineBase {
diff --git a/src/tests/unittests/validation/DrawVertexAndIndexBufferOOBValidationTests.cpp b/src/tests/unittests/validation/DrawVertexAndIndexBufferOOBValidationTests.cpp
index fa45e01..0a34e6a 100644
--- a/src/tests/unittests/validation/DrawVertexAndIndexBufferOOBValidationTests.cpp
+++ b/src/tests/unittests/validation/DrawVertexAndIndexBufferOOBValidationTests.cpp
@@ -195,6 +195,23 @@
             return CreateRenderPipelineWithBufferDesc(bufferDescList);
         }
 
+        // Create a render pipeline using one vertex-step-mode and one instance-step-mode buffer,
+        // both with a zero array stride. The minimal size of vertex step mode buffer should be 28,
+        // and the minimal size of instance step mode buffer should be 20.
+        wgpu::RenderPipeline CreateBasicRenderPipelineWithZeroArrayStride() {
+            std::vector<PipelineVertexBufferDesc> bufferDescList = {
+                {0,
+                 wgpu::VertexStepMode::Vertex,
+                 {{0, wgpu::VertexFormat::Float32x4, 0}, {1, wgpu::VertexFormat::Float32x2, 20}}},
+                {0,
+                 wgpu::VertexStepMode::Instance,
+                 // Two attributes are overlapped within this instance step mode vertex buffer
+                 {{3, wgpu::VertexFormat::Float32x4, 4}, {7, wgpu::VertexFormat::Float32x3, 0}}},
+            };
+
+            return CreateRenderPipelineWithBufferDesc(bufferDescList);
+        }
+
         void TestRenderPassDraw(const wgpu::RenderPipeline& pipeline,
                                 VertexBufferList vertexBufferList,
                                 uint32_t vertexCount,
@@ -482,29 +499,96 @@
 
                 IndexBufferDesc indexBufferDesc = {indexBuffer, indexFormat};
 
-                uint32_t vert = vertexParams.maxValidAccessNumber;
                 uint32_t inst = instanceParams.maxValidAccessNumber;
                 // Control case
-                TestRenderPassDrawIndexed(pipeline, indexBufferDesc, vertexBufferList, vert, inst,
-                                          0, 0, 0, true);
+                TestRenderPassDrawIndexed(pipeline, indexBufferDesc, vertexBufferList, 12, inst, 0,
+                                          0, 0, true);
                 // Vertex buffer (stepMode = instance) OOB, instanceCount too large
-                TestRenderPassDrawIndexed(pipeline, indexBufferDesc, vertexBufferList, vert,
-                                          inst + 1, 0, 0, 0, false);
+                TestRenderPassDrawIndexed(pipeline, indexBufferDesc, vertexBufferList, 12, inst + 1,
+                                          0, 0, 0, false);
 
                 if (!HasToggleEnabled("disable_base_instance")) {
                     // firstInstance is considered in CPU validation
                     // Vertex buffer (stepMode = instance) in bound
-                    TestRenderPassDrawIndexed(pipeline, indexBufferDesc, vertexBufferList, vert,
+                    TestRenderPassDrawIndexed(pipeline, indexBufferDesc, vertexBufferList, 12,
                                               inst - 1, 0, 0, 1, true);
                     // Vertex buffer (stepMode = instance) OOB, instanceCount + firstInstance too
                     // large
-                    TestRenderPassDrawIndexed(pipeline, indexBufferDesc, vertexBufferList, vert,
-                                              inst, 0, 0, 1, false);
+                    TestRenderPassDrawIndexed(pipeline, indexBufferDesc, vertexBufferList, 12, inst,
+                                              0, 0, 1, false);
                 }
+            }
+        }
+    }
 
-                // OOB of vertex buffer that stepMode=vertex can not be validated in CPU.
-                TestRenderPassDrawIndexed(pipeline, indexBufferDesc, vertexBufferList, vert + 1,
-                                          inst, 0, 0, 0, true);
+    // Verify instance mode vertex buffer OOB for DrawIndexed are caught in command encoder
+    TEST_F(DrawVertexAndIndexBufferOOBValidationTests, ZeroArrayStrideVertexBufferOOB) {
+        // In this test, we use VertexBufferParams.maxValidAccessNumber > 0 to indicate that such
+        // buffer parameter meet the requirement of pipeline, and maxValidAccessNumber == 0 to
+        // indicate that such buffer parameter will cause OOB.
+        const std::vector<VertexBufferParams> kVertexParamsListForZeroStride = {
+            // Control case
+            {0, 28, 0, 0, 1},
+            // Non-zero offset
+            {0, 28, 4, 0, 0},
+            {0, 28, 28, 0, 0},
+            // Non-zero size
+            {0, 28, 0, 28, 1},
+            {0, 28, 0, 27, 0},
+            // Non-zero offset and size
+            {0, 32, 4, 28, 1},
+            {0, 31, 4, 27, 0},
+            {0, 31, 4, 0, 0},
+        };
+
+        const std::vector<VertexBufferParams> kInstanceParamsListForZeroStride = {
+            // Control case
+            {0, 20, 0, 0, 1},
+            // Non-zero offset
+            {0, 24, 4, 0, 1},
+            {0, 20, 4, 0, 0},
+            {0, 20, 20, 0, 0},
+            // Non-zero size
+            {0, 21, 0, 20, 1},
+            {0, 20, 0, 19, 0},
+            // Non-zero offset and size
+            {0, 30, 4, 20, 1},
+            {0, 30, 4, 19, 0},
+            {0, 23, 4, 0, 0},
+        };
+
+        // Build a pipeline that require a vertex step mode vertex buffer no smaller than 28 bytes
+        // and an instance step mode buffer no smaller than 20 bytes
+        wgpu::RenderPipeline pipeline = CreateBasicRenderPipelineWithZeroArrayStride();
+
+        for (VertexBufferParams vertexParams : kVertexParamsListForZeroStride) {
+            for (VertexBufferParams instanceParams : kInstanceParamsListForZeroStride) {
+                auto indexFormat = wgpu::IndexFormat::Uint32;
+                auto indexStride = sizeof(uint32_t);
+
+                // Build index buffer for 12 indexes
+                wgpu::Buffer indexBuffer = CreateBuffer(12 * indexStride, wgpu::BufferUsage::Index);
+                // Build vertex buffer for vertices
+                wgpu::Buffer vertexBuffer = CreateBuffer(vertexParams.bufferSize);
+                // Build vertex buffer for instances
+                wgpu::Buffer instanceBuffer = CreateBuffer(instanceParams.bufferSize);
+
+                VertexBufferList vertexBufferList = {
+                    {0, vertexBuffer, vertexParams.bufferOffsetForEncoder,
+                     vertexParams.bufferSizeForEncoder},
+                    {1, instanceBuffer, instanceParams.bufferOffsetForEncoder,
+                     instanceParams.bufferSizeForEncoder}};
+
+                IndexBufferDesc indexBufferDesc = {indexBuffer, indexFormat};
+
+                const bool isSuccess = (vertexParams.maxValidAccessNumber > 0) &&
+                                       (instanceParams.maxValidAccessNumber > 0);
+                // vertexCount and instanceCount doesn't matter, as array stride is zero and all
+                // vertex/instance access the same space of buffer
+                TestRenderPassDraw(pipeline, vertexBufferList, 100, 100, 0, 0, isSuccess);
+                // indexCount doesn't matter as long as no index buffer OOB happened
+                TestRenderPassDrawIndexed(pipeline, indexBufferDesc, vertexBufferList, 12, 100, 0,
+                                          0, 0, isSuccess);
             }
         }
     }