Zero the index offsets before an indirect draw

Prevent reusing offsets from a previous direct draw.
Update test to verify that values are updated correctly
for each draw. Add tests for indirect draw offsets.

Bug: dawn:548
Change-Id: Ice8325a8a41b8a4375767156dbaba3ee3d714f3b
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/67121
Reviewed-by: Austin Eng <enga@chromium.org>
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
Commit-Queue: Austin Eng <enga@chromium.org>
diff --git a/src/dawn_native/d3d12/CommandBufferD3D12.cpp b/src/dawn_native/d3d12/CommandBufferD3D12.cpp
index dc32d10..702f683 100644
--- a/src/dawn_native/d3d12/CommandBufferD3D12.cpp
+++ b/src/dawn_native/d3d12/CommandBufferD3D12.cpp
@@ -1400,6 +1400,11 @@
 
                     DAWN_TRY(bindingTracker->Apply(commandContext));
                     vertexBufferTracker.Apply(commandList, lastPipeline);
+
+                    // TODO(dawn:548): remove this once builtins are emulated for indirect draws.
+                    // Zero the index offset values to avoid reusing values from the previous draw
+                    RecordFirstIndexOffset(commandList, lastPipeline, 0, 0);
+
                     Buffer* buffer = ToBackend(draw->indirectBuffer.Get());
                     ComPtr<ID3D12CommandSignature> signature =
                         ToBackend(GetDevice())->GetDrawIndirectSignature();
@@ -1414,6 +1419,11 @@
 
                     DAWN_TRY(bindingTracker->Apply(commandContext));
                     vertexBufferTracker.Apply(commandList, lastPipeline);
+
+                    // TODO(dawn:548): remove this once builtins are emulated for indirect draws.
+                    // Zero the index offset values to avoid reusing values from the previous draw
+                    RecordFirstIndexOffset(commandList, lastPipeline, 0, 0);
+
                     Buffer* buffer = ToBackend(draw->indirectBufferLocation->GetBuffer());
                     ComPtr<ID3D12CommandSignature> signature =
                         ToBackend(GetDevice())->GetDrawIndexedIndirectSignature();
diff --git a/src/tests/end2end/FirstIndexOffsetTests.cpp b/src/tests/end2end/FirstIndexOffsetTests.cpp
index 92ad316..7558e33 100644
--- a/src/tests/end2end/FirstIndexOffsetTests.cpp
+++ b/src/tests/end2end/FirstIndexOffsetTests.cpp
@@ -25,6 +25,8 @@
 enum class DrawMode {
     NonIndexed,
     Indexed,
+    NonIndexedIndirect,
+    IndexedIndirect,
 };
 
 enum class CheckIndex : uint32_t {
@@ -103,7 +105,7 @@
         vertexBody << "  output.vertex_index = input.vertex_index;\n";
 
         fragmentInputs << "  [[location(1)]] vertex_index : u32;\n";
-        fragmentBody << "  idx_vals.vertex_index = input.vertex_index;\n";
+        fragmentBody << "  ignore(atomicMin(&idx_vals.vertex_index, input.vertex_index));\n";
     }
     if ((checkIndex & CheckIndex::Instance) != 0) {
         vertexInputs << "  [[builtin(instance_index)]] instance_index : u32;\n";
@@ -111,7 +113,7 @@
         vertexBody << "  output.instance_index = input.instance_index;\n";
 
         fragmentInputs << "  [[location(2)]] instance_index : u32;\n";
-        fragmentBody << "  idx_vals.instance_index = input.instance_index;\n";
+        fragmentBody << "  ignore(atomicMin(&idx_vals.instance_index, input.instance_index));\n";
     }
 
     std::string vertexShader = R"(
@@ -130,8 +132,8 @@
 
     std::string fragmentShader = R"(
 [[block]] struct IndexVals {
-  vertex_index : u32;
-  instance_index : u32;
+  vertex_index : atomic<u32>;
+  instance_index : atomic<u32>;
 };
 [[group(0), binding(0)]] var<storage, read_write> idx_vals : IndexVals;
 
@@ -161,31 +163,74 @@
 
     std::vector<float> vertexData(firstVertex * kComponentsPerVertex);
     vertexData.insert(vertexData.end(), {0, 0, 0, 1});
+    vertexData.insert(vertexData.end(), {0, 0, 0, 1});
     wgpu::Buffer vertices = utils::CreateBufferFromData(
         device, vertexData.data(), vertexData.size() * sizeof(float), wgpu::BufferUsage::Vertex);
     wgpu::Buffer indices =
         utils::CreateBufferFromData<uint32_t>(device, wgpu::BufferUsage::Index, {0});
+
+    const uint32_t bufferInitialVertex = checkIndex & CheckIndex::Vertex ? std::numeric_limits<uint32_t>::max() : 0;
+    const uint32_t bufferInitialInstance = checkIndex & CheckIndex::Instance ? std::numeric_limits<uint32_t>::max() : 0;
     wgpu::Buffer buffer = utils::CreateBufferFromData(
-        device, wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::Storage, {0u, 0u});
+        device, wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::Storage, {bufferInitialVertex, bufferInitialInstance});
+
+    wgpu::Buffer indirectBuffer;
+    switch (mode) {
+    case DrawMode::NonIndexed:
+    case DrawMode::Indexed:
+        break;
+    case DrawMode::NonIndexedIndirect:
+        // With DrawIndirect firstInstance is reserved and must be 0 according to spec.
+        ASSERT_EQ(firstInstance, 0u);
+        indirectBuffer = utils::CreateBufferFromData<uint32_t>(device, wgpu::BufferUsage::Indirect, {1, 1, firstVertex, firstInstance});
+        break;
+    case DrawMode::IndexedIndirect:
+        // With DrawIndexedIndirect firstInstance is reserved and must be 0 according to spec.
+        ASSERT_EQ(firstInstance, 0u);
+        indirectBuffer = utils::CreateBufferFromData<uint32_t>(device, wgpu::BufferUsage::Indirect, {1, 1, 0, firstVertex, firstInstance});
+        break;
+    default:
+        FAIL();
+    }
+
+    wgpu::BindGroup bindGroup = utils::MakeBindGroup(device, pipeline.GetBindGroupLayout(0), {{0, buffer}});
 
     wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
     wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&renderPass.renderPassInfo);
     pass.SetPipeline(pipeline);
     pass.SetVertexBuffer(0, vertices);
-    pass.SetBindGroup(0,
-                      utils::MakeBindGroup(device, pipeline.GetBindGroupLayout(0), {{0, buffer}}));
-    if (mode == DrawMode::Indexed) {
+    pass.SetBindGroup(0, bindGroup);
+    // Do a first draw to make sure the offset values are correctly updated on the next draw.
+    // We should only see the values from the second draw.
+    pass.Draw(1, 1, firstVertex + 1, firstInstance + 1);
+    switch (mode) {
+    case DrawMode::NonIndexed:
+        pass.Draw(1, 1, firstVertex, firstInstance);
+        break;
+    case DrawMode::Indexed:
         pass.SetIndexBuffer(indices, wgpu::IndexFormat::Uint32);
         pass.DrawIndexed(1, 1, 0, firstVertex, firstInstance);
-
-    } else {
-        pass.Draw(1, 1, firstVertex, firstInstance);
+        break;
+    case DrawMode::NonIndexedIndirect:
+        pass.DrawIndirect(indirectBuffer, 0);
+        break;
+    case DrawMode::IndexedIndirect:
+        pass.SetIndexBuffer(indices, wgpu::IndexFormat::Uint32);
+        pass.DrawIndexedIndirect(indirectBuffer, 0);
+        break;
+    default:
+        FAIL();
     }
     pass.EndPass();
     wgpu::CommandBuffer commands = encoder.Finish();
     queue.Submit(1, &commands);
 
     std::array<uint32_t, 2> expected = {firstVertex, firstInstance};
+    // TODO(dawn:548): remove this once builtins are emulated for indirect draws.
+    // Until then the expected values should always be {0, 0}.
+    if (IsD3D12() && (mode == DrawMode::NonIndexedIndirect || mode == DrawMode::IndexedIndirect)) {
+        expected = {0, 0};
+    }
     EXPECT_BUFFER_U32_RANGE_EQ(expected.data(), buffer, 0, expected.size());
 }
 
@@ -220,6 +265,18 @@
     TestBothIndices(DrawMode::Indexed, 7, 11);
 }
 
+// There are no instance_index tests because the spec forces it to be 0.
+
+// Test that vertex_index starts at 7 when drawn using DrawIndirect()
+TEST_P(FirstIndexOffsetTests, NonIndexedIndirectVertexOffset) {
+    TestVertexIndex(DrawMode::NonIndexedIndirect, 7);
+}
+
+// Test that vertex_index starts at 7 when drawn using DrawIndexedIndirect()
+TEST_P(FirstIndexOffsetTests, IndexedIndirectVertex) {
+    TestVertexIndex(DrawMode::IndexedIndirect, 7);
+}
+
 DAWN_INSTANTIATE_TEST(FirstIndexOffsetTests,
                       D3D12Backend(),
                       MetalBackend(),