Revert "Add tests having multiple indirect dispatch within one pass"

This reverts commit 52d11797f66d0718643d5462954cf6d38c42c232.

Reason for revert: Failing on 32-bit https://ci.chromium.org/ui/p/chromium/builders/ci/Dawn%20Win10%20x86%20Release%20(NVIDIA)/76947/overview

Original change's description:
> Add tests having multiple indirect dispatch within one pass
>
> Add more test coverage for multiple DispatchWorkgroupsIndirect
> calls within one compute shader pass to ensure
> DispatchWorkgroupsIndirect with non-zero indirect offset work
> correctly when duplicating numWorkgroups.
>
> Bug: dawn:2201, dawn:1262
> Change-Id: I598bfd076328b0baa9ef04e61c62c2a7b048ac48
> Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/158821
> Kokoro: Kokoro <noreply+kokoro@google.com>
> Reviewed-by: Austin Eng <enga@chromium.org>
> Reviewed-by: Loko Kung <lokokung@google.com>
> Commit-Queue: Shrek Shao <shrekshao@google.com>

TBR=enga@chromium.org,shrekshao@google.com,noreply+kokoro@google.com,dawn-scoped@luci-project-accounts.iam.gserviceaccount.com,lokokung@google.com

Change-Id: Id6db88f9139ca69025ed3b9e9ac5bee8ed9e13c0
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Bug: dawn:2201, dawn:1262
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/159880
Commit-Queue: Kai Ninomiya <kainino@chromium.org>
Reviewed-by: Kai Ninomiya <kainino@chromium.org>
Kokoro: Kai Ninomiya <kainino@chromium.org>
Reviewed-by: Shrek Shao <shrekshao@google.com>
diff --git a/src/dawn/tests/end2end/ComputeDispatchTests.cpp b/src/dawn/tests/end2end/ComputeDispatchTests.cpp
index b7a1c94..62c6cb5 100644
--- a/src/dawn/tests/end2end/ComputeDispatchTests.cpp
+++ b/src/dawn/tests/end2end/ComputeDispatchTests.cpp
@@ -25,11 +25,9 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-#include <algorithm>
 #include <initializer_list>
 #include <vector>
 
-#include "dawn/common/Math.h"
 #include "dawn/tests/DawnTest.h"
 #include "dawn/utils/WGPUHelpers.h"
 
@@ -227,6 +225,8 @@
     // TODO(crbug.com/dawn/1196): Fails on Chromium's Quadro P400 bots
     DAWN_SUPPRESS_TEST_IF(IsD3D12() && IsNvidia());
 #endif
+    // TODO(crbug.com/dawn/1262): Fails with the full validation turned on.
+    DAWN_SUPPRESS_TEST_IF(IsD3D12() && IsFullBackendValidationEnabled());
 
     IndirectTest({2, 3, 4}, 0);
 }
@@ -257,6 +257,8 @@
     // TODO(crbug.com/dawn/1196): Fails on Chromium's Quadro P400 bots
     DAWN_SUPPRESS_TEST_IF(IsD3D12() && IsNvidia());
 #endif
+    // TODO(crbug.com/dawn/1262): Fails with the full validation turned on.
+    DAWN_SUPPRESS_TEST_IF(IsD3D12() && IsFullBackendValidationEnabled());
 
     IndirectTest({0, 0, 0, 2, 3, 4}, 3 * sizeof(uint32_t));
 }
@@ -272,6 +274,8 @@
     // TODO(crbug.com/dawn/1196): Fails on Chromium's Quadro P400 bots
     DAWN_SUPPRESS_TEST_IF(IsD3D12() && IsNvidia());
 #endif
+    // TODO(crbug.com/dawn/1262): Fails with the full validation turned on.
+    DAWN_SUPPRESS_TEST_IF(IsD3D12() && IsFullBackendValidationEnabled());
 
     // TODO(crbug.com/dawn/1165): Fails with WARP
     DAWN_SUPPRESS_TEST_IF(IsWARP());
@@ -332,262 +336,5 @@
                       OpenGLESBackend(),
                       VulkanBackend());
 
-namespace {
-using UseNumWorkgoups = bool;
-DAWN_TEST_PARAM_STRUCT(Params, UseNumWorkgoups);
-}  // namespace
-
-class ComputeMultipleDispatchesTests : public DawnTestWithParams<Params> {
-  protected:
-    void SetUp() override {
-        DawnTestWithParams<Params>::SetUp();
-
-        bindGroupLayout = utils::MakeBindGroupLayout(
-            device, {{0, wgpu::ShaderStage::Compute, wgpu::BufferBindingType::Uniform,
-                      /* hasDynamicOffset = */ true},
-                     {1, wgpu::ShaderStage::Compute, wgpu::BufferBindingType::Storage}});
-
-        wgpu::PipelineLayoutDescriptor pipelineLayoutDescriptor;
-        pipelineLayoutDescriptor.bindGroupLayoutCount = 1;
-        pipelineLayoutDescriptor.bindGroupLayouts = &bindGroupLayout;
-        wgpu::PipelineLayout pipelineLayout =
-            device.CreatePipelineLayout(&pipelineLayoutDescriptor);
-
-        // Write workgroup number into the output buffer if we saw the biggest dispatch
-        // To make sure the dispatch was not called, write maximum u32 value for 0 dispatches
-        wgpu::ShaderModule module = utils::CreateShaderModule(device, R"(
-            @group(0) @binding(0) var<uniform> dispatchId : u32;
-            @group(0) @binding(1) var<storage, read_write> output : array<vec3u>;
-
-            @compute @workgroup_size(1, 1, 1)
-            fn main(@builtin(global_invocation_id) GlobalInvocationID : vec3u,
-                    @builtin(num_workgroups) dispatch : vec3u) {
-                if (dispatch.x == 0u || dispatch.y == 0u || dispatch.z == 0u) {
-                    output[dispatchId] = vec3u(0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu);
-                    return;
-                }
-
-                if (all(GlobalInvocationID == dispatch - vec3u(1u, 1u, 1u))) {
-                    output[dispatchId] = dispatch;
-                }
-            })");
-
-        wgpu::ComputePipelineDescriptor csDesc;
-        //
-        csDesc.compute.module = module;
-        csDesc.compute.entryPoint = "main";
-        csDesc.layout = pipelineLayout;
-        pipeline = device.CreateComputePipeline(&csDesc);
-
-        // Test the use of the compute pipelines without using @num_workgroups
-        wgpu::ShaderModule moduleWithoutNumWorkgroups = utils::CreateShaderModule(device, R"(
-            // input.xyz = num_workgroups.xyz, input.w = dispatch call id (i.e. offset in output buffer)
-            @group(0) @binding(0) var<uniform> input : vec4u;
-            @group(0) @binding(1) var<storage, read_write> output : array<vec3u>;
-
-            @compute @workgroup_size(1, 1, 1)
-            fn main(@builtin(global_invocation_id) GlobalInvocationID : vec3u) {
-                let dispatch : vec3u = input.xyz;
-                let dispatchId = input.w;
-
-                if (dispatch.x == 0u || dispatch.y == 0u || dispatch.z == 0u) {
-                    output[dispatchId] = vec3u(0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu);
-                    return;
-                }
-
-                if (all(GlobalInvocationID == dispatch - vec3u(1u, 1u, 1u))) {
-                    output[dispatchId] = dispatch;
-                }
-            })");
-        csDesc.compute.module = moduleWithoutNumWorkgroups;
-        csDesc.compute.entryPoint = "main";
-        csDesc.layout = pipelineLayout;
-        pipelineWithoutNumWorkgroups = device.CreateComputePipeline(&csDesc);
-    }
-
-    void IndirectTest(std::vector<uint32_t> indirectBufferData,
-                      std::vector<uint64_t> indirectOffsets) {
-        bool useNumWorkgroups = GetParam().mUseNumWorkgoups;
-        // Set up dst storage buffer to contain dispatch x, y, z
-        wgpu::BufferDescriptor dstBufDescriptor;
-        // array<vec3u> aligns to 16 bytes
-        dstBufDescriptor.size = Align(indirectOffsets.size() * 4u * sizeof(uint32_t), 16u);
-        dstBufDescriptor.usage =
-            wgpu::BufferUsage::Storage | wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst;
-        wgpu::Buffer dst = device.CreateBuffer(&dstBufDescriptor);
-
-        wgpu::Buffer indirectBuffer = utils::CreateBufferFromData(
-            device, &indirectBufferData[0], indirectBufferData.size() * sizeof(uint32_t),
-            wgpu::BufferUsage::Indirect | wgpu::BufferUsage::CopySrc);
-
-        // dynamic offset requires a 256 byte alignment. So we store dispatch index every 256 byte
-        // instead of compactly
-        constexpr uint32_t kDynamicOffsetReq = 256;
-        constexpr uint32_t kIndexOffset = kDynamicOffsetReq / sizeof(uint32_t);
-
-        std::vector<uint32_t> dynamicOffsets(indirectOffsets.size());
-        for (size_t i = 0; i < indirectOffsets.size(); i++) {
-            dynamicOffsets[i] = i * kDynamicOffsetReq;
-        }
-
-        // Set up bind group and issue dispatch
-        wgpu::BindGroup bindGroup;
-        wgpu::ComputePipeline computePipelineForTest;
-
-        if (useNumWorkgroups) {
-            computePipelineForTest = pipeline;
-
-            std::vector<uint32_t> dispatchIds(indirectOffsets.size() * kIndexOffset);
-            for (size_t i = 0; i < indirectOffsets.size(); i++) {
-                size_t o = kIndexOffset * i;
-                dispatchIds[o] = i;
-            }
-
-            wgpu::Buffer uniformBuffer = utils::CreateBufferFromData(
-                device, dispatchIds.data(), dispatchIds.size() * sizeof(uint32_t),
-                wgpu::BufferUsage::Uniform);
-            bindGroup = utils::MakeBindGroup(device, bindGroupLayout,
-                                             {
-                                                 {0, uniformBuffer, 0, sizeof(uint32_t)},
-                                                 {1, dst, 0, dstBufDescriptor.size},
-                                             });
-        } else {
-            computePipelineForTest = pipelineWithoutNumWorkgroups;
-
-            std::vector<uint32_t> inputs(indirectOffsets.size() * kIndexOffset);
-            for (size_t i = 0; i < indirectOffsets.size(); i++) {
-                uint32_t indirectStart = indirectOffsets[i] / sizeof(uint32_t);
-                size_t o = kIndexOffset * i;
-                // numWorkgroups
-                inputs[o] = indirectBufferData[indirectStart];
-                inputs[o + 1] = indirectBufferData[indirectStart + 1];
-                inputs[o + 2] = indirectBufferData[indirectStart + 2];
-                // dispatchId
-                inputs[o + 3] = i;
-            }
-
-            wgpu::Buffer uniformBuffer =
-                utils::CreateBufferFromData(device, inputs.data(), inputs.size() * sizeof(uint32_t),
-                                            wgpu::BufferUsage::Uniform);
-            bindGroup = utils::MakeBindGroup(device, bindGroupLayout,
-                                             {
-                                                 {0, uniformBuffer, 0, 4 * sizeof(uint32_t)},
-                                                 {1, dst, 0, dstBufDescriptor.size},
-                                             });
-        }
-
-        wgpu::CommandBuffer commands;
-        {
-            wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
-            wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
-            pass.SetPipeline(computePipelineForTest);
-            for (size_t i = 0; i < indirectOffsets.size(); i++) {
-                uint64_t indirectOffset = indirectOffsets[i];
-                // Use dynamic binding offset to set dispatch Id (used as offset to output buffer)
-                // for each dispatch call
-                pass.SetBindGroup(0, bindGroup, 1, &dynamicOffsets[i]);
-                pass.DispatchWorkgroupsIndirect(indirectBuffer, indirectOffset);
-            }
-
-            pass.End();
-
-            commands = encoder.Finish();
-        }
-
-        queue.Submit(1, &commands);
-
-        uint32_t maxComputeWorkgroupsPerDimension =
-            GetSupportedLimits().limits.maxComputeWorkgroupsPerDimension;
-
-        std::vector<uint32_t> expected(4 * indirectOffsets.size(), 0);
-        for (size_t i = 0; i < indirectOffsets.size(); i++) {
-            uint32_t indirectStart = indirectOffsets[i] / sizeof(uint32_t);
-            size_t o = 4 * i;
-
-            if (indirectBufferData[indirectStart] == 0 ||
-                indirectBufferData[indirectStart + 1] == 0 ||
-                indirectBufferData[indirectStart + 2] == 0 ||
-                indirectBufferData[indirectStart] > maxComputeWorkgroupsPerDimension ||
-                indirectBufferData[indirectStart + 1] > maxComputeWorkgroupsPerDimension ||
-                indirectBufferData[indirectStart + 2] > maxComputeWorkgroupsPerDimension) {
-                std::copy(kSentinelData.begin(), kSentinelData.end(), expected.begin() + o);
-            } else {
-                expected[o] = indirectBufferData[indirectStart];
-                expected[o + 1] = indirectBufferData[indirectStart + 1];
-                expected[o + 2] = indirectBufferData[indirectStart + 2];
-            }
-        }
-
-        // Verify the indirect buffer is not modified
-        EXPECT_BUFFER_U32_RANGE_EQ(&indirectBufferData[0], indirectBuffer, 0,
-                                   indirectBufferData.size());
-        // Verify the dispatch got called with group counts in indirect buffer if all group counts
-        // are not zero
-        EXPECT_BUFFER_U32_RANGE_EQ(&expected[0], dst, 0, expected.size());
-    }
-
-  private:
-    wgpu::ComputePipeline pipeline;
-    wgpu::ComputePipeline pipelineWithoutNumWorkgroups;
-    wgpu::BindGroupLayout bindGroupLayout;
-};
-
-// Test indirect dispatches with buffer offset
-TEST_P(ComputeMultipleDispatchesTests, IndirectOffset) {
-#if DAWN_PLATFORM_IS(32_BIT)
-    // TODO(crbug.com/dawn/1196): Fails on Chromium's Quadro P400 bots
-    DAWN_SUPPRESS_TEST_IF(IsD3D12() && IsNvidia());
-#endif
-
-    // Control case: One DispatchWorkgroupsIndirect call
-    IndirectTest({0, 0, 0, 2, 3, 4}, {3 * sizeof(uint32_t)});
-
-    // Two dispatches: first is no-op
-    IndirectTest({0, 0, 0, 2, 3, 4}, {0, 3 * sizeof(uint32_t)});
-
-    // Two dispatches
-    IndirectTest({9, 8, 7, 2, 3, 4}, {0, 3 * sizeof(uint32_t)});
-
-    // Indirect offsets not in order
-    IndirectTest({9, 8, 7, 2, 3, 4}, {3 * sizeof(uint32_t), 0});
-
-    // Multiple dispatches with duplicate indirect offsets
-    IndirectTest({9, 8, 7, 0, 0, 0, 2, 3, 4, 0xa, 0xb, 0xc, 0xf, 0xe, 0xd},
-                 {
-                     6 * sizeof(uint32_t),
-                     0,
-                     3 * sizeof(uint32_t),
-                     12 * sizeof(uint32_t),
-                     9 * sizeof(uint32_t),
-                     6 * sizeof(uint32_t),
-                     6 * sizeof(uint32_t),
-                 });
-}
-
-// Test indirect dispatches exceeding the max limit with an offset are noop-ed.
-TEST_P(ComputeMultipleDispatchesTests, ExceedsMaxWorkgroupsWithOffsetNoop) {
-    DAWN_TEST_UNSUPPORTED_IF(HasToggleEnabled("skip_validation"));
-
-    // TODO(crbug.com/dawn/839): Investigate why this test fails with WARP.
-    DAWN_SUPPRESS_TEST_IF(IsWARP());
-
-    uint32_t max = GetSupportedLimits().limits.maxComputeWorkgroupsPerDimension;
-
-    // Two dispatches: first is no-op
-    IndirectTest({max + 1, 1, 1, 2, 3, 4}, {0, 3 * sizeof(uint32_t)});
-
-    // Two dispatches: second is no-op
-    IndirectTest({2, 3, 4, max + 1, 1, 1}, {0, 3 * sizeof(uint32_t)});
-
-    // Two dispatches: second is no-op
-    IndirectTest({max + 1, 1, 1, 2, 3, 4}, {3 * sizeof(uint32_t), 0});
-}
-
-DAWN_INSTANTIATE_TEST_P(ComputeMultipleDispatchesTests,
-                        {D3D11Backend(), D3D12Backend(), MetalBackend(), OpenGLBackend(),
-                         OpenGLESBackend(), VulkanBackend()},
-                        {true, false}  // useNumWorkgroups
-);
-
 }  // anonymous namespace
 }  // namespace dawn