Revert "Add tests having multiple indirect dispatch within one pass"
This reverts commit 52d11797f66d0718643d5462954cf6d38c42c232.
Reason for revert: Failing on 32-bit https://ci.chromium.org/ui/p/chromium/builders/ci/Dawn%20Win10%20x86%20Release%20(NVIDIA)/76947/overview
Original change's description:
> Add tests having multiple indirect dispatch within one pass
>
> Add more test coverage for multiple DispatchWorkgroupsIndirect
> calls within one compute shader pass to ensure
> DispatchWorkgroupsIndirect with non-zero indirect offset work
> correctly when duplicating numWorkgroups.
>
> Bug: dawn:2201, dawn:1262
> Change-Id: I598bfd076328b0baa9ef04e61c62c2a7b048ac48
> Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/158821
> Kokoro: Kokoro <noreply+kokoro@google.com>
> Reviewed-by: Austin Eng <enga@chromium.org>
> Reviewed-by: Loko Kung <lokokung@google.com>
> Commit-Queue: Shrek Shao <shrekshao@google.com>
TBR=enga@chromium.org,shrekshao@google.com,noreply+kokoro@google.com,dawn-scoped@luci-project-accounts.iam.gserviceaccount.com,lokokung@google.com
Change-Id: Id6db88f9139ca69025ed3b9e9ac5bee8ed9e13c0
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Bug: dawn:2201, dawn:1262
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/159880
Commit-Queue: Kai Ninomiya <kainino@chromium.org>
Reviewed-by: Kai Ninomiya <kainino@chromium.org>
Kokoro: Kai Ninomiya <kainino@chromium.org>
Reviewed-by: Shrek Shao <shrekshao@google.com>
diff --git a/src/dawn/tests/end2end/ComputeDispatchTests.cpp b/src/dawn/tests/end2end/ComputeDispatchTests.cpp
index b7a1c94..62c6cb5 100644
--- a/src/dawn/tests/end2end/ComputeDispatchTests.cpp
+++ b/src/dawn/tests/end2end/ComputeDispatchTests.cpp
@@ -25,11 +25,9 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#include <algorithm>
#include <initializer_list>
#include <vector>
-#include "dawn/common/Math.h"
#include "dawn/tests/DawnTest.h"
#include "dawn/utils/WGPUHelpers.h"
@@ -227,6 +225,8 @@
// TODO(crbug.com/dawn/1196): Fails on Chromium's Quadro P400 bots
DAWN_SUPPRESS_TEST_IF(IsD3D12() && IsNvidia());
#endif
+ // TODO(crbug.com/dawn/1262): Fails with the full validation turned on.
+ DAWN_SUPPRESS_TEST_IF(IsD3D12() && IsFullBackendValidationEnabled());
IndirectTest({2, 3, 4}, 0);
}
@@ -257,6 +257,8 @@
// TODO(crbug.com/dawn/1196): Fails on Chromium's Quadro P400 bots
DAWN_SUPPRESS_TEST_IF(IsD3D12() && IsNvidia());
#endif
+ // TODO(crbug.com/dawn/1262): Fails with the full validation turned on.
+ DAWN_SUPPRESS_TEST_IF(IsD3D12() && IsFullBackendValidationEnabled());
IndirectTest({0, 0, 0, 2, 3, 4}, 3 * sizeof(uint32_t));
}
@@ -272,6 +274,8 @@
// TODO(crbug.com/dawn/1196): Fails on Chromium's Quadro P400 bots
DAWN_SUPPRESS_TEST_IF(IsD3D12() && IsNvidia());
#endif
+ // TODO(crbug.com/dawn/1262): Fails with the full validation turned on.
+ DAWN_SUPPRESS_TEST_IF(IsD3D12() && IsFullBackendValidationEnabled());
// TODO(crbug.com/dawn/1165): Fails with WARP
DAWN_SUPPRESS_TEST_IF(IsWARP());
@@ -332,262 +336,5 @@
OpenGLESBackend(),
VulkanBackend());
-namespace {
-using UseNumWorkgoups = bool;
-DAWN_TEST_PARAM_STRUCT(Params, UseNumWorkgoups);
-} // namespace
-
-class ComputeMultipleDispatchesTests : public DawnTestWithParams<Params> {
- protected:
- void SetUp() override {
- DawnTestWithParams<Params>::SetUp();
-
- bindGroupLayout = utils::MakeBindGroupLayout(
- device, {{0, wgpu::ShaderStage::Compute, wgpu::BufferBindingType::Uniform,
- /* hasDynamicOffset = */ true},
- {1, wgpu::ShaderStage::Compute, wgpu::BufferBindingType::Storage}});
-
- wgpu::PipelineLayoutDescriptor pipelineLayoutDescriptor;
- pipelineLayoutDescriptor.bindGroupLayoutCount = 1;
- pipelineLayoutDescriptor.bindGroupLayouts = &bindGroupLayout;
- wgpu::PipelineLayout pipelineLayout =
- device.CreatePipelineLayout(&pipelineLayoutDescriptor);
-
- // Write workgroup number into the output buffer if we saw the biggest dispatch
- // To make sure the dispatch was not called, write maximum u32 value for 0 dispatches
- wgpu::ShaderModule module = utils::CreateShaderModule(device, R"(
- @group(0) @binding(0) var<uniform> dispatchId : u32;
- @group(0) @binding(1) var<storage, read_write> output : array<vec3u>;
-
- @compute @workgroup_size(1, 1, 1)
- fn main(@builtin(global_invocation_id) GlobalInvocationID : vec3u,
- @builtin(num_workgroups) dispatch : vec3u) {
- if (dispatch.x == 0u || dispatch.y == 0u || dispatch.z == 0u) {
- output[dispatchId] = vec3u(0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu);
- return;
- }
-
- if (all(GlobalInvocationID == dispatch - vec3u(1u, 1u, 1u))) {
- output[dispatchId] = dispatch;
- }
- })");
-
- wgpu::ComputePipelineDescriptor csDesc;
- //
- csDesc.compute.module = module;
- csDesc.compute.entryPoint = "main";
- csDesc.layout = pipelineLayout;
- pipeline = device.CreateComputePipeline(&csDesc);
-
- // Test the use of the compute pipelines without using @num_workgroups
- wgpu::ShaderModule moduleWithoutNumWorkgroups = utils::CreateShaderModule(device, R"(
- // input.xyz = num_workgroups.xyz, input.w = dispatch call id (i.e. offset in output buffer)
- @group(0) @binding(0) var<uniform> input : vec4u;
- @group(0) @binding(1) var<storage, read_write> output : array<vec3u>;
-
- @compute @workgroup_size(1, 1, 1)
- fn main(@builtin(global_invocation_id) GlobalInvocationID : vec3u) {
- let dispatch : vec3u = input.xyz;
- let dispatchId = input.w;
-
- if (dispatch.x == 0u || dispatch.y == 0u || dispatch.z == 0u) {
- output[dispatchId] = vec3u(0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu);
- return;
- }
-
- if (all(GlobalInvocationID == dispatch - vec3u(1u, 1u, 1u))) {
- output[dispatchId] = dispatch;
- }
- })");
- csDesc.compute.module = moduleWithoutNumWorkgroups;
- csDesc.compute.entryPoint = "main";
- csDesc.layout = pipelineLayout;
- pipelineWithoutNumWorkgroups = device.CreateComputePipeline(&csDesc);
- }
-
- void IndirectTest(std::vector<uint32_t> indirectBufferData,
- std::vector<uint64_t> indirectOffsets) {
- bool useNumWorkgroups = GetParam().mUseNumWorkgoups;
- // Set up dst storage buffer to contain dispatch x, y, z
- wgpu::BufferDescriptor dstBufDescriptor;
- // array<vec3u> aligns to 16 bytes
- dstBufDescriptor.size = Align(indirectOffsets.size() * 4u * sizeof(uint32_t), 16u);
- dstBufDescriptor.usage =
- wgpu::BufferUsage::Storage | wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst;
- wgpu::Buffer dst = device.CreateBuffer(&dstBufDescriptor);
-
- wgpu::Buffer indirectBuffer = utils::CreateBufferFromData(
- device, &indirectBufferData[0], indirectBufferData.size() * sizeof(uint32_t),
- wgpu::BufferUsage::Indirect | wgpu::BufferUsage::CopySrc);
-
- // dynamic offset requires a 256 byte alignment. So we store dispatch index every 256 byte
- // instead of compactly
- constexpr uint32_t kDynamicOffsetReq = 256;
- constexpr uint32_t kIndexOffset = kDynamicOffsetReq / sizeof(uint32_t);
-
- std::vector<uint32_t> dynamicOffsets(indirectOffsets.size());
- for (size_t i = 0; i < indirectOffsets.size(); i++) {
- dynamicOffsets[i] = i * kDynamicOffsetReq;
- }
-
- // Set up bind group and issue dispatch
- wgpu::BindGroup bindGroup;
- wgpu::ComputePipeline computePipelineForTest;
-
- if (useNumWorkgroups) {
- computePipelineForTest = pipeline;
-
- std::vector<uint32_t> dispatchIds(indirectOffsets.size() * kIndexOffset);
- for (size_t i = 0; i < indirectOffsets.size(); i++) {
- size_t o = kIndexOffset * i;
- dispatchIds[o] = i;
- }
-
- wgpu::Buffer uniformBuffer = utils::CreateBufferFromData(
- device, dispatchIds.data(), dispatchIds.size() * sizeof(uint32_t),
- wgpu::BufferUsage::Uniform);
- bindGroup = utils::MakeBindGroup(device, bindGroupLayout,
- {
- {0, uniformBuffer, 0, sizeof(uint32_t)},
- {1, dst, 0, dstBufDescriptor.size},
- });
- } else {
- computePipelineForTest = pipelineWithoutNumWorkgroups;
-
- std::vector<uint32_t> inputs(indirectOffsets.size() * kIndexOffset);
- for (size_t i = 0; i < indirectOffsets.size(); i++) {
- uint32_t indirectStart = indirectOffsets[i] / sizeof(uint32_t);
- size_t o = kIndexOffset * i;
- // numWorkgroups
- inputs[o] = indirectBufferData[indirectStart];
- inputs[o + 1] = indirectBufferData[indirectStart + 1];
- inputs[o + 2] = indirectBufferData[indirectStart + 2];
- // dispatchId
- inputs[o + 3] = i;
- }
-
- wgpu::Buffer uniformBuffer =
- utils::CreateBufferFromData(device, inputs.data(), inputs.size() * sizeof(uint32_t),
- wgpu::BufferUsage::Uniform);
- bindGroup = utils::MakeBindGroup(device, bindGroupLayout,
- {
- {0, uniformBuffer, 0, 4 * sizeof(uint32_t)},
- {1, dst, 0, dstBufDescriptor.size},
- });
- }
-
- wgpu::CommandBuffer commands;
- {
- wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
- wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
- pass.SetPipeline(computePipelineForTest);
- for (size_t i = 0; i < indirectOffsets.size(); i++) {
- uint64_t indirectOffset = indirectOffsets[i];
- // Use dynamic binding offset to set dispatch Id (used as offset to output buffer)
- // for each dispatch call
- pass.SetBindGroup(0, bindGroup, 1, &dynamicOffsets[i]);
- pass.DispatchWorkgroupsIndirect(indirectBuffer, indirectOffset);
- }
-
- pass.End();
-
- commands = encoder.Finish();
- }
-
- queue.Submit(1, &commands);
-
- uint32_t maxComputeWorkgroupsPerDimension =
- GetSupportedLimits().limits.maxComputeWorkgroupsPerDimension;
-
- std::vector<uint32_t> expected(4 * indirectOffsets.size(), 0);
- for (size_t i = 0; i < indirectOffsets.size(); i++) {
- uint32_t indirectStart = indirectOffsets[i] / sizeof(uint32_t);
- size_t o = 4 * i;
-
- if (indirectBufferData[indirectStart] == 0 ||
- indirectBufferData[indirectStart + 1] == 0 ||
- indirectBufferData[indirectStart + 2] == 0 ||
- indirectBufferData[indirectStart] > maxComputeWorkgroupsPerDimension ||
- indirectBufferData[indirectStart + 1] > maxComputeWorkgroupsPerDimension ||
- indirectBufferData[indirectStart + 2] > maxComputeWorkgroupsPerDimension) {
- std::copy(kSentinelData.begin(), kSentinelData.end(), expected.begin() + o);
- } else {
- expected[o] = indirectBufferData[indirectStart];
- expected[o + 1] = indirectBufferData[indirectStart + 1];
- expected[o + 2] = indirectBufferData[indirectStart + 2];
- }
- }
-
- // Verify the indirect buffer is not modified
- EXPECT_BUFFER_U32_RANGE_EQ(&indirectBufferData[0], indirectBuffer, 0,
- indirectBufferData.size());
- // Verify the dispatch got called with group counts in indirect buffer if all group counts
- // are not zero
- EXPECT_BUFFER_U32_RANGE_EQ(&expected[0], dst, 0, expected.size());
- }
-
- private:
- wgpu::ComputePipeline pipeline;
- wgpu::ComputePipeline pipelineWithoutNumWorkgroups;
- wgpu::BindGroupLayout bindGroupLayout;
-};
-
-// Test indirect dispatches with buffer offset
-TEST_P(ComputeMultipleDispatchesTests, IndirectOffset) {
-#if DAWN_PLATFORM_IS(32_BIT)
- // TODO(crbug.com/dawn/1196): Fails on Chromium's Quadro P400 bots
- DAWN_SUPPRESS_TEST_IF(IsD3D12() && IsNvidia());
-#endif
-
- // Control case: One DispatchWorkgroupsIndirect call
- IndirectTest({0, 0, 0, 2, 3, 4}, {3 * sizeof(uint32_t)});
-
- // Two dispatches: first is no-op
- IndirectTest({0, 0, 0, 2, 3, 4}, {0, 3 * sizeof(uint32_t)});
-
- // Two dispatches
- IndirectTest({9, 8, 7, 2, 3, 4}, {0, 3 * sizeof(uint32_t)});
-
- // Indirect offsets not in order
- IndirectTest({9, 8, 7, 2, 3, 4}, {3 * sizeof(uint32_t), 0});
-
- // Multiple dispatches with duplicate indirect offsets
- IndirectTest({9, 8, 7, 0, 0, 0, 2, 3, 4, 0xa, 0xb, 0xc, 0xf, 0xe, 0xd},
- {
- 6 * sizeof(uint32_t),
- 0,
- 3 * sizeof(uint32_t),
- 12 * sizeof(uint32_t),
- 9 * sizeof(uint32_t),
- 6 * sizeof(uint32_t),
- 6 * sizeof(uint32_t),
- });
-}
-
-// Test indirect dispatches exceeding the max limit with an offset are noop-ed.
-TEST_P(ComputeMultipleDispatchesTests, ExceedsMaxWorkgroupsWithOffsetNoop) {
- DAWN_TEST_UNSUPPORTED_IF(HasToggleEnabled("skip_validation"));
-
- // TODO(crbug.com/dawn/839): Investigate why this test fails with WARP.
- DAWN_SUPPRESS_TEST_IF(IsWARP());
-
- uint32_t max = GetSupportedLimits().limits.maxComputeWorkgroupsPerDimension;
-
- // Two dispatches: first is no-op
- IndirectTest({max + 1, 1, 1, 2, 3, 4}, {0, 3 * sizeof(uint32_t)});
-
- // Two dispatches: second is no-op
- IndirectTest({2, 3, 4, max + 1, 1, 1}, {0, 3 * sizeof(uint32_t)});
-
- // Two dispatches: second is no-op
- IndirectTest({max + 1, 1, 1, 2, 3, 4}, {3 * sizeof(uint32_t), 0});
-}
-
-DAWN_INSTANTIATE_TEST_P(ComputeMultipleDispatchesTests,
- {D3D11Backend(), D3D12Backend(), MetalBackend(), OpenGLBackend(),
- OpenGLESBackend(), VulkanBackend()},
- {true, false} // useNumWorkgroups
-);
-
} // anonymous namespace
} // namespace dawn