| // Copyright 2019 The Dawn Authors |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| #include "tests/DawnTest.h" |
| |
| #include "utils/WGPUHelpers.h" |
| |
| class ComputeStorageBufferBarrierTests : public DawnTest { |
| protected: |
| static constexpr uint32_t kNumValues = 100; |
| static constexpr uint32_t kIterations = 100; |
| }; |
| |
| // Test that multiple dispatches to increment values in a storage buffer are synchronized. |
| TEST_P(ComputeStorageBufferBarrierTests, AddIncrement) { |
| std::vector<uint32_t> data(kNumValues, 0); |
| std::vector<uint32_t> expected(kNumValues, 0x1234 * kIterations); |
| |
| uint64_t bufferSize = static_cast<uint64_t>(data.size() * sizeof(uint32_t)); |
| wgpu::Buffer buffer = utils::CreateBufferFromData( |
| device, data.data(), bufferSize, wgpu::BufferUsage::Storage | wgpu::BufferUsage::CopySrc); |
| |
| wgpu::ShaderModule module = utils::CreateShaderModule(device, R"( |
| [[block]] struct Buf { |
| data : array<u32, 100>; |
| }; |
| |
| [[group(0), binding(0)]] var<storage> buf : [[access(read_write)]] Buf; |
| |
| [[stage(compute)]] |
| fn main([[builtin(global_invocation_id)]] GlobalInvocationID : vec3<u32>) { |
| buf.data[GlobalInvocationID.x] = buf.data[GlobalInvocationID.x] + 0x1234u; |
| } |
| )"); |
| |
| wgpu::ComputePipelineDescriptor pipelineDesc = {}; |
| pipelineDesc.computeStage.module = module; |
| pipelineDesc.computeStage.entryPoint = "main"; |
| wgpu::ComputePipeline pipeline = device.CreateComputePipeline(&pipelineDesc); |
| |
| wgpu::BindGroup bindGroup = |
| utils::MakeBindGroup(device, pipeline.GetBindGroupLayout(0), {{0, buffer, 0, bufferSize}}); |
| |
| wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); |
| wgpu::ComputePassEncoder pass = encoder.BeginComputePass(); |
| pass.SetPipeline(pipeline); |
| pass.SetBindGroup(0, bindGroup); |
| for (uint32_t i = 0; i < kIterations; ++i) { |
| pass.Dispatch(kNumValues); |
| } |
| pass.EndPass(); |
| wgpu::CommandBuffer commands = encoder.Finish(); |
| queue.Submit(1, &commands); |
| |
| EXPECT_BUFFER_U32_RANGE_EQ(expected.data(), buffer, 0, kNumValues); |
| } |
| |
| // Test that multiple dispatches to increment values by ping-ponging between two storage buffers |
| // are synchronized. |
| TEST_P(ComputeStorageBufferBarrierTests, AddPingPong) { |
| std::vector<uint32_t> data(kNumValues, 0); |
| std::vector<uint32_t> expectedA(kNumValues, 0x1234 * kIterations); |
| std::vector<uint32_t> expectedB(kNumValues, 0x1234 * (kIterations - 1)); |
| |
| uint64_t bufferSize = static_cast<uint64_t>(data.size() * sizeof(uint32_t)); |
| |
| wgpu::Buffer bufferA = utils::CreateBufferFromData( |
| device, data.data(), bufferSize, wgpu::BufferUsage::Storage | wgpu::BufferUsage::CopySrc); |
| |
| wgpu::Buffer bufferB = utils::CreateBufferFromData( |
| device, data.data(), bufferSize, wgpu::BufferUsage::Storage | wgpu::BufferUsage::CopySrc); |
| |
| wgpu::ShaderModule module = utils::CreateShaderModule(device, R"( |
| // TODO(crbug.com/tint/386): Use the same struct. |
| [[block]] struct Src { |
| data : array<u32, 100>; |
| }; |
| |
| [[block]] struct Dst { |
| data : array<u32, 100>; |
| }; |
| |
| [[group(0), binding(0)]] var<storage> src : [[access(read_write)]] Src; |
| [[group(0), binding(1)]] var<storage> dst : [[access(read_write)]] Dst; |
| |
| [[stage(compute)]] |
| fn main([[builtin(global_invocation_id)]] GlobalInvocationID : vec3<u32>) { |
| dst.data[GlobalInvocationID.x] = src.data[GlobalInvocationID.x] + 0x1234u; |
| } |
| )"); |
| |
| wgpu::ComputePipelineDescriptor pipelineDesc = {}; |
| pipelineDesc.computeStage.module = module; |
| pipelineDesc.computeStage.entryPoint = "main"; |
| wgpu::ComputePipeline pipeline = device.CreateComputePipeline(&pipelineDesc); |
| |
| wgpu::BindGroup bindGroupA = utils::MakeBindGroup(device, pipeline.GetBindGroupLayout(0), |
| { |
| {0, bufferA, 0, bufferSize}, |
| {1, bufferB, 0, bufferSize}, |
| }); |
| |
| wgpu::BindGroup bindGroupB = utils::MakeBindGroup(device, pipeline.GetBindGroupLayout(0), |
| { |
| {0, bufferB, 0, bufferSize}, |
| {1, bufferA, 0, bufferSize}, |
| }); |
| |
| wgpu::BindGroup bindGroups[2] = {bindGroupA, bindGroupB}; |
| |
| wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); |
| wgpu::ComputePassEncoder pass = encoder.BeginComputePass(); |
| pass.SetPipeline(pipeline); |
| |
| for (uint32_t i = 0; i < kIterations / 2; ++i) { |
| pass.SetBindGroup(0, bindGroups[0]); |
| pass.Dispatch(kNumValues); |
| pass.SetBindGroup(0, bindGroups[1]); |
| pass.Dispatch(kNumValues); |
| } |
| pass.EndPass(); |
| wgpu::CommandBuffer commands = encoder.Finish(); |
| queue.Submit(1, &commands); |
| |
| EXPECT_BUFFER_U32_RANGE_EQ(expectedA.data(), bufferA, 0, kNumValues); |
| EXPECT_BUFFER_U32_RANGE_EQ(expectedB.data(), bufferB, 0, kNumValues); |
| } |
| |
| // Test that multiple dispatches to increment values by ping-ponging between storage buffers and |
| // read-only storage buffers are synchronized in one compute pass. |
| TEST_P(ComputeStorageBufferBarrierTests, StorageAndReadonlyStoragePingPongInOnePass) { |
| std::vector<uint32_t> data(kNumValues, 0); |
| std::vector<uint32_t> expectedA(kNumValues, 0x1234 * kIterations); |
| std::vector<uint32_t> expectedB(kNumValues, 0x1234 * (kIterations - 1)); |
| |
| uint64_t bufferSize = static_cast<uint64_t>(data.size() * sizeof(uint32_t)); |
| |
| wgpu::Buffer bufferA = utils::CreateBufferFromData( |
| device, data.data(), bufferSize, wgpu::BufferUsage::Storage | wgpu::BufferUsage::CopySrc); |
| |
| wgpu::Buffer bufferB = utils::CreateBufferFromData( |
| device, data.data(), bufferSize, wgpu::BufferUsage::Storage | wgpu::BufferUsage::CopySrc); |
| |
| wgpu::ShaderModule module = utils::CreateShaderModule(device, R"( |
| // TODO(crbug.com/tint/386): Use the same struct. |
| [[block]] struct Src { |
| data : array<u32, 100>; |
| }; |
| |
| [[block]] struct Dst { |
| data : array<u32, 100>; |
| }; |
| |
| [[group(0), binding(0)]] var<storage> src : [[access(read)]] Src; |
| [[group(0), binding(1)]] var<storage> dst : [[access(read_write)]] Dst; |
| |
| [[stage(compute)]] |
| fn main([[builtin(global_invocation_id)]] GlobalInvocationID : vec3<u32>) { |
| dst.data[GlobalInvocationID.x] = src.data[GlobalInvocationID.x] + 0x1234u; |
| } |
| )"); |
| |
| wgpu::ComputePipelineDescriptor pipelineDesc = {}; |
| pipelineDesc.computeStage.module = module; |
| pipelineDesc.computeStage.entryPoint = "main"; |
| wgpu::ComputePipeline pipeline = device.CreateComputePipeline(&pipelineDesc); |
| |
| wgpu::BindGroup bindGroupA = utils::MakeBindGroup(device, pipeline.GetBindGroupLayout(0), |
| { |
| {0, bufferA, 0, bufferSize}, |
| {1, bufferB, 0, bufferSize}, |
| }); |
| |
| wgpu::BindGroup bindGroupB = utils::MakeBindGroup(device, pipeline.GetBindGroupLayout(0), |
| { |
| {0, bufferB, 0, bufferSize}, |
| {1, bufferA, 0, bufferSize}, |
| }); |
| |
| wgpu::BindGroup bindGroups[2] = {bindGroupA, bindGroupB}; |
| |
| wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); |
| wgpu::ComputePassEncoder pass = encoder.BeginComputePass(); |
| pass.SetPipeline(pipeline); |
| |
| for (uint32_t i = 0; i < kIterations / 2; ++i) { |
| pass.SetBindGroup(0, bindGroups[0]); |
| pass.Dispatch(kNumValues); |
| pass.SetBindGroup(0, bindGroups[1]); |
| pass.Dispatch(kNumValues); |
| } |
| pass.EndPass(); |
| wgpu::CommandBuffer commands = encoder.Finish(); |
| queue.Submit(1, &commands); |
| |
| EXPECT_BUFFER_U32_RANGE_EQ(expectedA.data(), bufferA, 0, kNumValues); |
| EXPECT_BUFFER_U32_RANGE_EQ(expectedB.data(), bufferB, 0, kNumValues); |
| } |
| |
| // Test that Storage to Uniform buffer transitions work and synchronize correctly |
| // by ping-ponging between Storage/Uniform usage in sequential compute passes. |
| TEST_P(ComputeStorageBufferBarrierTests, UniformToStorageAddPingPong) { |
| std::vector<uint32_t> data(kNumValues, 0); |
| std::vector<uint32_t> expectedA(kNumValues, 0x1234 * kIterations); |
| std::vector<uint32_t> expectedB(kNumValues, 0x1234 * (kIterations - 1)); |
| |
| uint64_t bufferSize = static_cast<uint64_t>(data.size() * sizeof(uint32_t)); |
| |
| wgpu::Buffer bufferA = utils::CreateBufferFromData( |
| device, data.data(), bufferSize, |
| wgpu::BufferUsage::Storage | wgpu::BufferUsage::Uniform | wgpu::BufferUsage::CopySrc); |
| |
| wgpu::Buffer bufferB = utils::CreateBufferFromData( |
| device, data.data(), bufferSize, |
| wgpu::BufferUsage::Storage | wgpu::BufferUsage::Uniform | wgpu::BufferUsage::CopySrc); |
| |
| wgpu::ShaderModule module = utils::CreateShaderModule(device, R"( |
| [[block]] struct Buf { |
| data : array<vec4<u32>, 25>; |
| }; |
| |
| [[group(0), binding(0)]] var<uniform> src : Buf; |
| [[group(0), binding(1)]] var<storage> dst : [[access(read_write)]] Buf; |
| |
| [[stage(compute)]] |
| fn main([[builtin(global_invocation_id)]] GlobalInvocationID : vec3<u32>) { |
| dst.data[GlobalInvocationID.x] = src.data[GlobalInvocationID.x] + |
| vec4<u32>(0x1234u, 0x1234u, 0x1234u, 0x1234u); |
| } |
| )"); |
| |
| wgpu::ComputePipelineDescriptor pipelineDesc = {}; |
| pipelineDesc.computeStage.module = module; |
| pipelineDesc.computeStage.entryPoint = "main"; |
| wgpu::ComputePipeline pipeline = device.CreateComputePipeline(&pipelineDesc); |
| |
| wgpu::BindGroup bindGroupA = utils::MakeBindGroup(device, pipeline.GetBindGroupLayout(0), |
| { |
| {0, bufferA, 0, bufferSize}, |
| {1, bufferB, 0, bufferSize}, |
| }); |
| |
| wgpu::BindGroup bindGroupB = utils::MakeBindGroup(device, pipeline.GetBindGroupLayout(0), |
| { |
| {0, bufferB, 0, bufferSize}, |
| {1, bufferA, 0, bufferSize}, |
| }); |
| |
| wgpu::BindGroup bindGroups[2] = {bindGroupA, bindGroupB}; |
| |
| wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); |
| |
| for (uint32_t i = 0, b = 0; i < kIterations; ++i, b = 1 - b) { |
| wgpu::ComputePassEncoder pass = encoder.BeginComputePass(); |
| pass.SetPipeline(pipeline); |
| pass.SetBindGroup(0, bindGroups[b]); |
| pass.Dispatch(kNumValues / 4); |
| pass.EndPass(); |
| } |
| |
| wgpu::CommandBuffer commands = encoder.Finish(); |
| queue.Submit(1, &commands); |
| |
| EXPECT_BUFFER_U32_RANGE_EQ(expectedA.data(), bufferA, 0, kNumValues); |
| EXPECT_BUFFER_U32_RANGE_EQ(expectedB.data(), bufferB, 0, kNumValues); |
| } |
| |
| // Test that Storage to Uniform buffer transitions work and synchronize correctly |
| // by ping-ponging between Storage/Uniform usage in one compute pass. |
| TEST_P(ComputeStorageBufferBarrierTests, UniformToStorageAddPingPongInOnePass) { |
| std::vector<uint32_t> data(kNumValues, 0); |
| std::vector<uint32_t> expectedA(kNumValues, 0x1234 * kIterations); |
| std::vector<uint32_t> expectedB(kNumValues, 0x1234 * (kIterations - 1)); |
| |
| uint64_t bufferSize = static_cast<uint64_t>(data.size() * sizeof(uint32_t)); |
| |
| wgpu::Buffer bufferA = utils::CreateBufferFromData( |
| device, data.data(), bufferSize, |
| wgpu::BufferUsage::Storage | wgpu::BufferUsage::Uniform | wgpu::BufferUsage::CopySrc); |
| |
| wgpu::Buffer bufferB = utils::CreateBufferFromData( |
| device, data.data(), bufferSize, |
| wgpu::BufferUsage::Storage | wgpu::BufferUsage::Uniform | wgpu::BufferUsage::CopySrc); |
| |
| wgpu::ShaderModule module = utils::CreateShaderModule(device, R"( |
| [[block]] struct Buf { |
| data : array<vec4<u32>, 25>; |
| }; |
| |
| [[group(0), binding(0)]] var<uniform> src : Buf; |
| [[group(0), binding(1)]] var<storage> dst : [[access(read_write)]] Buf; |
| |
| [[stage(compute)]] |
| fn main([[builtin(global_invocation_id)]] GlobalInvocationID : vec3<u32>) { |
| dst.data[GlobalInvocationID.x] = src.data[GlobalInvocationID.x] + |
| vec4<u32>(0x1234u, 0x1234u, 0x1234u, 0x1234u); |
| } |
| )"); |
| |
| wgpu::ComputePipelineDescriptor pipelineDesc = {}; |
| pipelineDesc.computeStage.module = module; |
| pipelineDesc.computeStage.entryPoint = "main"; |
| wgpu::ComputePipeline pipeline = device.CreateComputePipeline(&pipelineDesc); |
| |
| wgpu::BindGroup bindGroupA = utils::MakeBindGroup(device, pipeline.GetBindGroupLayout(0), |
| { |
| {0, bufferA, 0, bufferSize}, |
| {1, bufferB, 0, bufferSize}, |
| }); |
| |
| wgpu::BindGroup bindGroupB = utils::MakeBindGroup(device, pipeline.GetBindGroupLayout(0), |
| { |
| {0, bufferB, 0, bufferSize}, |
| {1, bufferA, 0, bufferSize}, |
| }); |
| |
| wgpu::BindGroup bindGroups[2] = {bindGroupA, bindGroupB}; |
| |
| wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); |
| wgpu::ComputePassEncoder pass = encoder.BeginComputePass(); |
| for (uint32_t i = 0, b = 0; i < kIterations; ++i, b = 1 - b) { |
| pass.SetPipeline(pipeline); |
| pass.SetBindGroup(0, bindGroups[b]); |
| pass.Dispatch(kNumValues / 4); |
| } |
| pass.EndPass(); |
| |
| wgpu::CommandBuffer commands = encoder.Finish(); |
| queue.Submit(1, &commands); |
| |
| EXPECT_BUFFER_U32_RANGE_EQ(expectedA.data(), bufferA, 0, kNumValues); |
| EXPECT_BUFFER_U32_RANGE_EQ(expectedB.data(), bufferB, 0, kNumValues); |
| } |
| |
| DAWN_INSTANTIATE_TEST(ComputeStorageBufferBarrierTests, |
| D3D12Backend(), |
| MetalBackend(), |
| OpenGLBackend(), |
| OpenGLESBackend(), |
| VulkanBackend()); |