src/dawn/tests/end2end/ComputeSharedMemoryTests.cpp - dawn - Git at Google

 // Copyright 2019 The Dawn Authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.

 #include <array>

 #include "dawn/tests/DawnTest.h"

 #include "dawn/utils/WGPUHelpers.h"

 class ComputeSharedMemoryTests : public DawnTest {
   public:
     static constexpr uint32_t kInstances = 11;

     void BasicTest(const char* shader);
 };

 void ComputeSharedMemoryTests::BasicTest(const char* shader) {
     // Set up shader and pipeline
     auto module = utils::CreateShaderModule(device, shader);

     wgpu::ComputePipelineDescriptor csDesc;
     csDesc.compute.module = module;
     csDesc.compute.entryPoint = "main";
     wgpu::ComputePipeline pipeline = device.CreateComputePipeline(&csDesc);

     // Set up dst storage buffer
     wgpu::BufferDescriptor dstDesc;
     dstDesc.size = sizeof(uint32_t);
     dstDesc.usage =
         wgpu::BufferUsage::Storage | wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst;
     wgpu::Buffer dst = device.CreateBuffer(&dstDesc);

     const uint32_t zero = 0;
     queue.WriteBuffer(dst, 0, &zero, sizeof(zero));

     // Set up bind group and issue dispatch
     wgpu::BindGroup bindGroup = utils::MakeBindGroup(device, pipeline.GetBindGroupLayout(0),
                                                      {
                                                          {0, dst, 0, sizeof(uint32_t)},
                                                      });

     wgpu::CommandBuffer commands;
     {
         wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
         wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
         pass.SetPipeline(pipeline);
         pass.SetBindGroup(0, bindGroup);
         pass.DispatchWorkgroups(1);
         pass.End();

         commands = encoder.Finish();
     }

     queue.Submit(1, &commands);

     const uint32_t expected = kInstances;
     EXPECT_BUFFER_U32_RANGE_EQ(&expected, dst, 0, 1);
 }

 // Basic shared memory test
 TEST_P(ComputeSharedMemoryTests, Basic) {
     BasicTest(R"(
         const kTileSize : u32 = 4u;
         const kInstances : u32 = 11u;

         struct Dst {
             x : u32
         }

         @group(0) @binding(0) var<storage, read_write> dst : Dst;
         var<workgroup> tmp : u32;

         @compute @workgroup_size(4,4,1)
         fn main(@builtin(local_invocation_id) LocalInvocationID : vec3<u32>) {
             let index : u32 = LocalInvocationID.y * kTileSize + LocalInvocationID.x;
             if (index == 0u) {
                 tmp = 0u;
             }
             workgroupBarrier();
             for (var i : u32 = 0u; i < kInstances; i = i + 1u) {
                 if (i == index) {
                     tmp = tmp + 1u;
                 }
                 workgroupBarrier();
             }
             if (index == 0u) {
                 dst.x = tmp;
             }
         })");
 }

 // Test using assorted types in workgroup memory. MSL lacks constructors
 // for matrices in threadgroup memory. Basic test that reading and
 // writing a matrix in workgroup memory works.
 TEST_P(ComputeSharedMemoryTests, AssortedTypes) {
     wgpu::ComputePipelineDescriptor csDesc;
     csDesc.compute.module = utils::CreateShaderModule(device, R"(
         struct StructValues {
             m: mat2x2<f32>
         }

         struct Dst {
             d_struct : StructValues,
             d_matrix : mat2x2<f32>,
             d_array : array<u32, 4>,
             d_vector : vec4<f32>,
         }

         @group(0) @binding(0) var<storage, read_write> dst : Dst;

         var<workgroup> wg_struct : StructValues;
         var<workgroup> wg_matrix : mat2x2<f32>;
         var<workgroup> wg_array : array<u32, 4>;
         var<workgroup> wg_vector : vec4<f32>;

         @compute @workgroup_size(4,1,1)
         fn main(@builtin(local_invocation_id) LocalInvocationID : vec3<u32>) {

             let i = 4u * LocalInvocationID.x;
             if (LocalInvocationID.x == 0u) {
                 wg_struct.m = mat2x2<f32>(
                     vec2<f32>(f32(i), f32(i + 1u)),
                     vec2<f32>(f32(i + 2u), f32(i + 3u)));
             } else if (LocalInvocationID.x == 1u) {
                 wg_matrix = mat2x2<f32>(
                     vec2<f32>(f32(i), f32(i + 1u)),
                     vec2<f32>(f32(i + 2u), f32(i + 3u)));
             } else if (LocalInvocationID.x == 2u) {
                 wg_array[0u] = i;
                 wg_array[1u] = i + 1u;
                 wg_array[2u] = i + 2u;
                 wg_array[3u] = i + 3u;
             } else if (LocalInvocationID.x == 3u) {
                 wg_vector = vec4<f32>(
                     f32(i), f32(i + 1u), f32(i + 2u), f32(i + 3u));
             }

             workgroupBarrier();

             if (LocalInvocationID.x == 0u) {
                 dst.d_struct = wg_struct;
                 dst.d_matrix = wg_matrix;
                 dst.d_array = wg_array;
                 dst.d_vector = wg_vector;
             }
         }
     )");
     csDesc.compute.entryPoint = "main";
     wgpu::ComputePipeline pipeline = device.CreateComputePipeline(&csDesc);

     // Set up dst storage buffer
     wgpu::BufferDescriptor dstDesc;
     dstDesc.size = 64;
     dstDesc.usage =
         wgpu::BufferUsage::Storage | wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst;
     wgpu::Buffer dst = device.CreateBuffer(&dstDesc);

     // Set up bind group and issue dispatch
     wgpu::BindGroup bindGroup = utils::MakeBindGroup(device, pipeline.GetBindGroupLayout(0),
                                                      {
                                                          {0, dst},
                                                      });

     wgpu::CommandBuffer commands;
     {
         wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
         wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
         pass.SetPipeline(pipeline);
         pass.SetBindGroup(0, bindGroup);
         pass.DispatchWorkgroups(1);
         pass.End();

         commands = encoder.Finish();
     }

     queue.Submit(1, &commands);

     std::array<float, 4> expectedStruct = {0., 1., 2., 3.};
     std::array<float, 4> expectedMatrix = {4., 5., 6., 7.};
     std::array<uint32_t, 4> expectedArray = {8, 9, 10, 11};
     std::array<float, 4> expectedVector = {12., 13., 14., 15.};
     EXPECT_BUFFER_FLOAT_RANGE_EQ(expectedStruct.data(), dst, 0, 4);
     EXPECT_BUFFER_FLOAT_RANGE_EQ(expectedMatrix.data(), dst, 16, 4);
     EXPECT_BUFFER_U32_RANGE_EQ(expectedArray.data(), dst, 32, 4);
     EXPECT_BUFFER_FLOAT_RANGE_EQ(expectedVector.data(), dst, 48, 4);
 }

 DAWN_INSTANTIATE_TEST(ComputeSharedMemoryTests,
                       D3D12Backend(),
                       MetalBackend(),
                       OpenGLBackend(),
                       OpenGLESBackend(),
                       VulkanBackend(),
                       VulkanBackend({}, {"use_vulkan_zero_initialize_workgroup_memory_extension"}));
	// Copyright 2019 The Dawn Authors
	//
	// Licensed under the Apache License, Version 2.0 (the "License");
	// you may not use this file except in compliance with the License.
	// You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing, software
	// distributed under the License is distributed on an "AS IS" BASIS,
	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	// See the License for the specific language governing permissions and
	// limitations under the License.

	#include <array>

	#include "dawn/tests/DawnTest.h"

	#include "dawn/utils/WGPUHelpers.h"

	class ComputeSharedMemoryTests : public DawnTest {
	public:
	static constexpr uint32_t kInstances = 11;

	void BasicTest(const char* shader);
	};

	void ComputeSharedMemoryTests::BasicTest(const char* shader) {
	// Set up shader and pipeline
	auto module = utils::CreateShaderModule(device, shader);

	wgpu::ComputePipelineDescriptor csDesc;
	csDesc.compute.module = module;
	csDesc.compute.entryPoint = "main";
	wgpu::ComputePipeline pipeline = device.CreateComputePipeline(&csDesc);

	// Set up dst storage buffer
	wgpu::BufferDescriptor dstDesc;
	dstDesc.size = sizeof(uint32_t);
	dstDesc.usage =
	wgpu::BufferUsage::Storage \| wgpu::BufferUsage::CopySrc \| wgpu::BufferUsage::CopyDst;
	wgpu::Buffer dst = device.CreateBuffer(&dstDesc);

	const uint32_t zero = 0;
	queue.WriteBuffer(dst, 0, &zero, sizeof(zero));

	// Set up bind group and issue dispatch
	wgpu::BindGroup bindGroup = utils::MakeBindGroup(device, pipeline.GetBindGroupLayout(0),
	{
	{0, dst, 0, sizeof(uint32_t)},
	});

	wgpu::CommandBuffer commands;
	{
	wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
	wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
	pass.SetPipeline(pipeline);
	pass.SetBindGroup(0, bindGroup);
	pass.DispatchWorkgroups(1);
	pass.End();

	commands = encoder.Finish();
	}

	queue.Submit(1, &commands);

	const uint32_t expected = kInstances;
	EXPECT_BUFFER_U32_RANGE_EQ(&expected, dst, 0, 1);
	}

	// Basic shared memory test
	TEST_P(ComputeSharedMemoryTests, Basic) {
	BasicTest(R"(
	const kTileSize : u32 = 4u;
	const kInstances : u32 = 11u;

	struct Dst {
	x : u32
	}

	@group(0) @binding(0) var<storage, read_write> dst : Dst;
	var<workgroup> tmp : u32;

	@compute @workgroup_size(4,4,1)
	fn main(@builtin(local_invocation_id) LocalInvocationID : vec3<u32>) {
	let index : u32 = LocalInvocationID.y * kTileSize + LocalInvocationID.x;
	if (index == 0u) {
	tmp = 0u;
	}
	workgroupBarrier();
	for (var i : u32 = 0u; i < kInstances; i = i + 1u) {
	if (i == index) {
	tmp = tmp + 1u;
	}
	workgroupBarrier();
	}
	if (index == 0u) {
	dst.x = tmp;
	}
	})");
	}

	// Test using assorted types in workgroup memory. MSL lacks constructors
	// for matrices in threadgroup memory. Basic test that reading and
	// writing a matrix in workgroup memory works.
	TEST_P(ComputeSharedMemoryTests, AssortedTypes) {
	wgpu::ComputePipelineDescriptor csDesc;
	csDesc.compute.module = utils::CreateShaderModule(device, R"(
	struct StructValues {
	m: mat2x2<f32>
	}

	struct Dst {
	d_struct : StructValues,
	d_matrix : mat2x2<f32>,
	d_array : array<u32, 4>,
	d_vector : vec4<f32>,
	}

	@group(0) @binding(0) var<storage, read_write> dst : Dst;

	var<workgroup> wg_struct : StructValues;
	var<workgroup> wg_matrix : mat2x2<f32>;
	var<workgroup> wg_array : array<u32, 4>;
	var<workgroup> wg_vector : vec4<f32>;

	@compute @workgroup_size(4,1,1)
	fn main(@builtin(local_invocation_id) LocalInvocationID : vec3<u32>) {

	let i = 4u * LocalInvocationID.x;
	if (LocalInvocationID.x == 0u) {
	wg_struct.m = mat2x2<f32>(
	vec2<f32>(f32(i), f32(i + 1u)),
	vec2<f32>(f32(i + 2u), f32(i + 3u)));
	} else if (LocalInvocationID.x == 1u) {
	wg_matrix = mat2x2<f32>(
	vec2<f32>(f32(i), f32(i + 1u)),
	vec2<f32>(f32(i + 2u), f32(i + 3u)));
	} else if (LocalInvocationID.x == 2u) {
	wg_array[0u] = i;
	wg_array[1u] = i + 1u;
	wg_array[2u] = i + 2u;
	wg_array[3u] = i + 3u;
	} else if (LocalInvocationID.x == 3u) {
	wg_vector = vec4<f32>(
	f32(i), f32(i + 1u), f32(i + 2u), f32(i + 3u));
	}

	workgroupBarrier();

	if (LocalInvocationID.x == 0u) {
	dst.d_struct = wg_struct;
	dst.d_matrix = wg_matrix;
	dst.d_array = wg_array;
	dst.d_vector = wg_vector;
	}
	}
	)");
	csDesc.compute.entryPoint = "main";
	wgpu::ComputePipeline pipeline = device.CreateComputePipeline(&csDesc);

	// Set up dst storage buffer
	wgpu::BufferDescriptor dstDesc;
	dstDesc.size = 64;
	dstDesc.usage =
	wgpu::BufferUsage::Storage \| wgpu::BufferUsage::CopySrc \| wgpu::BufferUsage::CopyDst;
	wgpu::Buffer dst = device.CreateBuffer(&dstDesc);

	// Set up bind group and issue dispatch
	wgpu::BindGroup bindGroup = utils::MakeBindGroup(device, pipeline.GetBindGroupLayout(0),
	{
	{0, dst},
	});

	wgpu::CommandBuffer commands;
	{
	wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
	wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
	pass.SetPipeline(pipeline);
	pass.SetBindGroup(0, bindGroup);
	pass.DispatchWorkgroups(1);
	pass.End();

	commands = encoder.Finish();
	}

	queue.Submit(1, &commands);

	std::array<float, 4> expectedStruct = {0., 1., 2., 3.};
	std::array<float, 4> expectedMatrix = {4., 5., 6., 7.};
	std::array<uint32_t, 4> expectedArray = {8, 9, 10, 11};
	std::array<float, 4> expectedVector = {12., 13., 14., 15.};
	EXPECT_BUFFER_FLOAT_RANGE_EQ(expectedStruct.data(), dst, 0, 4);
	EXPECT_BUFFER_FLOAT_RANGE_EQ(expectedMatrix.data(), dst, 16, 4);
	EXPECT_BUFFER_U32_RANGE_EQ(expectedArray.data(), dst, 32, 4);
	EXPECT_BUFFER_FLOAT_RANGE_EQ(expectedVector.data(), dst, 48, 4);
	}

	DAWN_INSTANTIATE_TEST(ComputeSharedMemoryTests,
	D3D12Backend(),
	MetalBackend(),
	OpenGLBackend(),
	OpenGLESBackend(),
	VulkanBackend(),
	VulkanBackend({}, {"use_vulkan_zero_initialize_workgroup_memory_extension"}));