[spirv-writer] Decompose uniform matCx3 types
Use the Std140 transform to replace these types with arrays of vectors
when in uniform buffers. This is to workaround a bug on Qualcomm
devices.
Bug: tint:2074
Include-Ci-Only-Tests: true
Change-Id: I2a20a0755d46ba23e88d71b39e87ba01e8ffc5d3
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/158301
Reviewed-by: Ben Clayton <bclayton@google.com>
Reviewed-by: Austin Eng <enga@chromium.org>
Kokoro: Kokoro <noreply+kokoro@google.com>
Auto-Submit: James Price <jrprice@google.com>
diff --git a/src/dawn/tests/end2end/ShaderTests.cpp b/src/dawn/tests/end2end/ShaderTests.cpp
index 6ded048..f31d28e 100644
--- a/src/dawn/tests/end2end/ShaderTests.cpp
+++ b/src/dawn/tests/end2end/ShaderTests.cpp
@@ -1603,6 +1603,243 @@
EXPECT_BUFFER_FLOAT_RANGE_EQ(expected.data(), buffer, 0, expected.size());
}
+// Test that robustness works correctly on uniform buffers that contain mat4x3 types, which can
+// cause issues on Qualcomm devices. See crbug.com/tint/2074.
+TEST_P(ShaderTests, Robustness_Uniform_Mat4x3) {
+ // Note: Using non-zero values would make the test more robust, but this involves small changes
+ // to the shader which stop the miscompile in the original bug from happening.
+ std::vector<float> inputs{0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
+ std::vector<uint32_t> constantData{0};
+ std::vector<uint32_t> outputs{0xDEADBEEFu};
+ uint64_t bufferSize = static_cast<uint64_t>(inputs.size() * sizeof(float));
+ wgpu::Buffer buffer =
+ utils::CreateBufferFromData(device, inputs.data(), bufferSize, wgpu::BufferUsage::Uniform);
+ wgpu::Buffer constants =
+ utils::CreateBufferFromData(device, constantData.data(), 4, wgpu::BufferUsage::Uniform);
+ wgpu::Buffer output = utils::CreateBufferFromData(
+ device, outputs.data(), 4, wgpu::BufferUsage::Storage | wgpu::BufferUsage::CopySrc);
+
+ // Note: This shader was lifted from WebGPU CTS, and triggers a miscompile for the second case.
+ // The miscompile disappears when too much of the unrelated code is deleted or changed, so the
+ // shader is left in it's original form.
+ std::string shader = R"(
+ struct Constants {
+ zero: u32
+ };
+ @group(0) @binding(2) var<uniform> constants: Constants;
+
+ struct Result {
+ value: u32
+ };
+ @group(0) @binding(1) var<storage, read_write> result: Result;
+
+ struct TestData {
+ data: mat4x3<f32>,
+ };
+ @group(0) @binding(0) var<uniform> s: TestData;
+
+ fn runTest() -> u32 {
+ {
+ let index = (0u);
+ if (any(s.data[index] != vec3<f32>())) { return 0x1001u; }
+ }
+ {
+ let index = (4u - 1u);
+ if (any(s.data[index] != vec3<f32>())) { return 0x1002u; }
+ }
+ {
+ let index = (4u);
+ if (any(s.data[index] != vec3<f32>())) { return 0x1003u; }
+ }
+ {
+ let index = (1000000u);
+ if (any(s.data[index] != vec3<f32>())) { return 0x1004u; }
+ }
+ {
+ let index = (4294967295u);
+ if (any(s.data[index] != vec3<f32>())) { return 0x1005u; }
+ }
+ {
+ let index = (2147483647u);
+ if (any(s.data[index] != vec3<f32>())) { return 0x1006u; }
+ }
+ {
+ let index = (0u) + 0u;
+ if (any(s.data[index] != vec3<f32>())) { return 0x1007u; }
+ }
+ {
+ let index = (4u - 1u) + 0u;
+ if (any(s.data[index] != vec3<f32>())) { return 0x1008u; }
+ }
+ {
+ let index = (4u) + 0u;
+ if (any(s.data[index] != vec3<f32>())) { return 0x1009u; }
+ }
+ {
+ let index = (1000000u) + 0u;
+ if (any(s.data[index] != vec3<f32>())) { return 0x100au; }
+ }
+ {
+ let index = (4294967295u) + 0u;
+ if (any(s.data[index] != vec3<f32>())) { return 0x100bu; }
+ }
+ {
+ let index = (2147483647u) + 0u;
+ if (any(s.data[index] != vec3<f32>())) { return 0x100cu; }
+ }
+ {
+ let index = (0u) + u32(constants.zero);
+ if (any(s.data[index] != vec3<f32>())) { return 0x100du; }
+ }
+ {
+ let index = (4u - 1u) + u32(constants.zero);
+ if (any(s.data[index] != vec3<f32>())) { return 0x100eu; }
+ }
+ {
+ let index = (4u) + u32(constants.zero);
+ if (any(s.data[index] != vec3<f32>())) { return 0x100fu; }
+ }
+ {
+ let index = (1000000u) + u32(constants.zero);
+ if (any(s.data[index] != vec3<f32>())) { return 0x1010u; }
+ }
+ {
+ let index = (4294967295u) + u32(constants.zero);
+ if (any(s.data[index] != vec3<f32>())) { return 0x1011u; }
+ }
+ {
+ let index = (2147483647u) + u32(constants.zero);
+ if (any(s.data[index] != vec3<f32>())) { return 0x1012u; }
+ }
+ {
+ let index = (0);
+ if (any(s.data[index] != vec3<f32>())) { return 0x1013u; }
+ }
+ {
+ let index = (4 - 1);
+ if (any(s.data[index] != vec3<f32>())) { return 0x1014u; }
+ }
+ {
+ let index = (-1);
+ if (any(s.data[index] != vec3<f32>())) { return 0x1015u; }
+ }
+ {
+ let index = (4);
+ if (any(s.data[index] != vec3<f32>())) { return 0x1016u; }
+ }
+ {
+ let index = (-1000000);
+ if (any(s.data[index] != vec3<f32>())) { return 0x1017u; }
+ }
+ {
+ let index = (1000000);
+ if (any(s.data[index] != vec3<f32>())) { return 0x1018u; }
+ }
+ {
+ let index = (-2147483648);
+ if (any(s.data[index] != vec3<f32>())) { return 0x1019u; }
+ }
+ {
+ let index = (2147483647);
+ if (any(s.data[index] != vec3<f32>())) { return 0x101au; }
+ }
+ {
+ let index = (0) + 0;
+ if (any(s.data[index] != vec3<f32>())) { return 0x101bu; }
+ }
+ {
+ let index = (4 - 1) + 0;
+ if (any(s.data[index] != vec3<f32>())) { return 0x101cu; }
+ }
+ {
+ let index = (-1) + 0;
+ if (any(s.data[index] != vec3<f32>())) { return 0x101du; }
+ }
+ {
+ let index = (4) + 0;
+ if (any(s.data[index] != vec3<f32>())) { return 0x101eu; }
+ }
+ {
+ let index = (-1000000) + 0;
+ if (any(s.data[index] != vec3<f32>())) { return 0x101fu; }
+ }
+ {
+ let index = (1000000) + 0;
+ if (any(s.data[index] != vec3<f32>())) { return 0x1020u; }
+ }
+ {
+ let index = (-2147483648) + 0;
+ if (any(s.data[index] != vec3<f32>())) { return 0x1021u; }
+ }
+ {
+ let index = (2147483647) + 0;
+ if (any(s.data[index] != vec3<f32>())) { return 0x1022u; }
+ }
+ {
+ let index = (0) + i32(constants.zero);
+ if (any(s.data[index] != vec3<f32>())) { return 0x1023u; }
+ }
+ {
+ let index = (4 - 1) + i32(constants.zero);
+ if (any(s.data[index] != vec3<f32>())) { return 0x1024u; }
+ }
+ {
+ let index = (-1) + i32(constants.zero);
+ if (any(s.data[index] != vec3<f32>())) { return 0x1025u; }
+ }
+ {
+ let index = (4) + i32(constants.zero);
+ if (any(s.data[index] != vec3<f32>())) { return 0x1026u; }
+ }
+ {
+ let index = (-1000000) + i32(constants.zero);
+ if (any(s.data[index] != vec3<f32>())) { return 0x1027u; }
+ }
+ {
+ let index = (1000000) + i32(constants.zero);
+ if (any(s.data[index] != vec3<f32>())) { return 0x1028u; }
+ }
+ {
+ let index = (-2147483648) + i32(constants.zero);
+ if (any(s.data[index] != vec3<f32>())) { return 0x1029u; }
+ }
+ {
+ let index = (2147483647) + i32(constants.zero);
+ if (any(s.data[index] != vec3<f32>())) { return 0x102au; }
+ }
+ return 0u;
+ }
+
+ @compute @workgroup_size(1)
+ fn main() {
+ result.value = runTest();
+ }
+)";
+
+ wgpu::ComputePipeline pipeline = CreateComputePipeline(shader, "main");
+
+ wgpu::BindGroup bindGroup = utils::MakeBindGroup(device, pipeline.GetBindGroupLayout(0),
+ {{0, buffer}, {1, output}, {2, constants}});
+
+ wgpu::CommandBuffer commands;
+ {
+ wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+ wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
+ pass.SetPipeline(pipeline);
+ pass.SetBindGroup(0, bindGroup);
+ pass.DispatchWorkgroups(1);
+ pass.End();
+
+ commands = encoder.Finish();
+ }
+
+ queue.Submit(1, &commands);
+
+ outputs[0] = 0u;
+ EXPECT_BUFFER_U32_RANGE_EQ(outputs.data(), output, 0, outputs.size());
+}
+
DAWN_INSTANTIATE_TEST(ShaderTests,
D3D11Backend(),
D3D12Backend(),
diff --git a/src/tint/lang/core/ir/transform/std140.cc b/src/tint/lang/core/ir/transform/std140.cc
index 9f5dd6b..0f2b8f6 100644
--- a/src/tint/lang/core/ir/transform/std140.cc
+++ b/src/tint/lang/core/ir/transform/std140.cc
@@ -112,7 +112,14 @@
/// @param mat the matrix type to check
/// @returns true if @p mat needs to be decomposed
- static bool NeedsDecomposing(const core::type::Matrix* mat) { return mat->ColumnStride() & 15; }
+ static bool NeedsDecomposing(const core::type::Matrix* mat) {
+ // Std140 layout rules only require us to do this transform for matrices whose column
+ // strides are not a multiple of 16 bytes.
+ //
+ // Due to a bug on Qualcomm devices, we also do this when the *size* of the column vector is
+ // not a multiple of 16 bytes (e.g. matCx3 types). See crbug.com/tint/2074.
+ return mat->ColumnType()->Size() & 15;
+ }
/// Rewrite a type if necessary, decomposing contained matrices.
/// @param type the type to rewrite
diff --git a/src/tint/lang/core/ir/transform/std140_test.cc b/src/tint/lang/core/ir/transform/std140_test.cc
index 3409ad9..8415529 100644
--- a/src/tint/lang/core/ir/transform/std140_test.cc
+++ b/src/tint/lang/core/ir/transform/std140_test.cc
@@ -60,50 +60,6 @@
EXPECT_EQ(expect, str());
}
-TEST_F(IR_Std140Test, NoModify_Mat2x3) {
- auto* mat = ty.mat2x3<f32>();
- auto* structure = ty.Struct(mod.symbols.New("MyStruct"), {
- {mod.symbols.New("a"), mat},
- });
- structure->SetStructFlag(core::type::kBlock);
-
- auto* buffer = b.Var("buffer", ty.ptr(uniform, structure));
- buffer->SetBindingPoint(0, 0);
- mod.root_block->Append(buffer);
-
- auto* func = b.Function("foo", mat);
- b.Append(func->Block(), [&] {
- auto* access = b.Access(ty.ptr(uniform, mat), buffer, 0_u);
- auto* load = b.Load(access);
- b.Return(func, load);
- });
-
- auto* src = R"(
-MyStruct = struct @align(16), @block {
- a:mat2x3<f32> @offset(0)
-}
-
-%b1 = block { # root
- %buffer:ptr<uniform, MyStruct, read_write> = var @binding_point(0, 0)
-}
-
-%foo = func():mat2x3<f32> -> %b2 {
- %b2 = block {
- %3:ptr<uniform, mat2x3<f32>, read_write> = access %buffer, 0u
- %4:mat2x3<f32> = load %3
- ret %4
- }
-}
-)";
- EXPECT_EQ(src, str());
-
- auto* expect = src;
-
- Run(Std140);
-
- EXPECT_EQ(expect, str());
-}
-
TEST_F(IR_Std140Test, NoModify_Mat2x4) {
auto* mat = ty.mat2x4<f32>();
auto* structure = ty.Struct(mod.symbols.New("MyStruct"), {
@@ -1453,6 +1409,80 @@
EXPECT_EQ(expect, str());
}
+TEST_F(IR_Std140Test, Mat4x3_LoadMatrix) {
+ auto* mat = ty.mat4x3<f32>();
+ auto* structure = ty.Struct(mod.symbols.New("MyStruct"), {
+ {mod.symbols.New("a"), mat},
+ });
+ structure->SetStructFlag(core::type::kBlock);
+
+ auto* buffer = b.Var("buffer", ty.ptr(uniform, structure));
+ buffer->SetBindingPoint(0, 0);
+ mod.root_block->Append(buffer);
+
+ auto* func = b.Function("foo", mat);
+ b.Append(func->Block(), [&] {
+ auto* access = b.Access(ty.ptr(uniform, mat), buffer, 0_u);
+ auto* load = b.Load(access);
+ b.Return(func, load);
+ });
+
+ auto* src = R"(
+MyStruct = struct @align(16), @block {
+ a:mat4x3<f32> @offset(0)
+}
+
+%b1 = block { # root
+ %buffer:ptr<uniform, MyStruct, read_write> = var @binding_point(0, 0)
+}
+
+%foo = func():mat4x3<f32> -> %b2 {
+ %b2 = block {
+ %3:ptr<uniform, mat4x3<f32>, read_write> = access %buffer, 0u
+ %4:mat4x3<f32> = load %3
+ ret %4
+ }
+}
+)";
+ EXPECT_EQ(src, str());
+
+ auto* expect = R"(
+MyStruct = struct @align(16), @block {
+ a:mat4x3<f32> @offset(0)
+}
+
+MyStruct_std140 = struct @align(16), @block {
+ a_col0:vec3<f32> @offset(0)
+ a_col1:vec3<f32> @offset(16)
+ a_col2:vec3<f32> @offset(32)
+ a_col3:vec3<f32> @offset(48)
+}
+
+%b1 = block { # root
+ %buffer:ptr<uniform, MyStruct_std140, read_write> = var @binding_point(0, 0)
+}
+
+%foo = func():mat4x3<f32> -> %b2 {
+ %b2 = block {
+ %3:ptr<uniform, vec3<f32>, read_write> = access %buffer, 0u
+ %4:vec3<f32> = load %3
+ %5:ptr<uniform, vec3<f32>, read_write> = access %buffer, 1u
+ %6:vec3<f32> = load %5
+ %7:ptr<uniform, vec3<f32>, read_write> = access %buffer, 2u
+ %8:vec3<f32> = load %7
+ %9:ptr<uniform, vec3<f32>, read_write> = access %buffer, 3u
+ %10:vec3<f32> = load %9
+ %11:mat4x3<f32> = construct %4, %6, %8, %10
+ ret %11
+ }
+}
+)";
+
+ Run(Std140);
+
+ EXPECT_EQ(expect, str());
+}
+
TEST_F(IR_Std140Test, F16) {
auto* structure =
ty.Struct(mod.symbols.New("MyStruct"), {