[spirv-writer] Decompose uniform matCx3 types

Use the Std140 transform to replace these types with arrays of vectors
when in uniform buffers. This is to workaround a bug on Qualcomm
devices.

Bug: tint:2074
Include-Ci-Only-Tests: true
Change-Id: I2a20a0755d46ba23e88d71b39e87ba01e8ffc5d3
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/158301
Reviewed-by: Ben Clayton <bclayton@google.com>
Reviewed-by: Austin Eng <enga@chromium.org>
Kokoro: Kokoro <noreply+kokoro@google.com>
Auto-Submit: James Price <jrprice@google.com>
diff --git a/src/dawn/tests/end2end/ShaderTests.cpp b/src/dawn/tests/end2end/ShaderTests.cpp
index 6ded048..f31d28e 100644
--- a/src/dawn/tests/end2end/ShaderTests.cpp
+++ b/src/dawn/tests/end2end/ShaderTests.cpp
@@ -1603,6 +1603,243 @@
     EXPECT_BUFFER_FLOAT_RANGE_EQ(expected.data(), buffer, 0, expected.size());
 }
 
+// Test that robustness works correctly on uniform buffers that contain mat4x3 types, which can
+// cause issues on Qualcomm devices. See crbug.com/tint/2074.
+TEST_P(ShaderTests, Robustness_Uniform_Mat4x3) {
+    // Note: Using non-zero values would make the test more robust, but this involves small changes
+    // to the shader which stop the miscompile in the original bug from happening.
+    std::vector<float> inputs{0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+                              0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
+    std::vector<uint32_t> constantData{0};
+    std::vector<uint32_t> outputs{0xDEADBEEFu};
+    uint64_t bufferSize = static_cast<uint64_t>(inputs.size() * sizeof(float));
+    wgpu::Buffer buffer =
+        utils::CreateBufferFromData(device, inputs.data(), bufferSize, wgpu::BufferUsage::Uniform);
+    wgpu::Buffer constants =
+        utils::CreateBufferFromData(device, constantData.data(), 4, wgpu::BufferUsage::Uniform);
+    wgpu::Buffer output = utils::CreateBufferFromData(
+        device, outputs.data(), 4, wgpu::BufferUsage::Storage | wgpu::BufferUsage::CopySrc);
+
+    // Note: This shader was lifted from WebGPU CTS, and triggers a miscompile for the second case.
+    // The miscompile disappears when too much of the unrelated code is deleted or changed, so the
+    // shader is left in it's original form.
+    std::string shader = R"(
+    struct Constants {
+      zero: u32
+    };
+    @group(0) @binding(2) var<uniform> constants: Constants;
+
+    struct Result {
+      value: u32
+    };
+    @group(0) @binding(1) var<storage, read_write> result: Result;
+
+    struct TestData {
+      data: mat4x3<f32>,
+    };
+    @group(0) @binding(0) var<uniform> s: TestData;
+
+    fn runTest() -> u32 {
+      {
+        let index = (0u);
+        if (any(s.data[index] != vec3<f32>())) { return 0x1001u; }
+      }
+      {
+        let index = (4u - 1u);
+        if (any(s.data[index] != vec3<f32>())) { return 0x1002u; }
+      }
+      {
+        let index = (4u);
+        if (any(s.data[index] != vec3<f32>())) { return 0x1003u; }
+      }
+      {
+        let index = (1000000u);
+        if (any(s.data[index] != vec3<f32>())) { return 0x1004u; }
+      }
+      {
+        let index = (4294967295u);
+        if (any(s.data[index] != vec3<f32>())) { return 0x1005u; }
+      }
+      {
+        let index = (2147483647u);
+        if (any(s.data[index] != vec3<f32>())) { return 0x1006u; }
+      }
+      {
+        let index = (0u) + 0u;
+        if (any(s.data[index] != vec3<f32>())) { return 0x1007u; }
+      }
+      {
+        let index = (4u - 1u) + 0u;
+        if (any(s.data[index] != vec3<f32>())) { return 0x1008u; }
+      }
+      {
+        let index = (4u) + 0u;
+        if (any(s.data[index] != vec3<f32>())) { return 0x1009u; }
+      }
+      {
+        let index = (1000000u) + 0u;
+        if (any(s.data[index] != vec3<f32>())) { return 0x100au; }
+      }
+      {
+        let index = (4294967295u) + 0u;
+        if (any(s.data[index] != vec3<f32>())) { return 0x100bu; }
+      }
+      {
+        let index = (2147483647u) + 0u;
+        if (any(s.data[index] != vec3<f32>())) { return 0x100cu; }
+      }
+      {
+        let index = (0u) + u32(constants.zero);
+        if (any(s.data[index] != vec3<f32>())) { return 0x100du; }
+      }
+      {
+        let index = (4u - 1u) + u32(constants.zero);
+        if (any(s.data[index] != vec3<f32>())) { return 0x100eu; }
+      }
+      {
+        let index = (4u) + u32(constants.zero);
+        if (any(s.data[index] != vec3<f32>())) { return 0x100fu; }
+      }
+      {
+        let index = (1000000u) + u32(constants.zero);
+        if (any(s.data[index] != vec3<f32>())) { return 0x1010u; }
+      }
+      {
+        let index = (4294967295u) + u32(constants.zero);
+        if (any(s.data[index] != vec3<f32>())) { return 0x1011u; }
+      }
+      {
+        let index = (2147483647u) + u32(constants.zero);
+        if (any(s.data[index] != vec3<f32>())) { return 0x1012u; }
+      }
+      {
+        let index = (0);
+        if (any(s.data[index] != vec3<f32>())) { return 0x1013u; }
+      }
+      {
+        let index = (4 - 1);
+        if (any(s.data[index] != vec3<f32>())) { return 0x1014u; }
+      }
+      {
+        let index = (-1);
+        if (any(s.data[index] != vec3<f32>())) { return 0x1015u; }
+      }
+      {
+        let index = (4);
+        if (any(s.data[index] != vec3<f32>())) { return 0x1016u; }
+      }
+      {
+        let index = (-1000000);
+        if (any(s.data[index] != vec3<f32>())) { return 0x1017u; }
+      }
+      {
+        let index = (1000000);
+        if (any(s.data[index] != vec3<f32>())) { return 0x1018u; }
+      }
+      {
+        let index = (-2147483648);
+        if (any(s.data[index] != vec3<f32>())) { return 0x1019u; }
+      }
+      {
+        let index = (2147483647);
+        if (any(s.data[index] != vec3<f32>())) { return 0x101au; }
+      }
+      {
+        let index = (0) + 0;
+        if (any(s.data[index] != vec3<f32>())) { return 0x101bu; }
+      }
+      {
+        let index = (4 - 1) + 0;
+        if (any(s.data[index] != vec3<f32>())) { return 0x101cu; }
+      }
+      {
+        let index = (-1) + 0;
+        if (any(s.data[index] != vec3<f32>())) { return 0x101du; }
+      }
+      {
+        let index = (4) + 0;
+        if (any(s.data[index] != vec3<f32>())) { return 0x101eu; }
+      }
+      {
+        let index = (-1000000) + 0;
+        if (any(s.data[index] != vec3<f32>())) { return 0x101fu; }
+      }
+      {
+        let index = (1000000) + 0;
+        if (any(s.data[index] != vec3<f32>())) { return 0x1020u; }
+      }
+      {
+        let index = (-2147483648) + 0;
+        if (any(s.data[index] != vec3<f32>())) { return 0x1021u; }
+      }
+      {
+        let index = (2147483647) + 0;
+        if (any(s.data[index] != vec3<f32>())) { return 0x1022u; }
+      }
+      {
+        let index = (0) + i32(constants.zero);
+        if (any(s.data[index] != vec3<f32>())) { return 0x1023u; }
+      }
+      {
+        let index = (4 - 1) + i32(constants.zero);
+        if (any(s.data[index] != vec3<f32>())) { return 0x1024u; }
+      }
+      {
+        let index = (-1) + i32(constants.zero);
+        if (any(s.data[index] != vec3<f32>())) { return 0x1025u; }
+      }
+      {
+        let index = (4) + i32(constants.zero);
+        if (any(s.data[index] != vec3<f32>())) { return 0x1026u; }
+      }
+      {
+        let index = (-1000000) + i32(constants.zero);
+        if (any(s.data[index] != vec3<f32>())) { return 0x1027u; }
+      }
+      {
+        let index = (1000000) + i32(constants.zero);
+        if (any(s.data[index] != vec3<f32>())) { return 0x1028u; }
+      }
+      {
+        let index = (-2147483648) + i32(constants.zero);
+        if (any(s.data[index] != vec3<f32>())) { return 0x1029u; }
+      }
+      {
+        let index = (2147483647) + i32(constants.zero);
+        if (any(s.data[index] != vec3<f32>())) { return 0x102au; }
+      }
+      return 0u;
+    }
+
+    @compute @workgroup_size(1)
+    fn main() {
+      result.value = runTest();
+    }
+)";
+
+    wgpu::ComputePipeline pipeline = CreateComputePipeline(shader, "main");
+
+    wgpu::BindGroup bindGroup = utils::MakeBindGroup(device, pipeline.GetBindGroupLayout(0),
+                                                     {{0, buffer}, {1, output}, {2, constants}});
+
+    wgpu::CommandBuffer commands;
+    {
+        wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+        wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
+        pass.SetPipeline(pipeline);
+        pass.SetBindGroup(0, bindGroup);
+        pass.DispatchWorkgroups(1);
+        pass.End();
+
+        commands = encoder.Finish();
+    }
+
+    queue.Submit(1, &commands);
+
+    outputs[0] = 0u;
+    EXPECT_BUFFER_U32_RANGE_EQ(outputs.data(), output, 0, outputs.size());
+}
+
 DAWN_INSTANTIATE_TEST(ShaderTests,
                       D3D11Backend(),
                       D3D12Backend(),
diff --git a/src/tint/lang/core/ir/transform/std140.cc b/src/tint/lang/core/ir/transform/std140.cc
index 9f5dd6b..0f2b8f6 100644
--- a/src/tint/lang/core/ir/transform/std140.cc
+++ b/src/tint/lang/core/ir/transform/std140.cc
@@ -112,7 +112,14 @@
 
     /// @param mat the matrix type to check
     /// @returns true if @p mat needs to be decomposed
-    static bool NeedsDecomposing(const core::type::Matrix* mat) { return mat->ColumnStride() & 15; }
+    static bool NeedsDecomposing(const core::type::Matrix* mat) {
+        // Std140 layout rules only require us to do this transform for matrices whose column
+        // strides are not a multiple of 16 bytes.
+        //
+        // Due to a bug on Qualcomm devices, we also do this when the *size* of the column vector is
+        // not a multiple of 16 bytes (e.g. matCx3 types). See crbug.com/tint/2074.
+        return mat->ColumnType()->Size() & 15;
+    }
 
     /// Rewrite a type if necessary, decomposing contained matrices.
     /// @param type the type to rewrite
diff --git a/src/tint/lang/core/ir/transform/std140_test.cc b/src/tint/lang/core/ir/transform/std140_test.cc
index 3409ad9..8415529 100644
--- a/src/tint/lang/core/ir/transform/std140_test.cc
+++ b/src/tint/lang/core/ir/transform/std140_test.cc
@@ -60,50 +60,6 @@
     EXPECT_EQ(expect, str());
 }
 
-TEST_F(IR_Std140Test, NoModify_Mat2x3) {
-    auto* mat = ty.mat2x3<f32>();
-    auto* structure = ty.Struct(mod.symbols.New("MyStruct"), {
-                                                                 {mod.symbols.New("a"), mat},
-                                                             });
-    structure->SetStructFlag(core::type::kBlock);
-
-    auto* buffer = b.Var("buffer", ty.ptr(uniform, structure));
-    buffer->SetBindingPoint(0, 0);
-    mod.root_block->Append(buffer);
-
-    auto* func = b.Function("foo", mat);
-    b.Append(func->Block(), [&] {
-        auto* access = b.Access(ty.ptr(uniform, mat), buffer, 0_u);
-        auto* load = b.Load(access);
-        b.Return(func, load);
-    });
-
-    auto* src = R"(
-MyStruct = struct @align(16), @block {
-  a:mat2x3<f32> @offset(0)
-}
-
-%b1 = block {  # root
-  %buffer:ptr<uniform, MyStruct, read_write> = var @binding_point(0, 0)
-}
-
-%foo = func():mat2x3<f32> -> %b2 {
-  %b2 = block {
-    %3:ptr<uniform, mat2x3<f32>, read_write> = access %buffer, 0u
-    %4:mat2x3<f32> = load %3
-    ret %4
-  }
-}
-)";
-    EXPECT_EQ(src, str());
-
-    auto* expect = src;
-
-    Run(Std140);
-
-    EXPECT_EQ(expect, str());
-}
-
 TEST_F(IR_Std140Test, NoModify_Mat2x4) {
     auto* mat = ty.mat2x4<f32>();
     auto* structure = ty.Struct(mod.symbols.New("MyStruct"), {
@@ -1453,6 +1409,80 @@
     EXPECT_EQ(expect, str());
 }
 
+TEST_F(IR_Std140Test, Mat4x3_LoadMatrix) {
+    auto* mat = ty.mat4x3<f32>();
+    auto* structure = ty.Struct(mod.symbols.New("MyStruct"), {
+                                                                 {mod.symbols.New("a"), mat},
+                                                             });
+    structure->SetStructFlag(core::type::kBlock);
+
+    auto* buffer = b.Var("buffer", ty.ptr(uniform, structure));
+    buffer->SetBindingPoint(0, 0);
+    mod.root_block->Append(buffer);
+
+    auto* func = b.Function("foo", mat);
+    b.Append(func->Block(), [&] {
+        auto* access = b.Access(ty.ptr(uniform, mat), buffer, 0_u);
+        auto* load = b.Load(access);
+        b.Return(func, load);
+    });
+
+    auto* src = R"(
+MyStruct = struct @align(16), @block {
+  a:mat4x3<f32> @offset(0)
+}
+
+%b1 = block {  # root
+  %buffer:ptr<uniform, MyStruct, read_write> = var @binding_point(0, 0)
+}
+
+%foo = func():mat4x3<f32> -> %b2 {
+  %b2 = block {
+    %3:ptr<uniform, mat4x3<f32>, read_write> = access %buffer, 0u
+    %4:mat4x3<f32> = load %3
+    ret %4
+  }
+}
+)";
+    EXPECT_EQ(src, str());
+
+    auto* expect = R"(
+MyStruct = struct @align(16), @block {
+  a:mat4x3<f32> @offset(0)
+}
+
+MyStruct_std140 = struct @align(16), @block {
+  a_col0:vec3<f32> @offset(0)
+  a_col1:vec3<f32> @offset(16)
+  a_col2:vec3<f32> @offset(32)
+  a_col3:vec3<f32> @offset(48)
+}
+
+%b1 = block {  # root
+  %buffer:ptr<uniform, MyStruct_std140, read_write> = var @binding_point(0, 0)
+}
+
+%foo = func():mat4x3<f32> -> %b2 {
+  %b2 = block {
+    %3:ptr<uniform, vec3<f32>, read_write> = access %buffer, 0u
+    %4:vec3<f32> = load %3
+    %5:ptr<uniform, vec3<f32>, read_write> = access %buffer, 1u
+    %6:vec3<f32> = load %5
+    %7:ptr<uniform, vec3<f32>, read_write> = access %buffer, 2u
+    %8:vec3<f32> = load %7
+    %9:ptr<uniform, vec3<f32>, read_write> = access %buffer, 3u
+    %10:vec3<f32> = load %9
+    %11:mat4x3<f32> = construct %4, %6, %8, %10
+    ret %11
+  }
+}
+)";
+
+    Run(Std140);
+
+    EXPECT_EQ(expect, str());
+}
+
 TEST_F(IR_Std140Test, F16) {
     auto* structure =
         ty.Struct(mod.symbols.New("MyStruct"), {