msl: Overload matrix-vector arithmetic operators

These operators are not defined in the metal namespace when the vector
operands are packed.

Fixed: tint:1121
Change-Id: I2e8f4302e08117ca41bac6c05fb24a70d1215740
Reviewed-on: https://dawn-review.googlesource.com/c/tint/+/62480
Kokoro: Kokoro <noreply+kokoro@google.com>
Auto-Submit: James Price <jrprice@google.com>
Reviewed-by: Ben Clayton <bclayton@google.com>
diff --git a/src/writer/msl/generator_impl.cc b/src/writer/msl/generator_impl.cc
index d731ec1..0f78cac 100644
--- a/src/writer/msl/generator_impl.cc
+++ b/src/writer/msl/generator_impl.cc
@@ -2310,6 +2310,26 @@
     if (!EmitType(out, vec, "")) {
       return false;
     }
+
+    if (vec->is_float_vector() && !matrix_packed_vector_overloads_) {
+      // Overload operators for matrix-vector arithmetic where the vector
+      // operand is packed, as these overloads to not exist in the metal
+      // namespace.
+      TextBuffer b;
+      TINT_DEFER(helpers_.Append(b));
+      line(&b) << R"(template<typename T, int N, int M>
+inline auto operator*(matrix<T, N, M> lhs, packed_vec<T, N> rhs) {
+  return lhs * vec<T, N>(rhs);
+}
+
+template<typename T, int N, int M>
+inline auto operator*(packed_vec<T, M> lhs, matrix<T, N, M> rhs) {
+  return vec<T, M>(lhs) * rhs;
+}
+)";
+      matrix_packed_vector_overloads_ = true;
+    }
+
     return true;
   }
 
diff --git a/src/writer/msl/generator_impl.h b/src/writer/msl/generator_impl.h
index 6a4efb7..9c65c15 100644
--- a/src/writer/msl/generator_impl.h
+++ b/src/writer/msl/generator_impl.h
@@ -355,6 +355,9 @@
   /// True if an invariant attribute has been generated.
   bool has_invariant_ = false;
 
+  /// True if matrix-packed_vector operator overloads have been generated.
+  bool matrix_packed_vector_overloads_ = false;
+
   std::unordered_map<const sem::Intrinsic*, std::string> intrinsics_;
   std::unordered_map<const sem::Type*, std::string> unary_minus_funcs_;
 };
diff --git a/test/buffer/storage/dynamic_index/read.wgsl.expected.msl b/test/buffer/storage/dynamic_index/read.wgsl.expected.msl
index 6b40e48..12f9ac5 100644
--- a/test/buffer/storage/dynamic_index/read.wgsl.expected.msl
+++ b/test/buffer/storage/dynamic_index/read.wgsl.expected.msl
@@ -1,6 +1,17 @@
 #include <metal_stdlib>
 
 using namespace metal;
+
+template<typename T, int N, int M>
+inline auto operator*(matrix<T, N, M> lhs, packed_vec<T, N> rhs) {
+  return lhs * vec<T, N>(rhs);
+}
+
+template<typename T, int N, int M>
+inline auto operator*(packed_vec<T, M> lhs, matrix<T, N, M> rhs) {
+  return vec<T, M>(lhs) * rhs;
+}
+
 struct tint_array_wrapper {
   /* 0x0000 */ int4 arr[4];
 };
diff --git a/test/buffer/storage/dynamic_index/write.wgsl.expected.msl b/test/buffer/storage/dynamic_index/write.wgsl.expected.msl
index fcb9558..4c36e48 100644
--- a/test/buffer/storage/dynamic_index/write.wgsl.expected.msl
+++ b/test/buffer/storage/dynamic_index/write.wgsl.expected.msl
@@ -1,6 +1,17 @@
 #include <metal_stdlib>
 
 using namespace metal;
+
+template<typename T, int N, int M>
+inline auto operator*(matrix<T, N, M> lhs, packed_vec<T, N> rhs) {
+  return lhs * vec<T, N>(rhs);
+}
+
+template<typename T, int N, int M>
+inline auto operator*(packed_vec<T, M> lhs, matrix<T, N, M> rhs) {
+  return vec<T, M>(lhs) * rhs;
+}
+
 struct tint_array_wrapper {
   /* 0x0000 */ int4 arr[4];
 };
diff --git a/test/buffer/storage/static_index/read.wgsl.expected.msl b/test/buffer/storage/static_index/read.wgsl.expected.msl
index 316ed31..6dbc898 100644
--- a/test/buffer/storage/static_index/read.wgsl.expected.msl
+++ b/test/buffer/storage/static_index/read.wgsl.expected.msl
@@ -1,6 +1,17 @@
 #include <metal_stdlib>
 
 using namespace metal;
+
+template<typename T, int N, int M>
+inline auto operator*(matrix<T, N, M> lhs, packed_vec<T, N> rhs) {
+  return lhs * vec<T, N>(rhs);
+}
+
+template<typename T, int N, int M>
+inline auto operator*(packed_vec<T, M> lhs, matrix<T, N, M> rhs) {
+  return vec<T, M>(lhs) * rhs;
+}
+
 struct Inner {
   /* 0x0000 */ int x;
 };
diff --git a/test/buffer/storage/static_index/write.wgsl.expected.msl b/test/buffer/storage/static_index/write.wgsl.expected.msl
index eab1f4f..b2b8a49 100644
--- a/test/buffer/storage/static_index/write.wgsl.expected.msl
+++ b/test/buffer/storage/static_index/write.wgsl.expected.msl
@@ -1,6 +1,17 @@
 #include <metal_stdlib>
 
 using namespace metal;
+
+template<typename T, int N, int M>
+inline auto operator*(matrix<T, N, M> lhs, packed_vec<T, N> rhs) {
+  return lhs * vec<T, N>(rhs);
+}
+
+template<typename T, int N, int M>
+inline auto operator*(packed_vec<T, M> lhs, matrix<T, N, M> rhs) {
+  return vec<T, M>(lhs) * rhs;
+}
+
 struct Inner {
   /* 0x0000 */ int x;
 };
diff --git a/test/buffer/uniform/dynamic_index/read.wgsl.expected.msl b/test/buffer/uniform/dynamic_index/read.wgsl.expected.msl
index eed156e..7d55efd 100644
--- a/test/buffer/uniform/dynamic_index/read.wgsl.expected.msl
+++ b/test/buffer/uniform/dynamic_index/read.wgsl.expected.msl
@@ -1,6 +1,17 @@
 #include <metal_stdlib>
 
 using namespace metal;
+
+template<typename T, int N, int M>
+inline auto operator*(matrix<T, N, M> lhs, packed_vec<T, N> rhs) {
+  return lhs * vec<T, N>(rhs);
+}
+
+template<typename T, int N, int M>
+inline auto operator*(packed_vec<T, M> lhs, matrix<T, N, M> rhs) {
+  return vec<T, M>(lhs) * rhs;
+}
+
 struct tint_array_wrapper {
   /* 0x0000 */ int4 arr[4];
 };
diff --git a/test/buffer/uniform/static_index/read.wgsl.expected.msl b/test/buffer/uniform/static_index/read.wgsl.expected.msl
index 1cc175a..d003081 100644
--- a/test/buffer/uniform/static_index/read.wgsl.expected.msl
+++ b/test/buffer/uniform/static_index/read.wgsl.expected.msl
@@ -1,6 +1,17 @@
 #include <metal_stdlib>
 
 using namespace metal;
+
+template<typename T, int N, int M>
+inline auto operator*(matrix<T, N, M> lhs, packed_vec<T, N> rhs) {
+  return lhs * vec<T, N>(rhs);
+}
+
+template<typename T, int N, int M>
+inline auto operator*(packed_vec<T, M> lhs, matrix<T, N, M> rhs) {
+  return vec<T, M>(lhs) * rhs;
+}
+
 struct Inner {
   /* 0x0000 */ int x;
 };
diff --git a/test/bug/tint/1113.wgsl.expected.msl b/test/bug/tint/1113.wgsl.expected.msl
index 1eaadd3..663d47f 100644
--- a/test/bug/tint/1113.wgsl.expected.msl
+++ b/test/bug/tint/1113.wgsl.expected.msl
@@ -1,6 +1,17 @@
 #include <metal_stdlib>
 
 using namespace metal;
+
+template<typename T, int N, int M>
+inline auto operator*(matrix<T, N, M> lhs, packed_vec<T, N> rhs) {
+  return lhs * vec<T, N>(rhs);
+}
+
+template<typename T, int N, int M>
+inline auto operator*(packed_vec<T, M> lhs, matrix<T, N, M> rhs) {
+  return vec<T, M>(lhs) * rhs;
+}
+
 struct Uniforms {
   /* 0x0000 */ uint numTriangles;
   /* 0x0004 */ uint gridSize;
diff --git a/test/bug/tint/1121.wgsl b/test/bug/tint/1121.wgsl
new file mode 100644
index 0000000..75425d1
--- /dev/null
+++ b/test/bug/tint/1121.wgsl
@@ -0,0 +1,127 @@
+// Take from here:
+// https://github.com/shrekshao/webgpu-deferred-renderer/blob/4f8bf0910793100aa8d60dbd1319bddb5357b1fa/renderer/LightCulling.js
+// With these token replacements:
+//   $NUM_TILE_LIGHT_SLOT = 64
+//   $NUM_TILES = 4
+//   $TILE_COUNT_Y = 2
+//   $TILE_COUNT_X = 2
+//   $TILE_SIZE = 16
+
+struct LightData {
+    position : vec4<f32>;
+    color : vec3<f32>;
+    radius : f32;
+};
+[[block]] struct LightsBuffer {
+    lights: array<LightData>;
+};
+[[group(0), binding(0)]] var<storage, read_write> lightsBuffer: LightsBuffer;
+struct TileLightIdData {
+    count: atomic<u32>;
+    lightId: array<u32, 64>;
+};
+[[block]] struct Tiles {
+    data: array<TileLightIdData, 4>;
+};
+[[group(1), binding(0)]] var<storage, read_write> tileLightId: Tiles;
+  
+[[block]] struct Config {
+    numLights : u32;
+    numTiles : u32;
+    tileCountX : u32;
+    tileCountY : u32;
+    numTileLightSlot : u32;
+    tileSize : u32;
+};
+[[group(2), binding(0)]] var<uniform> config: Config;
+[[block]] struct Uniforms {
+    min : vec4<f32>;
+    max : vec4<f32>;
+    // camera
+    viewMatrix : mat4x4<f32>;
+    projectionMatrix : mat4x4<f32>;
+    // Tile info
+    fullScreenSize : vec4<f32>;    // width, height
+};
+[[group(3), binding(0)]] var<uniform> uniforms: Uniforms;
+[[stage(compute), workgroup_size(64, 1, 1)]]
+fn main([[builtin(global_invocation_id)]] GlobalInvocationID : vec3<u32>) {
+    var index = GlobalInvocationID.x;
+    if (index >= config.numLights) {
+        return;
+    }
+    // Light position updating
+    lightsBuffer.lights[index].position.y = lightsBuffer.lights[index].position.y - 0.1 + 0.001 * (f32(index) - 64.0 * floor(f32(index) / 64.0));
+  
+    if (lightsBuffer.lights[index].position.y < uniforms.min.y) {
+        lightsBuffer.lights[index].position.y = uniforms.max.y;
+    }
+    // Light culling
+    // Implementation here is Tiled without per tile min-max depth
+    // You could also implement cluster culling
+    // Feel free to add more compute passes if necessary
+    // some math reference: http://www.txutxi.com/?p=444
+    var M: mat4x4<f32> = uniforms.projectionMatrix;
+    var viewNear: f32 = - M[3][2] / ( -1.0 + M[2][2]);
+    var viewFar: f32 = - M[3][2] / (1.0 + M[2][2]);
+    var lightPos = lightsBuffer.lights[index].position;
+    lightPos = uniforms.viewMatrix * lightPos;
+    lightPos = lightPos / lightPos.w;
+    var lightRadius: f32 = lightsBuffer.lights[index].radius;
+    var boxMin: vec4<f32> = lightPos - vec4<f32>(vec3<f32>(lightRadius), 0.0);
+    var boxMax: vec4<f32> = lightPos + vec4<f32>(vec3<f32>(lightRadius), 0.0);
+    var frustumPlanes: array<vec4<f32>, 6>;
+    frustumPlanes[4] = vec4<f32>(0.0, 0.0, -1.0, viewNear);    // near
+    frustumPlanes[5] = vec4<f32>(0.0, 0.0, 1.0, -viewFar);    // far
+    let TILE_SIZE: i32 = 16;
+    let TILE_COUNT_X: i32 = 2;
+    let TILE_COUNT_Y: i32 = 2;
+    for (var y : i32 = 0; y < TILE_COUNT_Y; y = y + 1) {
+        for (var x : i32 = 0; x < TILE_COUNT_X; x = x + 1) {
+            var tilePixel0Idx : vec2<i32> = vec2<i32>(x * TILE_SIZE, y * TILE_SIZE);
+            // tile position in NDC space
+            var floorCoord: vec2<f32> = 2.0 * vec2<f32>(tilePixel0Idx) / uniforms.fullScreenSize.xy - vec2<f32>(1.0);  // -1, 1
+            var ceilCoord: vec2<f32> = 2.0 * vec2<f32>(tilePixel0Idx + vec2<i32>(TILE_SIZE)) / uniforms.fullScreenSize.xy - vec2<f32>(1.0);  // -1, 1
+            var viewFloorCoord: vec2<f32> = vec2<f32>( (- viewNear * floorCoord.x - M[2][0] * viewNear) / M[0][0] , (- viewNear * floorCoord.y - M[2][1] * viewNear) / M[1][1] );
+            var viewCeilCoord: vec2<f32> = vec2<f32>( (- viewNear * ceilCoord.x - M[2][0] * viewNear) / M[0][0] , (- viewNear * ceilCoord.y - M[2][1] * viewNear) / M[1][1] );
+            frustumPlanes[0] = vec4<f32>(1.0, 0.0, - viewFloorCoord.x / viewNear, 0.0);       // left
+            frustumPlanes[1] = vec4<f32>(-1.0, 0.0, viewCeilCoord.x / viewNear, 0.0);   // right
+            frustumPlanes[2] = vec4<f32>(0.0, 1.0, - viewFloorCoord.y / viewNear, 0.0);       // bottom
+            frustumPlanes[3] = vec4<f32>(0.0, -1.0, viewCeilCoord.y / viewNear, 0.0);   // top
+            var dp: f32 = 0.0;  // dot product
+            for (var i: u32 = 0u; i < 6u; i = i + 1u)
+            {
+                var p: vec4<f32>;
+                if (frustumPlanes[i].x > 0.0) {
+                    p.x = boxMax.x;
+                } else {
+                    p.x = boxMin.x;
+                }
+                if (frustumPlanes[i].y > 0.0) {
+                    p.y = boxMax.y;
+                } else {
+                    p.y = boxMin.y;
+                }
+                if (frustumPlanes[i].z > 0.0) {
+                    p.z = boxMax.z;
+                } else {
+                    p.z = boxMin.z;
+                }
+                p.w = 1.0;
+                dp = dp + min(0.0, dot(p, frustumPlanes[i]));
+            }
+            if (dp >= 0.0) {
+                // light is overlapping with the tile
+                var tileId: u32 = u32(x + y * TILE_COUNT_X);
+                if (tileId < 0u || tileId >= config.numTiles) {
+                    continue;
+                }
+                var offset: u32 = atomicAdd(&(tileLightId.data[tileId].count), 1u);
+                if (offset >= config.numTileLightSlot) {
+                    continue;
+                }
+                tileLightId.data[tileId].lightId[offset] = GlobalInvocationID.x;
+            }
+        }
+    }
+}
diff --git a/test/bug/tint/1121.wgsl.expected.hlsl b/test/bug/tint/1121.wgsl.expected.hlsl
new file mode 100644
index 0000000..12d8865
--- /dev/null
+++ b/test/bug/tint/1121.wgsl.expected.hlsl
@@ -0,0 +1,115 @@
+uint atomicAdd_1(RWByteAddressBuffer buffer, uint offset, uint value) {
+  uint original_value = 0;
+  buffer.InterlockedAdd(offset, value, original_value);
+  return original_value;
+}
+
+RWByteAddressBuffer lightsBuffer : register(u0, space0);
+
+RWByteAddressBuffer tileLightId : register(u0, space1);
+
+cbuffer cbuffer_config : register(b0, space2) {
+  uint4 config[2];
+};
+
+cbuffer cbuffer_uniforms : register(b0, space3) {
+  uint4 uniforms[11];
+};
+
+struct tint_symbol_1 {
+  uint3 GlobalInvocationID : SV_DispatchThreadID;
+};
+
+float4x4 tint_symbol_6(uint4 buffer[11], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]), asfloat(buffer[scalar_offset_3 / 4]));
+}
+
+void main_inner(uint3 GlobalInvocationID) {
+  uint index = GlobalInvocationID.x;
+  if ((index >= config[0].x)) {
+    return;
+  }
+  lightsBuffer.Store(((32u * index) + 4u), asuint(((asfloat(lightsBuffer.Load(((32u * index) + 4u))) - 0.100000001f) + (0.001f * (float(index) - (64.0f * floor((float(index) / 64.0f))))))));
+  if ((asfloat(lightsBuffer.Load(((32u * index) + 4u))) < asfloat(uniforms[0].y))) {
+    lightsBuffer.Store(((32u * index) + 4u), asuint(asfloat(uniforms[1].y)));
+  }
+  float4x4 M = tint_symbol_6(uniforms, 96u);
+  float viewNear = (-(M[3][2]) / (-1.0f + M[2][2]));
+  float viewFar = (-(M[3][2]) / (1.0f + M[2][2]));
+  float4 lightPos = asfloat(lightsBuffer.Load4((32u * index)));
+  lightPos = mul(lightPos, tint_symbol_6(uniforms, 32u));
+  lightPos = (lightPos / lightPos.w);
+  float lightRadius = asfloat(lightsBuffer.Load(((32u * index) + 28u)));
+  float4 boxMin = (lightPos - float4(float3((lightRadius).xxx), 0.0f));
+  float4 boxMax = (lightPos + float4(float3((lightRadius).xxx), 0.0f));
+  float4 frustumPlanes[6] = (float4[6])0;
+  frustumPlanes[4] = float4(0.0f, 0.0f, -1.0f, viewNear);
+  frustumPlanes[5] = float4(0.0f, 0.0f, 1.0f, -(viewFar));
+  const int TILE_SIZE = 16;
+  const int TILE_COUNT_X = 2;
+  {
+    for(int y_1 = 0; (y_1 < 2); y_1 = (y_1 + 1)) {
+      {
+        for(int x_1 = 0; (x_1 < TILE_COUNT_X); x_1 = (x_1 + 1)) {
+          int2 tilePixel0Idx = int2((x_1 * TILE_SIZE), (y_1 * TILE_SIZE));
+          float2 floorCoord = (((2.0f * float2(tilePixel0Idx)) / asfloat(uniforms[10]).xy) - float2((1.0f).xx));
+          float2 ceilCoord = (((2.0f * float2((tilePixel0Idx + int2((TILE_SIZE).xx)))) / asfloat(uniforms[10]).xy) - float2((1.0f).xx));
+          float2 viewFloorCoord = float2((((-(viewNear) * floorCoord.x) - (M[2][0] * viewNear)) / M[0][0]), (((-(viewNear) * floorCoord.y) - (M[2][1] * viewNear)) / M[1][1]));
+          float2 viewCeilCoord = float2((((-(viewNear) * ceilCoord.x) - (M[2][0] * viewNear)) / M[0][0]), (((-(viewNear) * ceilCoord.y) - (M[2][1] * viewNear)) / M[1][1]));
+          frustumPlanes[0] = float4(1.0f, 0.0f, (-(viewFloorCoord.x) / viewNear), 0.0f);
+          frustumPlanes[1] = float4(-1.0f, 0.0f, (viewCeilCoord.x / viewNear), 0.0f);
+          frustumPlanes[2] = float4(0.0f, 1.0f, (-(viewFloorCoord.y) / viewNear), 0.0f);
+          frustumPlanes[3] = float4(0.0f, -1.0f, (viewCeilCoord.y / viewNear), 0.0f);
+          float dp = 0.0f;
+          {
+            for(uint i = 0u; (i < 6u); i = (i + 1u)) {
+              float4 p = float4(0.0f, 0.0f, 0.0f, 0.0f);
+              if ((frustumPlanes[i].x > 0.0f)) {
+                p.x = boxMax.x;
+              } else {
+                p.x = boxMin.x;
+              }
+              if ((frustumPlanes[i].y > 0.0f)) {
+                p.y = boxMax.y;
+              } else {
+                p.y = boxMin.y;
+              }
+              if ((frustumPlanes[i].z > 0.0f)) {
+                p.z = boxMax.z;
+              } else {
+                p.z = boxMin.z;
+              }
+              p.w = 1.0f;
+              dp = (dp + min(0.0f, dot(p, frustumPlanes[i])));
+            }
+          }
+          if ((dp >= 0.0f)) {
+            uint tileId = uint((x_1 + (y_1 * TILE_COUNT_X)));
+            bool tint_tmp = (tileId < 0u);
+            if (!tint_tmp) {
+              tint_tmp = (tileId >= config[0].y);
+            }
+            if ((tint_tmp)) {
+              continue;
+            }
+            uint offset = atomicAdd_1(tileLightId, (260u * tileId), 1u);
+            if ((offset >= config[1].x)) {
+              continue;
+            }
+            tileLightId.Store((((260u * tileId) + 4u) + (4u * offset)), asuint(GlobalInvocationID.x));
+          }
+        }
+      }
+    }
+  }
+}
+
+[numthreads(64, 1, 1)]
+void main(tint_symbol_1 tint_symbol) {
+  main_inner(tint_symbol.GlobalInvocationID);
+  return;
+}
diff --git a/test/bug/tint/1121.wgsl.expected.msl b/test/bug/tint/1121.wgsl.expected.msl
new file mode 100644
index 0000000..c7eb13c
--- /dev/null
+++ b/test/bug/tint/1121.wgsl.expected.msl
@@ -0,0 +1,130 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, int N, int M>
+inline auto operator*(matrix<T, N, M> lhs, packed_vec<T, N> rhs) {
+  return lhs * vec<T, N>(rhs);
+}
+
+template<typename T, int N, int M>
+inline auto operator*(packed_vec<T, M> lhs, matrix<T, N, M> rhs) {
+  return vec<T, M>(lhs) * rhs;
+}
+
+struct LightData {
+  /* 0x0000 */ float4 position;
+  /* 0x0010 */ packed_float3 color;
+  /* 0x001c */ float radius;
+};
+struct LightsBuffer {
+  /* 0x0000 */ LightData lights[1];
+};
+struct tint_array_wrapper {
+  /* 0x0000 */ uint arr[64];
+};
+struct TileLightIdData {
+  /* 0x0000 */ atomic_uint count;
+  /* 0x0004 */ tint_array_wrapper lightId;
+};
+struct tint_array_wrapper_1 {
+  /* 0x0000 */ TileLightIdData arr[4];
+};
+struct Tiles {
+  /* 0x0000 */ tint_array_wrapper_1 data;
+};
+struct Config {
+  /* 0x0000 */ uint numLights;
+  /* 0x0004 */ uint numTiles;
+  /* 0x0008 */ uint tileCountX;
+  /* 0x000c */ uint tileCountY;
+  /* 0x0010 */ uint numTileLightSlot;
+  /* 0x0014 */ uint tileSize;
+};
+struct Uniforms {
+  /* 0x0000 */ float4 min;
+  /* 0x0010 */ float4 max;
+  /* 0x0020 */ float4x4 viewMatrix;
+  /* 0x0060 */ float4x4 projectionMatrix;
+  /* 0x00a0 */ float4 fullScreenSize;
+};
+struct tint_array_wrapper_2 {
+  float4 arr[6];
+};
+
+void tint_symbol_inner(constant Config& config, constant Uniforms& uniforms, device LightsBuffer& lightsBuffer, device Tiles& tileLightId, uint3 GlobalInvocationID) {
+  uint index = GlobalInvocationID.x;
+  if ((index >= config.numLights)) {
+    return;
+  }
+  lightsBuffer.lights[index].position.y = ((lightsBuffer.lights[index].position.y - 0.100000001f) + (0.001f * (float(index) - (64.0f * floor((float(index) / 64.0f))))));
+  if ((lightsBuffer.lights[index].position.y < uniforms.min.y)) {
+    lightsBuffer.lights[index].position.y = uniforms.max.y;
+  }
+  float4x4 M = uniforms.projectionMatrix;
+  float viewNear = (-(M[3][2]) / (-1.0f + M[2][2]));
+  float viewFar = (-(M[3][2]) / (1.0f + M[2][2]));
+  float4 lightPos = lightsBuffer.lights[index].position;
+  lightPos = (uniforms.viewMatrix * lightPos);
+  lightPos = (lightPos / lightPos.w);
+  float lightRadius = lightsBuffer.lights[index].radius;
+  float4 boxMin = (lightPos - float4(float3(lightRadius), 0.0f));
+  float4 boxMax = (lightPos + float4(float3(lightRadius), 0.0f));
+  tint_array_wrapper_2 frustumPlanes = {};
+  frustumPlanes.arr[4] = float4(0.0f, 0.0f, -1.0f, viewNear);
+  frustumPlanes.arr[5] = float4(0.0f, 0.0f, 1.0f, -(viewFar));
+  int const TILE_SIZE = 16;
+  int const TILE_COUNT_X = 2;
+  int const TILE_COUNT_Y = 2;
+  for(int y_1 = 0; (y_1 < TILE_COUNT_Y); y_1 = as_type<int>((as_type<uint>(y_1) + as_type<uint>(1)))) {
+    for(int x_1 = 0; (x_1 < TILE_COUNT_X); x_1 = as_type<int>((as_type<uint>(x_1) + as_type<uint>(1)))) {
+      int2 tilePixel0Idx = int2(as_type<int>((as_type<uint>(x_1) * as_type<uint>(TILE_SIZE))), as_type<int>((as_type<uint>(y_1) * as_type<uint>(TILE_SIZE))));
+      float2 floorCoord = (((2.0f * float2(tilePixel0Idx)) / uniforms.fullScreenSize.xy) - float2(1.0f));
+      float2 ceilCoord = (((2.0f * float2(as_type<int2>((as_type<uint2>(tilePixel0Idx) + as_type<uint2>(int2(TILE_SIZE)))))) / uniforms.fullScreenSize.xy) - float2(1.0f));
+      float2 viewFloorCoord = float2((((-(viewNear) * floorCoord.x) - (M[2][0] * viewNear)) / M[0][0]), (((-(viewNear) * floorCoord.y) - (M[2][1] * viewNear)) / M[1][1]));
+      float2 viewCeilCoord = float2((((-(viewNear) * ceilCoord.x) - (M[2][0] * viewNear)) / M[0][0]), (((-(viewNear) * ceilCoord.y) - (M[2][1] * viewNear)) / M[1][1]));
+      frustumPlanes.arr[0] = float4(1.0f, 0.0f, (-(viewFloorCoord.x) / viewNear), 0.0f);
+      frustumPlanes.arr[1] = float4(-1.0f, 0.0f, (viewCeilCoord.x / viewNear), 0.0f);
+      frustumPlanes.arr[2] = float4(0.0f, 1.0f, (-(viewFloorCoord.y) / viewNear), 0.0f);
+      frustumPlanes.arr[3] = float4(0.0f, -1.0f, (viewCeilCoord.y / viewNear), 0.0f);
+      float dp = 0.0f;
+      for(uint i = 0u; (i < 6u); i = (i + 1u)) {
+        float4 p = 0.0f;
+        if ((frustumPlanes.arr[i].x > 0.0f)) {
+          p.x = boxMax.x;
+        } else {
+          p.x = boxMin.x;
+        }
+        if ((frustumPlanes.arr[i].y > 0.0f)) {
+          p.y = boxMax.y;
+        } else {
+          p.y = boxMin.y;
+        }
+        if ((frustumPlanes.arr[i].z > 0.0f)) {
+          p.z = boxMax.z;
+        } else {
+          p.z = boxMin.z;
+        }
+        p.w = 1.0f;
+        dp = (dp + fmin(0.0f, dot(p, frustumPlanes.arr[i])));
+      }
+      if ((dp >= 0.0f)) {
+        uint tileId = uint(as_type<int>((as_type<uint>(x_1) + as_type<uint>(as_type<int>((as_type<uint>(y_1) * as_type<uint>(TILE_COUNT_X)))))));
+        if (((tileId < 0u) || (tileId >= config.numTiles))) {
+          continue;
+        }
+        uint offset = atomic_fetch_add_explicit(&(tileLightId.data.arr[tileId].count), 1u, memory_order_relaxed);
+        if ((offset >= config.numTileLightSlot)) {
+          continue;
+        }
+        tileLightId.data.arr[tileId].lightId.arr[offset] = GlobalInvocationID.x;
+      }
+    }
+  }
+}
+
+kernel void tint_symbol(uint3 GlobalInvocationID [[thread_position_in_grid]], constant Config& config [[buffer(0)]], constant Uniforms& uniforms [[buffer(1)]], device LightsBuffer& lightsBuffer [[buffer(2)]], device Tiles& tileLightId [[buffer(3)]]) {
+  tint_symbol_inner(config, uniforms, lightsBuffer, tileLightId, GlobalInvocationID);
+  return;
+}
+
diff --git a/test/bug/tint/1121.wgsl.expected.spvasm b/test/bug/tint/1121.wgsl.expected.spvasm
new file mode 100644
index 0000000..6d3354b
--- /dev/null
+++ b/test/bug/tint/1121.wgsl.expected.spvasm
@@ -0,0 +1,617 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 417
+; Schema: 0
+               OpCapability Shader
+         %60 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main" %GlobalInvocationID_1
+               OpExecutionMode %main LocalSize 64 1 1
+               OpName %GlobalInvocationID_1 "GlobalInvocationID_1"
+               OpName %LightsBuffer "LightsBuffer"
+               OpMemberName %LightsBuffer 0 "lights"
+               OpName %LightData "LightData"
+               OpMemberName %LightData 0 "position"
+               OpMemberName %LightData 1 "color"
+               OpMemberName %LightData 2 "radius"
+               OpName %lightsBuffer "lightsBuffer"
+               OpName %Tiles "Tiles"
+               OpMemberName %Tiles 0 "data"
+               OpName %TileLightIdData "TileLightIdData"
+               OpMemberName %TileLightIdData 0 "count"
+               OpMemberName %TileLightIdData 1 "lightId"
+               OpName %tileLightId "tileLightId"
+               OpName %Config "Config"
+               OpMemberName %Config 0 "numLights"
+               OpMemberName %Config 1 "numTiles"
+               OpMemberName %Config 2 "tileCountX"
+               OpMemberName %Config 3 "tileCountY"
+               OpMemberName %Config 4 "numTileLightSlot"
+               OpMemberName %Config 5 "tileSize"
+               OpName %config "config"
+               OpName %Uniforms "Uniforms"
+               OpMemberName %Uniforms 0 "min"
+               OpMemberName %Uniforms 1 "max"
+               OpMemberName %Uniforms 2 "viewMatrix"
+               OpMemberName %Uniforms 3 "projectionMatrix"
+               OpMemberName %Uniforms 4 "fullScreenSize"
+               OpName %uniforms "uniforms"
+               OpName %main_inner "main_inner"
+               OpName %GlobalInvocationID "GlobalInvocationID"
+               OpName %index "index"
+               OpName %M "M"
+               OpName %viewNear "viewNear"
+               OpName %viewFar "viewFar"
+               OpName %lightPos "lightPos"
+               OpName %lightRadius "lightRadius"
+               OpName %boxMin "boxMin"
+               OpName %boxMax "boxMax"
+               OpName %frustumPlanes "frustumPlanes"
+               OpName %y "y"
+               OpName %x "x"
+               OpName %tilePixel0Idx "tilePixel0Idx"
+               OpName %floorCoord "floorCoord"
+               OpName %ceilCoord "ceilCoord"
+               OpName %viewFloorCoord "viewFloorCoord"
+               OpName %viewCeilCoord "viewCeilCoord"
+               OpName %dp "dp"
+               OpName %i "i"
+               OpName %p "p"
+               OpName %tileId "tileId"
+               OpName %offset "offset"
+               OpName %main "main"
+               OpDecorate %GlobalInvocationID_1 BuiltIn GlobalInvocationId
+               OpDecorate %LightsBuffer Block
+               OpMemberDecorate %LightsBuffer 0 Offset 0
+               OpMemberDecorate %LightData 0 Offset 0
+               OpMemberDecorate %LightData 1 Offset 16
+               OpMemberDecorate %LightData 2 Offset 28
+               OpDecorate %_runtimearr_LightData ArrayStride 32
+               OpDecorate %lightsBuffer DescriptorSet 0
+               OpDecorate %lightsBuffer Binding 0
+               OpDecorate %Tiles Block
+               OpMemberDecorate %Tiles 0 Offset 0
+               OpMemberDecorate %TileLightIdData 0 Offset 0
+               OpMemberDecorate %TileLightIdData 1 Offset 4
+               OpDecorate %_arr_uint_uint_64 ArrayStride 4
+               OpDecorate %_arr_TileLightIdData_uint_4 ArrayStride 260
+               OpDecorate %tileLightId DescriptorSet 1
+               OpDecorate %tileLightId Binding 0
+               OpDecorate %Config Block
+               OpMemberDecorate %Config 0 Offset 0
+               OpMemberDecorate %Config 1 Offset 4
+               OpMemberDecorate %Config 2 Offset 8
+               OpMemberDecorate %Config 3 Offset 12
+               OpMemberDecorate %Config 4 Offset 16
+               OpMemberDecorate %Config 5 Offset 20
+               OpDecorate %config NonWritable
+               OpDecorate %config DescriptorSet 2
+               OpDecorate %config Binding 0
+               OpDecorate %Uniforms Block
+               OpMemberDecorate %Uniforms 0 Offset 0
+               OpMemberDecorate %Uniforms 1 Offset 16
+               OpMemberDecorate %Uniforms 2 Offset 32
+               OpMemberDecorate %Uniforms 2 ColMajor
+               OpMemberDecorate %Uniforms 2 MatrixStride 16
+               OpMemberDecorate %Uniforms 3 Offset 96
+               OpMemberDecorate %Uniforms 3 ColMajor
+               OpMemberDecorate %Uniforms 3 MatrixStride 16
+               OpMemberDecorate %Uniforms 4 Offset 160
+               OpDecorate %uniforms NonWritable
+               OpDecorate %uniforms DescriptorSet 3
+               OpDecorate %uniforms Binding 0
+               OpDecorate %_arr_v4float_uint_6 ArrayStride 16
+       %uint = OpTypeInt 32 0
+     %v3uint = OpTypeVector %uint 3
+%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%GlobalInvocationID_1 = OpVariable %_ptr_Input_v3uint Input
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+    %v3float = OpTypeVector %float 3
+  %LightData = OpTypeStruct %v4float %v3float %float
+%_runtimearr_LightData = OpTypeRuntimeArray %LightData
+%LightsBuffer = OpTypeStruct %_runtimearr_LightData
+%_ptr_StorageBuffer_LightsBuffer = OpTypePointer StorageBuffer %LightsBuffer
+%lightsBuffer = OpVariable %_ptr_StorageBuffer_LightsBuffer StorageBuffer
+    %uint_64 = OpConstant %uint 64
+%_arr_uint_uint_64 = OpTypeArray %uint %uint_64
+%TileLightIdData = OpTypeStruct %uint %_arr_uint_uint_64
+     %uint_4 = OpConstant %uint 4
+%_arr_TileLightIdData_uint_4 = OpTypeArray %TileLightIdData %uint_4
+      %Tiles = OpTypeStruct %_arr_TileLightIdData_uint_4
+%_ptr_StorageBuffer_Tiles = OpTypePointer StorageBuffer %Tiles
+%tileLightId = OpVariable %_ptr_StorageBuffer_Tiles StorageBuffer
+     %Config = OpTypeStruct %uint %uint %uint %uint %uint %uint
+%_ptr_Uniform_Config = OpTypePointer Uniform %Config
+     %config = OpVariable %_ptr_Uniform_Config Uniform
+%mat4v4float = OpTypeMatrix %v4float 4
+   %Uniforms = OpTypeStruct %v4float %v4float %mat4v4float %mat4v4float %v4float
+%_ptr_Uniform_Uniforms = OpTypePointer Uniform %Uniforms
+   %uniforms = OpVariable %_ptr_Uniform_Uniforms Uniform
+       %void = OpTypeVoid
+         %28 = OpTypeFunction %void %v3uint
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %36 = OpConstantNull %uint
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_uint = OpTypePointer Uniform %uint
+       %bool = OpTypeBool
+     %uint_1 = OpConstant %uint 1
+%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
+%float_0_100000001 = OpConstant %float 0.100000001
+%float_0_00100000005 = OpConstant %float 0.00100000005
+   %float_64 = OpConstant %float 64
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+     %uint_3 = OpConstant %uint 3
+%_ptr_Uniform_mat4v4float = OpTypePointer Uniform %mat4v4float
+%_ptr_Function_mat4v4float = OpTypePointer Function %mat4v4float
+         %87 = OpConstantNull %mat4v4float
+        %int = OpTypeInt 32 1
+      %int_3 = OpConstant %int 3
+      %int_2 = OpConstant %int 2
+%_ptr_Function_float = OpTypePointer Function %float
+   %float_n1 = OpConstant %float -1
+        %101 = OpConstantNull %float
+    %float_1 = OpConstant %float 1
+%_ptr_StorageBuffer_v4float = OpTypePointer StorageBuffer %v4float
+%_ptr_Function_v4float = OpTypePointer Function %v4float
+        %117 = OpConstantNull %v4float
+     %uint_2 = OpConstant %uint 2
+    %float_0 = OpConstant %float 0
+     %uint_6 = OpConstant %uint 6
+%_arr_v4float_uint_6 = OpTypeArray %v4float %uint_6
+%_ptr_Function__arr_v4float_uint_6 = OpTypePointer Function %_arr_v4float_uint_6
+        %156 = OpConstantNull %_arr_v4float_uint_6
+      %int_4 = OpConstant %int 4
+      %int_5 = OpConstant %int 5
+     %int_16 = OpConstant %int 16
+      %int_0 = OpConstant %int 0
+%_ptr_Function_int = OpTypePointer Function %int
+        %170 = OpConstantNull %int
+      %v2int = OpTypeVector %int 2
+%_ptr_Function_v2int = OpTypePointer Function %v2int
+        %198 = OpConstantNull %v2int
+    %float_2 = OpConstant %float 2
+    %v2float = OpTypeVector %float 2
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+        %209 = OpConstantComposite %v2float %float_1 %float_1
+%_ptr_Function_v2float = OpTypePointer Function %v2float
+        %213 = OpConstantNull %v2float
+      %int_1 = OpConstant %int 1
+%_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
+%_ptr_StorageBuffer_uint_0 = OpTypePointer StorageBuffer %uint
+        %412 = OpTypeFunction %void
+ %main_inner = OpFunction %void None %28
+%GlobalInvocationID = OpFunctionParameter %v3uint
+         %32 = OpLabel
+      %index = OpVariable %_ptr_Function_uint Function %36
+          %M = OpVariable %_ptr_Function_mat4v4float Function %87
+   %viewNear = OpVariable %_ptr_Function_float Function %101
+    %viewFar = OpVariable %_ptr_Function_float Function %101
+   %lightPos = OpVariable %_ptr_Function_v4float Function %117
+        %127 = OpVariable %_ptr_Function_v4float Function %117
+%lightRadius = OpVariable %_ptr_Function_float Function %101
+     %boxMin = OpVariable %_ptr_Function_v4float Function %117
+     %boxMax = OpVariable %_ptr_Function_v4float Function %117
+%frustumPlanes = OpVariable %_ptr_Function__arr_v4float_uint_6 Function %156
+          %y = OpVariable %_ptr_Function_int Function %170
+          %x = OpVariable %_ptr_Function_int Function %170
+%tilePixel0Idx = OpVariable %_ptr_Function_v2int Function %198
+ %floorCoord = OpVariable %_ptr_Function_v2float Function %213
+  %ceilCoord = OpVariable %_ptr_Function_v2float Function %213
+%viewFloorCoord = OpVariable %_ptr_Function_v2float Function %213
+%viewCeilCoord = OpVariable %_ptr_Function_v2float Function %213
+         %dp = OpVariable %_ptr_Function_float Function %101
+          %i = OpVariable %_ptr_Function_uint Function %36
+          %p = OpVariable %_ptr_Function_v4float Function %117
+     %tileId = OpVariable %_ptr_Function_uint Function %36
+     %offset = OpVariable %_ptr_Function_uint Function %36
+         %33 = OpCompositeExtract %uint %GlobalInvocationID 0
+               OpStore %index %33
+         %37 = OpLoad %uint %index
+         %40 = OpAccessChain %_ptr_Uniform_uint %config %uint_0
+         %41 = OpLoad %uint %40
+         %42 = OpUGreaterThanEqual %bool %37 %41
+               OpSelectionMerge %44 None
+               OpBranchConditional %42 %45 %44
+         %45 = OpLabel
+               OpReturn
+         %44 = OpLabel
+         %46 = OpLoad %uint %index
+         %49 = OpAccessChain %_ptr_StorageBuffer_float %lightsBuffer %uint_0 %46 %uint_0 %uint_1
+         %50 = OpLoad %uint %index
+         %51 = OpAccessChain %_ptr_StorageBuffer_float %lightsBuffer %uint_0 %50 %uint_0 %uint_1
+         %52 = OpLoad %float %51
+         %54 = OpFSub %float %52 %float_0_100000001
+         %57 = OpLoad %uint %index
+         %56 = OpConvertUToF %float %57
+         %62 = OpLoad %uint %index
+         %61 = OpConvertUToF %float %62
+         %63 = OpFDiv %float %61 %float_64
+         %59 = OpExtInst %float %60 Floor %63
+         %64 = OpFMul %float %float_64 %59
+         %65 = OpFSub %float %56 %64
+         %66 = OpFMul %float %float_0_00100000005 %65
+         %67 = OpFAdd %float %54 %66
+               OpStore %49 %67
+         %68 = OpLoad %uint %index
+         %69 = OpAccessChain %_ptr_StorageBuffer_float %lightsBuffer %uint_0 %68 %uint_0 %uint_1
+         %70 = OpLoad %float %69
+         %72 = OpAccessChain %_ptr_Uniform_float %uniforms %uint_0 %uint_1
+         %73 = OpLoad %float %72
+         %74 = OpFOrdLessThan %bool %70 %73
+               OpSelectionMerge %75 None
+               OpBranchConditional %74 %76 %75
+         %76 = OpLabel
+         %77 = OpLoad %uint %index
+         %78 = OpAccessChain %_ptr_StorageBuffer_float %lightsBuffer %uint_0 %77 %uint_0 %uint_1
+         %79 = OpAccessChain %_ptr_Uniform_float %uniforms %uint_1 %uint_1
+         %80 = OpLoad %float %79
+               OpStore %78 %80
+               OpBranch %75
+         %75 = OpLabel
+         %83 = OpAccessChain %_ptr_Uniform_mat4v4float %uniforms %uint_3
+         %84 = OpLoad %mat4v4float %83
+               OpStore %M %84
+         %93 = OpAccessChain %_ptr_Function_float %M %int_3 %int_2
+         %94 = OpLoad %float %93
+         %88 = OpFNegate %float %94
+         %96 = OpAccessChain %_ptr_Function_float %M %int_2 %int_2
+         %97 = OpLoad %float %96
+         %98 = OpFAdd %float %float_n1 %97
+         %99 = OpFDiv %float %88 %98
+               OpStore %viewNear %99
+        %103 = OpAccessChain %_ptr_Function_float %M %int_3 %int_2
+        %104 = OpLoad %float %103
+        %102 = OpFNegate %float %104
+        %106 = OpAccessChain %_ptr_Function_float %M %int_2 %int_2
+        %107 = OpLoad %float %106
+        %108 = OpFAdd %float %float_1 %107
+        %109 = OpFDiv %float %102 %108
+               OpStore %viewFar %109
+        %111 = OpLoad %uint %index
+        %113 = OpAccessChain %_ptr_StorageBuffer_v4float %lightsBuffer %uint_0 %111 %uint_0
+        %114 = OpLoad %v4float %113
+               OpStore %lightPos %114
+        %119 = OpAccessChain %_ptr_Uniform_mat4v4float %uniforms %uint_2
+        %120 = OpLoad %mat4v4float %119
+        %121 = OpLoad %v4float %lightPos
+        %122 = OpMatrixTimesVector %v4float %120 %121
+               OpStore %lightPos %122
+        %123 = OpLoad %v4float %lightPos
+        %124 = OpAccessChain %_ptr_Function_float %lightPos %uint_3
+        %125 = OpLoad %float %124
+        %128 = OpCompositeConstruct %v4float %125 %125 %125 %125
+        %126 = OpFDiv %v4float %123 %128
+               OpStore %lightPos %126
+        %129 = OpLoad %uint %index
+        %130 = OpAccessChain %_ptr_StorageBuffer_float %lightsBuffer %uint_0 %129 %uint_2
+        %131 = OpLoad %float %130
+               OpStore %lightRadius %131
+        %133 = OpLoad %v4float %lightPos
+        %134 = OpLoad %float %lightRadius
+        %135 = OpCompositeConstruct %v3float %134 %134 %134
+        %136 = OpCompositeExtract %float %135 0
+        %137 = OpCompositeExtract %float %135 1
+        %138 = OpCompositeExtract %float %135 2
+        %140 = OpCompositeConstruct %v4float %136 %137 %138 %float_0
+        %141 = OpFSub %v4float %133 %140
+               OpStore %boxMin %141
+        %143 = OpLoad %v4float %lightPos
+        %144 = OpLoad %float %lightRadius
+        %145 = OpCompositeConstruct %v3float %144 %144 %144
+        %146 = OpCompositeExtract %float %145 0
+        %147 = OpCompositeExtract %float %145 1
+        %148 = OpCompositeExtract %float %145 2
+        %149 = OpCompositeConstruct %v4float %146 %147 %148 %float_0
+        %150 = OpFAdd %v4float %143 %149
+               OpStore %boxMax %150
+        %158 = OpAccessChain %_ptr_Function_v4float %frustumPlanes %int_4
+        %159 = OpLoad %float %viewNear
+        %160 = OpCompositeConstruct %v4float %float_0 %float_0 %float_n1 %159
+               OpStore %158 %160
+        %162 = OpAccessChain %_ptr_Function_v4float %frustumPlanes %int_5
+        %164 = OpLoad %float %viewFar
+        %163 = OpFNegate %float %164
+        %165 = OpCompositeConstruct %v4float %float_0 %float_0 %float_1 %163
+               OpStore %162 %165
+               OpStore %y %int_0
+               OpBranch %171
+        %171 = OpLabel
+               OpLoopMerge %172 %173 None
+               OpBranch %174
+        %174 = OpLabel
+        %176 = OpLoad %int %y
+        %177 = OpSLessThan %bool %176 %int_2
+        %175 = OpLogicalNot %bool %177
+               OpSelectionMerge %178 None
+               OpBranchConditional %175 %179 %178
+        %179 = OpLabel
+               OpBranch %172
+        %178 = OpLabel
+               OpStore %x %int_0
+               OpBranch %181
+        %181 = OpLabel
+               OpLoopMerge %182 %183 None
+               OpBranch %184
+        %184 = OpLabel
+        %186 = OpLoad %int %x
+        %187 = OpSLessThan %bool %186 %int_2
+        %185 = OpLogicalNot %bool %187
+               OpSelectionMerge %188 None
+               OpBranchConditional %185 %189 %188
+        %189 = OpLabel
+               OpBranch %182
+        %188 = OpLabel
+        %191 = OpLoad %int %x
+        %192 = OpIMul %int %191 %int_16
+        %193 = OpLoad %int %y
+        %194 = OpIMul %int %193 %int_16
+        %195 = OpCompositeConstruct %v2int %192 %194
+               OpStore %tilePixel0Idx %195
+        %202 = OpLoad %v2int %tilePixel0Idx
+        %200 = OpConvertSToF %v2float %202
+        %203 = OpVectorTimesScalar %v2float %200 %float_2
+        %205 = OpAccessChain %_ptr_Uniform_v4float %uniforms %uint_4
+        %206 = OpLoad %v4float %205
+        %207 = OpVectorShuffle %v2float %206 %206 0 1
+        %208 = OpFDiv %v2float %203 %207
+        %210 = OpFSub %v2float %208 %209
+               OpStore %floorCoord %210
+        %215 = OpLoad %v2int %tilePixel0Idx
+        %216 = OpCompositeConstruct %v2int %int_16 %int_16
+        %217 = OpIAdd %v2int %215 %216
+        %214 = OpConvertSToF %v2float %217
+        %218 = OpVectorTimesScalar %v2float %214 %float_2
+        %219 = OpAccessChain %_ptr_Uniform_v4float %uniforms %uint_4
+        %220 = OpLoad %v4float %219
+        %221 = OpVectorShuffle %v2float %220 %220 0 1
+        %222 = OpFDiv %v2float %218 %221
+        %223 = OpFSub %v2float %222 %209
+               OpStore %ceilCoord %223
+        %226 = OpLoad %float %viewNear
+        %225 = OpFNegate %float %226
+        %227 = OpAccessChain %_ptr_Function_float %floorCoord %uint_0
+        %228 = OpLoad %float %227
+        %229 = OpFMul %float %225 %228
+        %230 = OpAccessChain %_ptr_Function_float %M %int_2 %int_0
+        %231 = OpLoad %float %230
+        %232 = OpLoad %float %viewNear
+        %233 = OpFMul %float %231 %232
+        %234 = OpFSub %float %229 %233
+        %235 = OpAccessChain %_ptr_Function_float %M %int_0 %int_0
+        %236 = OpLoad %float %235
+        %237 = OpFDiv %float %234 %236
+        %239 = OpLoad %float %viewNear
+        %238 = OpFNegate %float %239
+        %240 = OpAccessChain %_ptr_Function_float %floorCoord %uint_1
+        %241 = OpLoad %float %240
+        %242 = OpFMul %float %238 %241
+        %244 = OpAccessChain %_ptr_Function_float %M %int_2 %int_1
+        %245 = OpLoad %float %244
+        %246 = OpLoad %float %viewNear
+        %247 = OpFMul %float %245 %246
+        %248 = OpFSub %float %242 %247
+        %249 = OpAccessChain %_ptr_Function_float %M %int_1 %int_1
+        %250 = OpLoad %float %249
+        %251 = OpFDiv %float %248 %250
+        %252 = OpCompositeConstruct %v2float %237 %251
+               OpStore %viewFloorCoord %252
+        %255 = OpLoad %float %viewNear
+        %254 = OpFNegate %float %255
+        %256 = OpAccessChain %_ptr_Function_float %ceilCoord %uint_0
+        %257 = OpLoad %float %256
+        %258 = OpFMul %float %254 %257
+        %259 = OpAccessChain %_ptr_Function_float %M %int_2 %int_0
+        %260 = OpLoad %float %259
+        %261 = OpLoad %float %viewNear
+        %262 = OpFMul %float %260 %261
+        %263 = OpFSub %float %258 %262
+        %264 = OpAccessChain %_ptr_Function_float %M %int_0 %int_0
+        %265 = OpLoad %float %264
+        %266 = OpFDiv %float %263 %265
+        %268 = OpLoad %float %viewNear
+        %267 = OpFNegate %float %268
+        %269 = OpAccessChain %_ptr_Function_float %ceilCoord %uint_1
+        %270 = OpLoad %float %269
+        %271 = OpFMul %float %267 %270
+        %272 = OpAccessChain %_ptr_Function_float %M %int_2 %int_1
+        %273 = OpLoad %float %272
+        %274 = OpLoad %float %viewNear
+        %275 = OpFMul %float %273 %274
+        %276 = OpFSub %float %271 %275
+        %277 = OpAccessChain %_ptr_Function_float %M %int_1 %int_1
+        %278 = OpLoad %float %277
+        %279 = OpFDiv %float %276 %278
+        %280 = OpCompositeConstruct %v2float %266 %279
+               OpStore %viewCeilCoord %280
+        %282 = OpAccessChain %_ptr_Function_v4float %frustumPlanes %int_0
+        %284 = OpAccessChain %_ptr_Function_float %viewFloorCoord %uint_0
+        %285 = OpLoad %float %284
+        %283 = OpFNegate %float %285
+        %286 = OpLoad %float %viewNear
+        %287 = OpFDiv %float %283 %286
+        %288 = OpCompositeConstruct %v4float %float_1 %float_0 %287 %float_0
+               OpStore %282 %288
+        %289 = OpAccessChain %_ptr_Function_v4float %frustumPlanes %int_1
+        %290 = OpAccessChain %_ptr_Function_float %viewCeilCoord %uint_0
+        %291 = OpLoad %float %290
+        %292 = OpLoad %float %viewNear
+        %293 = OpFDiv %float %291 %292
+        %294 = OpCompositeConstruct %v4float %float_n1 %float_0 %293 %float_0
+               OpStore %289 %294
+        %295 = OpAccessChain %_ptr_Function_v4float %frustumPlanes %int_2
+        %297 = OpAccessChain %_ptr_Function_float %viewFloorCoord %uint_1
+        %298 = OpLoad %float %297
+        %296 = OpFNegate %float %298
+        %299 = OpLoad %float %viewNear
+        %300 = OpFDiv %float %296 %299
+        %301 = OpCompositeConstruct %v4float %float_0 %float_1 %300 %float_0
+               OpStore %295 %301
+        %302 = OpAccessChain %_ptr_Function_v4float %frustumPlanes %int_3
+        %303 = OpAccessChain %_ptr_Function_float %viewCeilCoord %uint_1
+        %304 = OpLoad %float %303
+        %305 = OpLoad %float %viewNear
+        %306 = OpFDiv %float %304 %305
+        %307 = OpCompositeConstruct %v4float %float_0 %float_n1 %306 %float_0
+               OpStore %302 %307
+               OpStore %dp %float_0
+               OpStore %i %uint_0
+               OpBranch %310
+        %310 = OpLabel
+               OpLoopMerge %311 %312 None
+               OpBranch %313
+        %313 = OpLabel
+        %315 = OpLoad %uint %i
+        %316 = OpULessThan %bool %315 %uint_6
+        %314 = OpLogicalNot %bool %316
+               OpSelectionMerge %317 None
+               OpBranchConditional %314 %318 %317
+        %318 = OpLabel
+               OpBranch %311
+        %317 = OpLabel
+        %320 = OpLoad %uint %i
+        %321 = OpAccessChain %_ptr_Function_float %frustumPlanes %320 %uint_0
+        %322 = OpLoad %float %321
+        %323 = OpFOrdGreaterThan %bool %322 %float_0
+               OpSelectionMerge %324 None
+               OpBranchConditional %323 %325 %326
+        %325 = OpLabel
+        %327 = OpAccessChain %_ptr_Function_float %p %uint_0
+        %328 = OpAccessChain %_ptr_Function_float %boxMax %uint_0
+        %329 = OpLoad %float %328
+               OpStore %327 %329
+               OpBranch %324
+        %326 = OpLabel
+        %330 = OpAccessChain %_ptr_Function_float %p %uint_0
+        %331 = OpAccessChain %_ptr_Function_float %boxMin %uint_0
+        %332 = OpLoad %float %331
+               OpStore %330 %332
+               OpBranch %324
+        %324 = OpLabel
+        %333 = OpLoad %uint %i
+        %334 = OpAccessChain %_ptr_Function_float %frustumPlanes %333 %uint_1
+        %335 = OpLoad %float %334
+        %336 = OpFOrdGreaterThan %bool %335 %float_0
+               OpSelectionMerge %337 None
+               OpBranchConditional %336 %338 %339
+        %338 = OpLabel
+        %340 = OpAccessChain %_ptr_Function_float %p %uint_1
+        %341 = OpAccessChain %_ptr_Function_float %boxMax %uint_1
+        %342 = OpLoad %float %341
+               OpStore %340 %342
+               OpBranch %337
+        %339 = OpLabel
+        %343 = OpAccessChain %_ptr_Function_float %p %uint_1
+        %344 = OpAccessChain %_ptr_Function_float %boxMin %uint_1
+        %345 = OpLoad %float %344
+               OpStore %343 %345
+               OpBranch %337
+        %337 = OpLabel
+        %346 = OpLoad %uint %i
+        %347 = OpAccessChain %_ptr_Function_float %frustumPlanes %346 %uint_2
+        %348 = OpLoad %float %347
+        %349 = OpFOrdGreaterThan %bool %348 %float_0
+               OpSelectionMerge %350 None
+               OpBranchConditional %349 %351 %352
+        %351 = OpLabel
+        %353 = OpAccessChain %_ptr_Function_float %p %uint_2
+        %354 = OpAccessChain %_ptr_Function_float %boxMax %uint_2
+        %355 = OpLoad %float %354
+               OpStore %353 %355
+               OpBranch %350
+        %352 = OpLabel
+        %356 = OpAccessChain %_ptr_Function_float %p %uint_2
+        %357 = OpAccessChain %_ptr_Function_float %boxMin %uint_2
+        %358 = OpLoad %float %357
+               OpStore %356 %358
+               OpBranch %350
+        %350 = OpLabel
+        %359 = OpAccessChain %_ptr_Function_float %p %uint_3
+               OpStore %359 %float_1
+        %360 = OpLoad %float %dp
+        %363 = OpLoad %v4float %p
+        %364 = OpLoad %uint %i
+        %365 = OpAccessChain %_ptr_Function_v4float %frustumPlanes %364
+        %366 = OpLoad %v4float %365
+        %362 = OpDot %float %363 %366
+        %361 = OpExtInst %float %60 NMin %float_0 %362
+        %367 = OpFAdd %float %360 %361
+               OpStore %dp %367
+               OpBranch %312
+        %312 = OpLabel
+        %368 = OpLoad %uint %i
+        %369 = OpIAdd %uint %368 %uint_1
+               OpStore %i %369
+               OpBranch %310
+        %311 = OpLabel
+        %370 = OpLoad %float %dp
+        %371 = OpFOrdGreaterThanEqual %bool %370 %float_0
+               OpSelectionMerge %372 None
+               OpBranchConditional %371 %373 %372
+        %373 = OpLabel
+        %375 = OpLoad %int %x
+        %376 = OpLoad %int %y
+        %377 = OpIMul %int %376 %int_2
+        %378 = OpIAdd %int %375 %377
+        %374 = OpBitcast %uint %378
+               OpStore %tileId %374
+        %380 = OpLoad %uint %tileId
+        %381 = OpULessThan %bool %380 %uint_0
+               OpSelectionMerge %382 None
+               OpBranchConditional %381 %382 %383
+        %383 = OpLabel
+        %384 = OpLoad %uint %tileId
+        %385 = OpAccessChain %_ptr_Uniform_uint %config %uint_1
+        %386 = OpLoad %uint %385
+        %387 = OpUGreaterThanEqual %bool %384 %386
+               OpBranch %382
+        %382 = OpLabel
+        %388 = OpPhi %bool %381 %373 %387 %383
+               OpSelectionMerge %389 None
+               OpBranchConditional %388 %390 %389
+        %390 = OpLabel
+               OpBranch %183
+        %389 = OpLabel
+        %393 = OpLoad %uint %tileId
+        %395 = OpAccessChain %_ptr_StorageBuffer_uint %tileLightId %uint_0 %393 %uint_0
+        %391 = OpAtomicIAdd %uint %395 %uint_1 %uint_0 %uint_1
+               OpStore %offset %391
+        %397 = OpLoad %uint %offset
+        %398 = OpAccessChain %_ptr_Uniform_uint %config %uint_4
+        %399 = OpLoad %uint %398
+        %400 = OpUGreaterThanEqual %bool %397 %399
+               OpSelectionMerge %401 None
+               OpBranchConditional %400 %402 %401
+        %402 = OpLabel
+               OpBranch %183
+        %401 = OpLabel
+        %403 = OpLoad %uint %tileId
+        %404 = OpLoad %uint %offset
+        %406 = OpAccessChain %_ptr_StorageBuffer_uint_0 %tileLightId %uint_0 %403 %uint_1 %404
+        %407 = OpCompositeExtract %uint %GlobalInvocationID 0
+               OpStore %406 %407
+               OpBranch %372
+        %372 = OpLabel
+               OpBranch %183
+        %183 = OpLabel
+        %408 = OpLoad %int %x
+        %409 = OpIAdd %int %408 %int_1
+               OpStore %x %409
+               OpBranch %181
+        %182 = OpLabel
+               OpBranch %173
+        %173 = OpLabel
+        %410 = OpLoad %int %y
+        %411 = OpIAdd %int %410 %int_1
+               OpStore %y %411
+               OpBranch %171
+        %172 = OpLabel
+               OpReturn
+               OpFunctionEnd
+       %main = OpFunction %void None %412
+        %414 = OpLabel
+        %416 = OpLoad %v3uint %GlobalInvocationID_1
+        %415 = OpFunctionCall %void %main_inner %416
+               OpReturn
+               OpFunctionEnd
diff --git a/test/bug/tint/1121.wgsl.expected.wgsl b/test/bug/tint/1121.wgsl.expected.wgsl
new file mode 100644
index 0000000..65efc9a
--- /dev/null
+++ b/test/bug/tint/1121.wgsl.expected.wgsl
@@ -0,0 +1,119 @@
+struct LightData {
+  position : vec4<f32>;
+  color : vec3<f32>;
+  radius : f32;
+};
+
+[[block]]
+struct LightsBuffer {
+  lights : array<LightData>;
+};
+
+[[group(0), binding(0)]] var<storage, read_write> lightsBuffer : LightsBuffer;
+
+struct TileLightIdData {
+  count : atomic<u32>;
+  lightId : array<u32, 64>;
+};
+
+[[block]]
+struct Tiles {
+  data : array<TileLightIdData, 4>;
+};
+
+[[group(1), binding(0)]] var<storage, read_write> tileLightId : Tiles;
+
+[[block]]
+struct Config {
+  numLights : u32;
+  numTiles : u32;
+  tileCountX : u32;
+  tileCountY : u32;
+  numTileLightSlot : u32;
+  tileSize : u32;
+};
+
+[[group(2), binding(0)]] var<uniform> config : Config;
+
+[[block]]
+struct Uniforms {
+  min : vec4<f32>;
+  max : vec4<f32>;
+  viewMatrix : mat4x4<f32>;
+  projectionMatrix : mat4x4<f32>;
+  fullScreenSize : vec4<f32>;
+};
+
+[[group(3), binding(0)]] var<uniform> uniforms : Uniforms;
+
+[[stage(compute), workgroup_size(64, 1, 1)]]
+fn main([[builtin(global_invocation_id)]] GlobalInvocationID : vec3<u32>) {
+  var index = GlobalInvocationID.x;
+  if ((index >= config.numLights)) {
+    return;
+  }
+  lightsBuffer.lights[index].position.y = ((lightsBuffer.lights[index].position.y - 0.100000001) + (0.001 * (f32(index) - (64.0 * floor((f32(index) / 64.0))))));
+  if ((lightsBuffer.lights[index].position.y < uniforms.min.y)) {
+    lightsBuffer.lights[index].position.y = uniforms.max.y;
+  }
+  var M : mat4x4<f32> = uniforms.projectionMatrix;
+  var viewNear : f32 = (-(M[3][2]) / (-1.0 + M[2][2]));
+  var viewFar : f32 = (-(M[3][2]) / (1.0 + M[2][2]));
+  var lightPos = lightsBuffer.lights[index].position;
+  lightPos = (uniforms.viewMatrix * lightPos);
+  lightPos = (lightPos / lightPos.w);
+  var lightRadius : f32 = lightsBuffer.lights[index].radius;
+  var boxMin : vec4<f32> = (lightPos - vec4<f32>(vec3<f32>(lightRadius), 0.0));
+  var boxMax : vec4<f32> = (lightPos + vec4<f32>(vec3<f32>(lightRadius), 0.0));
+  var frustumPlanes : array<vec4<f32>, 6>;
+  frustumPlanes[4] = vec4<f32>(0.0, 0.0, -1.0, viewNear);
+  frustumPlanes[5] = vec4<f32>(0.0, 0.0, 1.0, -(viewFar));
+  let TILE_SIZE : i32 = 16;
+  let TILE_COUNT_X : i32 = 2;
+  let TILE_COUNT_Y : i32 = 2;
+  for(var y : i32 = 0; (y < TILE_COUNT_Y); y = (y + 1)) {
+    for(var x : i32 = 0; (x < TILE_COUNT_X); x = (x + 1)) {
+      var tilePixel0Idx : vec2<i32> = vec2<i32>((x * TILE_SIZE), (y * TILE_SIZE));
+      var floorCoord : vec2<f32> = (((2.0 * vec2<f32>(tilePixel0Idx)) / uniforms.fullScreenSize.xy) - vec2<f32>(1.0));
+      var ceilCoord : vec2<f32> = (((2.0 * vec2<f32>((tilePixel0Idx + vec2<i32>(TILE_SIZE)))) / uniforms.fullScreenSize.xy) - vec2<f32>(1.0));
+      var viewFloorCoord : vec2<f32> = vec2<f32>((((-(viewNear) * floorCoord.x) - (M[2][0] * viewNear)) / M[0][0]), (((-(viewNear) * floorCoord.y) - (M[2][1] * viewNear)) / M[1][1]));
+      var viewCeilCoord : vec2<f32> = vec2<f32>((((-(viewNear) * ceilCoord.x) - (M[2][0] * viewNear)) / M[0][0]), (((-(viewNear) * ceilCoord.y) - (M[2][1] * viewNear)) / M[1][1]));
+      frustumPlanes[0] = vec4<f32>(1.0, 0.0, (-(viewFloorCoord.x) / viewNear), 0.0);
+      frustumPlanes[1] = vec4<f32>(-1.0, 0.0, (viewCeilCoord.x / viewNear), 0.0);
+      frustumPlanes[2] = vec4<f32>(0.0, 1.0, (-(viewFloorCoord.y) / viewNear), 0.0);
+      frustumPlanes[3] = vec4<f32>(0.0, -1.0, (viewCeilCoord.y / viewNear), 0.0);
+      var dp : f32 = 0.0;
+      for(var i : u32 = 0u; (i < 6u); i = (i + 1u)) {
+        var p : vec4<f32>;
+        if ((frustumPlanes[i].x > 0.0)) {
+          p.x = boxMax.x;
+        } else {
+          p.x = boxMin.x;
+        }
+        if ((frustumPlanes[i].y > 0.0)) {
+          p.y = boxMax.y;
+        } else {
+          p.y = boxMin.y;
+        }
+        if ((frustumPlanes[i].z > 0.0)) {
+          p.z = boxMax.z;
+        } else {
+          p.z = boxMin.z;
+        }
+        p.w = 1.0;
+        dp = (dp + min(0.0, dot(p, frustumPlanes[i])));
+      }
+      if ((dp >= 0.0)) {
+        var tileId : u32 = u32((x + (y * TILE_COUNT_X)));
+        if (((tileId < 0u) || (tileId >= config.numTiles))) {
+          continue;
+        }
+        var offset : u32 = atomicAdd(&(tileLightId.data[tileId].count), 1u);
+        if ((offset >= config.numTileLightSlot)) {
+          continue;
+        }
+        tileLightId.data[tileId].lightId[offset] = GlobalInvocationID.x;
+      }
+    }
+  }
+}
diff --git a/test/bug/tint/294.wgsl.expected.msl b/test/bug/tint/294.wgsl.expected.msl
index fb5e0c6..065aee7 100644
--- a/test/bug/tint/294.wgsl.expected.msl
+++ b/test/bug/tint/294.wgsl.expected.msl
@@ -1,6 +1,17 @@
 #include <metal_stdlib>
 
 using namespace metal;
+
+template<typename T, int N, int M>
+inline auto operator*(matrix<T, N, M> lhs, packed_vec<T, N> rhs) {
+  return lhs * vec<T, N>(rhs);
+}
+
+template<typename T, int N, int M>
+inline auto operator*(packed_vec<T, M> lhs, matrix<T, N, M> rhs) {
+  return vec<T, M>(lhs) * rhs;
+}
+
 struct Light {
   /* 0x0000 */ packed_float3 position;
   /* 0x000c */ int8_t tint_pad[4];
diff --git a/test/bug/tint/948.wgsl.expected.msl b/test/bug/tint/948.wgsl.expected.msl
index eefb6f4..a0e0430 100644
--- a/test/bug/tint/948.wgsl.expected.msl
+++ b/test/bug/tint/948.wgsl.expected.msl
@@ -1,6 +1,17 @@
 #include <metal_stdlib>
 
 using namespace metal;
+
+template<typename T, int N, int M>
+inline auto operator*(matrix<T, N, M> lhs, packed_vec<T, N> rhs) {
+  return lhs * vec<T, N>(rhs);
+}
+
+template<typename T, int N, int M>
+inline auto operator*(packed_vec<T, M> lhs, matrix<T, N, M> rhs) {
+  return vec<T, M>(lhs) * rhs;
+}
+
 struct LeftOver {
   /* 0x0000 */ float time;
   /* 0x0004 */ uint padding;
diff --git a/test/bug/tint/949.wgsl.expected.msl b/test/bug/tint/949.wgsl.expected.msl
index 63e6097..776865d 100644
--- a/test/bug/tint/949.wgsl.expected.msl
+++ b/test/bug/tint/949.wgsl.expected.msl
@@ -1,6 +1,17 @@
 #include <metal_stdlib>
 
 using namespace metal;
+
+template<typename T, int N, int M>
+inline auto operator*(matrix<T, N, M> lhs, packed_vec<T, N> rhs) {
+  return lhs * vec<T, N>(rhs);
+}
+
+template<typename T, int N, int M>
+inline auto operator*(packed_vec<T, M> lhs, matrix<T, N, M> rhs) {
+  return vec<T, M>(lhs) * rhs;
+}
+
 struct lightingInfo {
   float3 diffuse;
   float3 specular;
diff --git a/test/bug/tint/980.wgsl.expected.msl b/test/bug/tint/980.wgsl.expected.msl
index 27ccfef..4c9c071 100644
--- a/test/bug/tint/980.wgsl.expected.msl
+++ b/test/bug/tint/980.wgsl.expected.msl
@@ -1,6 +1,17 @@
 #include <metal_stdlib>
 
 using namespace metal;
+
+template<typename T, int N, int M>
+inline auto operator*(matrix<T, N, M> lhs, packed_vec<T, N> rhs) {
+  return lhs * vec<T, N>(rhs);
+}
+
+template<typename T, int N, int M>
+inline auto operator*(packed_vec<T, M> lhs, matrix<T, N, M> rhs) {
+  return vec<T, M>(lhs) * rhs;
+}
+
 struct S {
   /* 0x0000 */ packed_float3 v;
   /* 0x000c */ uint i;
diff --git a/test/expressions/binary/mul/mat3x2-vec3/f32.wgsl b/test/expressions/binary/mul/mat3x2-vec3/f32.wgsl
new file mode 100644
index 0000000..ccfdaa1
--- /dev/null
+++ b/test/expressions/binary/mul/mat3x2-vec3/f32.wgsl
@@ -0,0 +1,11 @@
+[[block]]
+struct S {
+    matrix : mat3x2<f32>;
+    vector : vec3<f32>;
+};
+[[group(0), binding(0)]] var<uniform> data: S;
+
+[[stage(fragment)]]
+fn main() {
+  let x = data.matrix * data.vector;
+}
diff --git a/test/expressions/binary/mul/mat3x2-vec3/f32.wgsl.expected.hlsl b/test/expressions/binary/mul/mat3x2-vec3/f32.wgsl.expected.hlsl
new file mode 100644
index 0000000..6e12e5c
--- /dev/null
+++ b/test/expressions/binary/mul/mat3x2-vec3/f32.wgsl.expected.hlsl
@@ -0,0 +1,18 @@
+cbuffer cbuffer_data : register(b0, space0) {
+  uint4 data[3];
+};
+
+float3x2 tint_symbol_2(uint4 buffer[3], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
+  return float3x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)));
+}
+
+void main() {
+  const float2 x = mul(asfloat(data[2].xyz), tint_symbol_2(data, 0u));
+  return;
+}
diff --git a/test/expressions/binary/mul/mat3x2-vec3/f32.wgsl.expected.msl b/test/expressions/binary/mul/mat3x2-vec3/f32.wgsl.expected.msl
new file mode 100644
index 0000000..409b87b
--- /dev/null
+++ b/test/expressions/binary/mul/mat3x2-vec3/f32.wgsl.expected.msl
@@ -0,0 +1,26 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, int N, int M>
+inline auto operator*(matrix<T, N, M> lhs, packed_vec<T, N> rhs) {
+  return lhs * vec<T, N>(rhs);
+}
+
+template<typename T, int N, int M>
+inline auto operator*(packed_vec<T, M> lhs, matrix<T, N, M> rhs) {
+  return vec<T, M>(lhs) * rhs;
+}
+
+struct S {
+  /* 0x0000 */ float3x2 tint_symbol;
+  /* 0x0018 */ int8_t tint_pad[8];
+  /* 0x0020 */ packed_float3 vector;
+  /* 0x002c */ int8_t tint_pad_1[4];
+};
+
+fragment void tint_symbol_1(constant S& data [[buffer(0)]]) {
+  float2 const x = (data.tint_symbol * data.vector);
+  return;
+}
+
diff --git a/test/expressions/binary/mul/mat3x2-vec3/f32.wgsl.expected.spvasm b/test/expressions/binary/mul/mat3x2-vec3/f32.wgsl.expected.spvasm
new file mode 100644
index 0000000..ea2ff26
--- /dev/null
+++ b/test/expressions/binary/mul/mat3x2-vec3/f32.wgsl.expected.spvasm
@@ -0,0 +1,45 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 22
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %main "main"
+               OpExecutionMode %main OriginUpperLeft
+               OpName %S "S"
+               OpMemberName %S 0 "matrix"
+               OpMemberName %S 1 "vector"
+               OpName %data "data"
+               OpName %main "main"
+               OpDecorate %S Block
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 0 ColMajor
+               OpMemberDecorate %S 0 MatrixStride 8
+               OpMemberDecorate %S 1 Offset 32
+               OpDecorate %data NonWritable
+               OpDecorate %data DescriptorSet 0
+               OpDecorate %data Binding 0
+      %float = OpTypeFloat 32
+    %v2float = OpTypeVector %float 2
+%mat3v2float = OpTypeMatrix %v2float 3
+    %v3float = OpTypeVector %float 3
+          %S = OpTypeStruct %mat3v2float %v3float
+%_ptr_Uniform_S = OpTypePointer Uniform %S
+       %data = OpVariable %_ptr_Uniform_S Uniform
+       %void = OpTypeVoid
+          %8 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_mat3v2float = OpTypePointer Uniform %mat3v2float
+     %uint_1 = OpConstant %uint 1
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+       %main = OpFunction %void None %8
+         %11 = OpLabel
+         %15 = OpAccessChain %_ptr_Uniform_mat3v2float %data %uint_0
+         %16 = OpLoad %mat3v2float %15
+         %19 = OpAccessChain %_ptr_Uniform_v3float %data %uint_1
+         %20 = OpLoad %v3float %19
+         %21 = OpMatrixTimesVector %v2float %16 %20
+               OpReturn
+               OpFunctionEnd
diff --git a/test/expressions/binary/mul/mat3x2-vec3/f32.wgsl.expected.wgsl b/test/expressions/binary/mul/mat3x2-vec3/f32.wgsl.expected.wgsl
new file mode 100644
index 0000000..e2551fa
--- /dev/null
+++ b/test/expressions/binary/mul/mat3x2-vec3/f32.wgsl.expected.wgsl
@@ -0,0 +1,12 @@
+[[block]]
+struct S {
+  matrix : mat3x2<f32>;
+  vector : vec3<f32>;
+};
+
+[[group(0), binding(0)]] var<uniform> data : S;
+
+[[stage(fragment)]]
+fn main() {
+  let x = (data.matrix * data.vector);
+}
diff --git a/test/expressions/binary/mul/mat3x3-vec3/f32.wgsl b/test/expressions/binary/mul/mat3x3-vec3/f32.wgsl
new file mode 100644
index 0000000..2193faf
--- /dev/null
+++ b/test/expressions/binary/mul/mat3x3-vec3/f32.wgsl
@@ -0,0 +1,11 @@
+[[block]]
+struct S {
+    matrix : mat3x3<f32>;
+    vector : vec3<f32>;
+};
+[[group(0), binding(0)]] var<uniform> data: S;
+
+[[stage(fragment)]]
+fn main() {
+  let x = data.matrix * data.vector;
+}
diff --git a/test/expressions/binary/mul/mat3x3-vec3/f32.wgsl.expected.hlsl b/test/expressions/binary/mul/mat3x3-vec3/f32.wgsl.expected.hlsl
new file mode 100644
index 0000000..c680c08
--- /dev/null
+++ b/test/expressions/binary/mul/mat3x3-vec3/f32.wgsl.expected.hlsl
@@ -0,0 +1,15 @@
+cbuffer cbuffer_data : register(b0, space0) {
+  uint4 data[4];
+};
+
+float3x3 tint_symbol_2(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz));
+}
+
+void main() {
+  const float3 x = mul(asfloat(data[3].xyz), tint_symbol_2(data, 0u));
+  return;
+}
diff --git a/test/expressions/binary/mul/mat3x3-vec3/f32.wgsl.expected.msl b/test/expressions/binary/mul/mat3x3-vec3/f32.wgsl.expected.msl
new file mode 100644
index 0000000..edbe463
--- /dev/null
+++ b/test/expressions/binary/mul/mat3x3-vec3/f32.wgsl.expected.msl
@@ -0,0 +1,25 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, int N, int M>
+inline auto operator*(matrix<T, N, M> lhs, packed_vec<T, N> rhs) {
+  return lhs * vec<T, N>(rhs);
+}
+
+template<typename T, int N, int M>
+inline auto operator*(packed_vec<T, M> lhs, matrix<T, N, M> rhs) {
+  return vec<T, M>(lhs) * rhs;
+}
+
+struct S {
+  /* 0x0000 */ float3x3 tint_symbol;
+  /* 0x0030 */ packed_float3 vector;
+  /* 0x003c */ int8_t tint_pad[4];
+};
+
+fragment void tint_symbol_1(constant S& data [[buffer(0)]]) {
+  float3 const x = (data.tint_symbol * data.vector);
+  return;
+}
+
diff --git a/test/expressions/binary/mul/mat3x3-vec3/f32.wgsl.expected.spvasm b/test/expressions/binary/mul/mat3x3-vec3/f32.wgsl.expected.spvasm
new file mode 100644
index 0000000..468d2e9
--- /dev/null
+++ b/test/expressions/binary/mul/mat3x3-vec3/f32.wgsl.expected.spvasm
@@ -0,0 +1,44 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 21
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %main "main"
+               OpExecutionMode %main OriginUpperLeft
+               OpName %S "S"
+               OpMemberName %S 0 "matrix"
+               OpMemberName %S 1 "vector"
+               OpName %data "data"
+               OpName %main "main"
+               OpDecorate %S Block
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 0 ColMajor
+               OpMemberDecorate %S 0 MatrixStride 16
+               OpMemberDecorate %S 1 Offset 48
+               OpDecorate %data NonWritable
+               OpDecorate %data DescriptorSet 0
+               OpDecorate %data Binding 0
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat3v3float = OpTypeMatrix %v3float 3
+          %S = OpTypeStruct %mat3v3float %v3float
+%_ptr_Uniform_S = OpTypePointer Uniform %S
+       %data = OpVariable %_ptr_Uniform_S Uniform
+       %void = OpTypeVoid
+          %7 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_mat3v3float = OpTypePointer Uniform %mat3v3float
+     %uint_1 = OpConstant %uint 1
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+       %main = OpFunction %void None %7
+         %10 = OpLabel
+         %14 = OpAccessChain %_ptr_Uniform_mat3v3float %data %uint_0
+         %15 = OpLoad %mat3v3float %14
+         %18 = OpAccessChain %_ptr_Uniform_v3float %data %uint_1
+         %19 = OpLoad %v3float %18
+         %20 = OpMatrixTimesVector %v3float %15 %19
+               OpReturn
+               OpFunctionEnd
diff --git a/test/expressions/binary/mul/mat3x3-vec3/f32.wgsl.expected.wgsl b/test/expressions/binary/mul/mat3x3-vec3/f32.wgsl.expected.wgsl
new file mode 100644
index 0000000..e60691c
--- /dev/null
+++ b/test/expressions/binary/mul/mat3x3-vec3/f32.wgsl.expected.wgsl
@@ -0,0 +1,12 @@
+[[block]]
+struct S {
+  matrix : mat3x3<f32>;
+  vector : vec3<f32>;
+};
+
+[[group(0), binding(0)]] var<uniform> data : S;
+
+[[stage(fragment)]]
+fn main() {
+  let x = (data.matrix * data.vector);
+}
diff --git a/test/expressions/binary/mul/vec3-mat3x3/f32.wgsl b/test/expressions/binary/mul/vec3-mat3x3/f32.wgsl
new file mode 100644
index 0000000..498b3dd
--- /dev/null
+++ b/test/expressions/binary/mul/vec3-mat3x3/f32.wgsl
@@ -0,0 +1,11 @@
+[[block]]
+struct S {
+    matrix : mat3x3<f32>;
+    vector : vec3<f32>;
+};
+[[group(0), binding(0)]] var<uniform> data: S;
+
+[[stage(fragment)]]
+fn main() {
+  let x = data.vector * data.matrix;
+}
diff --git a/test/expressions/binary/mul/vec3-mat3x3/f32.wgsl.expected.hlsl b/test/expressions/binary/mul/vec3-mat3x3/f32.wgsl.expected.hlsl
new file mode 100644
index 0000000..4bd768f
--- /dev/null
+++ b/test/expressions/binary/mul/vec3-mat3x3/f32.wgsl.expected.hlsl
@@ -0,0 +1,15 @@
+cbuffer cbuffer_data : register(b0, space0) {
+  uint4 data[4];
+};
+
+float3x3 tint_symbol_3(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz));
+}
+
+void main() {
+  const float3 x = mul(tint_symbol_3(data, 0u), asfloat(data[3].xyz));
+  return;
+}
diff --git a/test/expressions/binary/mul/vec3-mat3x3/f32.wgsl.expected.msl b/test/expressions/binary/mul/vec3-mat3x3/f32.wgsl.expected.msl
new file mode 100644
index 0000000..d4a9b71
--- /dev/null
+++ b/test/expressions/binary/mul/vec3-mat3x3/f32.wgsl.expected.msl
@@ -0,0 +1,25 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, int N, int M>
+inline auto operator*(matrix<T, N, M> lhs, packed_vec<T, N> rhs) {
+  return lhs * vec<T, N>(rhs);
+}
+
+template<typename T, int N, int M>
+inline auto operator*(packed_vec<T, M> lhs, matrix<T, N, M> rhs) {
+  return vec<T, M>(lhs) * rhs;
+}
+
+struct S {
+  /* 0x0000 */ float3x3 tint_symbol;
+  /* 0x0030 */ packed_float3 vector;
+  /* 0x003c */ int8_t tint_pad[4];
+};
+
+fragment void tint_symbol_1(constant S& data [[buffer(0)]]) {
+  float3 const x = (data.vector * data.tint_symbol);
+  return;
+}
+
diff --git a/test/expressions/binary/mul/vec3-mat3x3/f32.wgsl.expected.spvasm b/test/expressions/binary/mul/vec3-mat3x3/f32.wgsl.expected.spvasm
new file mode 100644
index 0000000..1820ed8
--- /dev/null
+++ b/test/expressions/binary/mul/vec3-mat3x3/f32.wgsl.expected.spvasm
@@ -0,0 +1,44 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 21
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %main "main"
+               OpExecutionMode %main OriginUpperLeft
+               OpName %S "S"
+               OpMemberName %S 0 "matrix"
+               OpMemberName %S 1 "vector"
+               OpName %data "data"
+               OpName %main "main"
+               OpDecorate %S Block
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 0 ColMajor
+               OpMemberDecorate %S 0 MatrixStride 16
+               OpMemberDecorate %S 1 Offset 48
+               OpDecorate %data NonWritable
+               OpDecorate %data DescriptorSet 0
+               OpDecorate %data Binding 0
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat3v3float = OpTypeMatrix %v3float 3
+          %S = OpTypeStruct %mat3v3float %v3float
+%_ptr_Uniform_S = OpTypePointer Uniform %S
+       %data = OpVariable %_ptr_Uniform_S Uniform
+       %void = OpTypeVoid
+          %7 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_1 = OpConstant %uint 1
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_mat3v3float = OpTypePointer Uniform %mat3v3float
+       %main = OpFunction %void None %7
+         %10 = OpLabel
+         %14 = OpAccessChain %_ptr_Uniform_v3float %data %uint_1
+         %15 = OpLoad %v3float %14
+         %18 = OpAccessChain %_ptr_Uniform_mat3v3float %data %uint_0
+         %19 = OpLoad %mat3v3float %18
+         %20 = OpVectorTimesMatrix %v3float %15 %19
+               OpReturn
+               OpFunctionEnd
diff --git a/test/expressions/binary/mul/vec3-mat3x3/f32.wgsl.expected.wgsl b/test/expressions/binary/mul/vec3-mat3x3/f32.wgsl.expected.wgsl
new file mode 100644
index 0000000..3a8b957
--- /dev/null
+++ b/test/expressions/binary/mul/vec3-mat3x3/f32.wgsl.expected.wgsl
@@ -0,0 +1,12 @@
+[[block]]
+struct S {
+  matrix : mat3x3<f32>;
+  vector : vec3<f32>;
+};
+
+[[group(0), binding(0)]] var<uniform> data : S;
+
+[[stage(fragment)]]
+fn main() {
+  let x = (data.vector * data.matrix);
+}
diff --git a/test/expressions/binary/mul/vec3-mat4x3/f32.wgsl b/test/expressions/binary/mul/vec3-mat4x3/f32.wgsl
new file mode 100644
index 0000000..97844ad
--- /dev/null
+++ b/test/expressions/binary/mul/vec3-mat4x3/f32.wgsl
@@ -0,0 +1,11 @@
+[[block]]
+struct S {
+    matrix : mat4x3<f32>;
+    vector : vec3<f32>;
+};
+[[group(0), binding(0)]] var<uniform> data: S;
+
+[[stage(fragment)]]
+fn main() {
+  let x = data.vector * data.matrix;
+}
diff --git a/test/expressions/binary/mul/vec3-mat4x3/f32.wgsl.expected.hlsl b/test/expressions/binary/mul/vec3-mat4x3/f32.wgsl.expected.hlsl
new file mode 100644
index 0000000..574af42
--- /dev/null
+++ b/test/expressions/binary/mul/vec3-mat4x3/f32.wgsl.expected.hlsl
@@ -0,0 +1,16 @@
+cbuffer cbuffer_data : register(b0, space0) {
+  uint4 data[5];
+};
+
+float4x3 tint_symbol_3(uint4 buffer[5], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
+}
+
+void main() {
+  const float4 x = mul(tint_symbol_3(data, 0u), asfloat(data[4].xyz));
+  return;
+}
diff --git a/test/expressions/binary/mul/vec3-mat4x3/f32.wgsl.expected.msl b/test/expressions/binary/mul/vec3-mat4x3/f32.wgsl.expected.msl
new file mode 100644
index 0000000..bb2e75f
--- /dev/null
+++ b/test/expressions/binary/mul/vec3-mat4x3/f32.wgsl.expected.msl
@@ -0,0 +1,25 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, int N, int M>
+inline auto operator*(matrix<T, N, M> lhs, packed_vec<T, N> rhs) {
+  return lhs * vec<T, N>(rhs);
+}
+
+template<typename T, int N, int M>
+inline auto operator*(packed_vec<T, M> lhs, matrix<T, N, M> rhs) {
+  return vec<T, M>(lhs) * rhs;
+}
+
+struct S {
+  /* 0x0000 */ float4x3 tint_symbol;
+  /* 0x0040 */ packed_float3 vector;
+  /* 0x004c */ int8_t tint_pad[4];
+};
+
+fragment void tint_symbol_1(constant S& data [[buffer(0)]]) {
+  float4 const x = (data.vector * data.tint_symbol);
+  return;
+}
+
diff --git a/test/expressions/binary/mul/vec3-mat4x3/f32.wgsl.expected.spvasm b/test/expressions/binary/mul/vec3-mat4x3/f32.wgsl.expected.spvasm
new file mode 100644
index 0000000..2efe570
--- /dev/null
+++ b/test/expressions/binary/mul/vec3-mat4x3/f32.wgsl.expected.spvasm
@@ -0,0 +1,45 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 22
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %main "main"
+               OpExecutionMode %main OriginUpperLeft
+               OpName %S "S"
+               OpMemberName %S 0 "matrix"
+               OpMemberName %S 1 "vector"
+               OpName %data "data"
+               OpName %main "main"
+               OpDecorate %S Block
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 0 ColMajor
+               OpMemberDecorate %S 0 MatrixStride 16
+               OpMemberDecorate %S 1 Offset 64
+               OpDecorate %data NonWritable
+               OpDecorate %data DescriptorSet 0
+               OpDecorate %data Binding 0
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat4v3float = OpTypeMatrix %v3float 4
+          %S = OpTypeStruct %mat4v3float %v3float
+%_ptr_Uniform_S = OpTypePointer Uniform %S
+       %data = OpVariable %_ptr_Uniform_S Uniform
+       %void = OpTypeVoid
+          %7 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_1 = OpConstant %uint 1
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_mat4v3float = OpTypePointer Uniform %mat4v3float
+    %v4float = OpTypeVector %float 4
+       %main = OpFunction %void None %7
+         %10 = OpLabel
+         %14 = OpAccessChain %_ptr_Uniform_v3float %data %uint_1
+         %15 = OpLoad %v3float %14
+         %18 = OpAccessChain %_ptr_Uniform_mat4v3float %data %uint_0
+         %19 = OpLoad %mat4v3float %18
+         %20 = OpVectorTimesMatrix %v4float %15 %19
+               OpReturn
+               OpFunctionEnd
diff --git a/test/expressions/binary/mul/vec3-mat4x3/f32.wgsl.expected.wgsl b/test/expressions/binary/mul/vec3-mat4x3/f32.wgsl.expected.wgsl
new file mode 100644
index 0000000..2f567cc
--- /dev/null
+++ b/test/expressions/binary/mul/vec3-mat4x3/f32.wgsl.expected.wgsl
@@ -0,0 +1,12 @@
+[[block]]
+struct S {
+  matrix : mat4x3<f32>;
+  vector : vec3<f32>;
+};
+
+[[group(0), binding(0)]] var<uniform> data : S;
+
+[[stage(fragment)]]
+fn main() {
+  let x = (data.vector * data.matrix);
+}