Tint/E2E: Add f16 uniform/storage buffer E2E tests This CL adds Tint E2E tests for f16 types in uniform and storage buffers. Bug: tint:1473, tint:1502 Change-Id: I325524d2df326240cc1b080a90abf5bd076b3da1 Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/107543 Reviewed-by: Ben Clayton <bclayton@google.com> Kokoro: Kokoro <noreply+kokoro@google.com> Commit-Queue: Zhaoming Jiang <zhaoming.jiang@intel.com>

commit: 776b221ae2e337b2b3410456c3e4fa95a5c93dae [log] [tgz]
author: Zhaoming Jiang <zhaoming.jiang@intel.com> Wed Nov 30 02:47:27 2022 +0000
committer: Dawn LUCI CQ <dawn-scoped@luci-project-accounts.iam.gserviceaccount.com> Wed Nov 30 02:47:27 2022 +0000
tree: 4f3937c884d3c0c0e0b85c8ad349c04a25185543
parent: 205e16de63a2a642394202ab57a06ff07273064d [diff]
diff --git a/test/tint/buffer/storage/dynamic_index/read.wgsl b/test/tint/buffer/storage/dynamic_index/read.wgsl
index 7c50c42..e0995f7 100644
--- a/test/tint/buffer/storage/dynamic_index/read.wgsl
+++ b/test/tint/buffer/storage/dynamic_index/read.wgsl

@@ -1,30 +1,56 @@
 struct Inner {
-    a : vec3<i32>,
-    b : i32,
-    c : vec3<u32>,
-    d : u32,
-    e : vec3<f32>,
-    f : f32,
-    g : mat2x3<f32>,
-    h : mat3x2<f32>,
-    i : array<vec4<i32>, 4>,
+    scalar_f32 : f32,
+    scalar_i32 : i32,
+    scalar_u32 : u32,
+    vec2_f32 : vec2<f32>,
+    vec2_i32 : vec2<i32>,
+    vec2_u32 : vec2<u32>,
+    vec3_f32 : vec3<f32>,
+    vec3_i32 : vec3<i32>,
+    vec3_u32 : vec3<u32>,
+    vec4_f32 : vec4<f32>,
+    vec4_i32 : vec4<i32>,
+    vec4_u32 : vec4<u32>,
+    mat2x2_f32 : mat2x2<f32>,
+    mat2x3_f32 : mat2x3<f32>,
+    mat2x4_f32 : mat2x4<f32>,
+    mat3x2_f32 : mat3x2<f32>,
+    mat3x3_f32 : mat3x3<f32>,
+    mat3x4_f32 : mat3x4<f32>,
+    mat4x2_f32 : mat4x2<f32>,
+    mat4x3_f32 : mat4x3<f32>,
+    mat4x4_f32 : mat4x4<f32>,
+    arr2_vec3_f32 : array<vec3<f32>, 2>,
 };
 
 struct S {
     arr : array<Inner>,
 };
 
-@binding(0) @group(0) var<storage, read> s : S;
+@binding(0) @group(0) var<storage, read> sb : S;
 
 @compute @workgroup_size(1)
 fn main(@builtin(local_invocation_index) idx : u32) {
-    let a = s.arr[idx].a;
-    let b = s.arr[idx].b;
-    let c = s.arr[idx].c;
-    let d = s.arr[idx].d;
-    let e = s.arr[idx].e;
-    let f = s.arr[idx].f;
-    let g = s.arr[idx].g;
-    let h = s.arr[idx].h;
-    let i = s.arr[idx].i;
+    let scalar_f32 : f32 = sb.arr[idx].scalar_f32;
+    let scalar_i32 : i32 = sb.arr[idx].scalar_i32;
+    let scalar_u32 : u32 = sb.arr[idx].scalar_u32;
+    let vec2_f32 : vec2<f32> = sb.arr[idx].vec2_f32;
+    let vec2_i32 : vec2<i32> = sb.arr[idx].vec2_i32;
+    let vec2_u32 : vec2<u32> = sb.arr[idx].vec2_u32;
+    let vec3_f32 : vec3<f32> = sb.arr[idx].vec3_f32;
+    let vec3_i32 : vec3<i32> = sb.arr[idx].vec3_i32;
+    let vec3_u32 : vec3<u32> = sb.arr[idx].vec3_u32;
+    let vec4_f32 : vec4<f32> = sb.arr[idx].vec4_f32;
+    let vec4_i32 : vec4<i32> = sb.arr[idx].vec4_i32;
+    let vec4_u32 : vec4<u32> = sb.arr[idx].vec4_u32;
+    let mat2x2_f32 : mat2x2<f32> = sb.arr[idx].mat2x2_f32;
+    let mat2x3_f32 : mat2x3<f32> = sb.arr[idx].mat2x3_f32;
+    let mat2x4_f32 : mat2x4<f32> = sb.arr[idx].mat2x4_f32;
+    let mat3x2_f32 : mat3x2<f32> = sb.arr[idx].mat3x2_f32;
+    let mat3x3_f32 : mat3x3<f32> = sb.arr[idx].mat3x3_f32;
+    let mat3x4_f32 : mat3x4<f32> = sb.arr[idx].mat3x4_f32;
+    let mat4x2_f32 : mat4x2<f32> = sb.arr[idx].mat4x2_f32;
+    let mat4x3_f32 : mat4x3<f32> = sb.arr[idx].mat4x3_f32;
+    let mat4x4_f32 : mat4x4<f32> = sb.arr[idx].mat4x4_f32;
+    let arr2_vec3_f32 : array<vec3<f32>, 2> = sb.arr[idx].arr2_vec3_f32;
 }

diff --git a/test/tint/buffer/storage/dynamic_index/read.wgsl.expected.dxc.hlsl b/test/tint/buffer/storage/dynamic_index/read.wgsl.expected.dxc.hlsl
index 789126b..17abe57 100644
--- a/test/tint/buffer/storage/dynamic_index/read.wgsl.expected.dxc.hlsl
+++ b/test/tint/buffer/storage/dynamic_index/read.wgsl.expected.dxc.hlsl

@@ -1,38 +1,79 @@
-ByteAddressBuffer s : register(t0, space0);
+ByteAddressBuffer sb : register(t0, space0);
 
 struct tint_symbol_1 {
   uint idx : SV_GroupIndex;
 };
 
-float2x3 tint_symbol_8(ByteAddressBuffer buffer, uint offset) {
+float2x2 tint_symbol_14(ByteAddressBuffer buffer, uint offset) {
+  return float2x2(asfloat(buffer.Load2((offset + 0u))), asfloat(buffer.Load2((offset + 8u))));
+}
+
+float2x3 tint_symbol_15(ByteAddressBuffer buffer, uint offset) {
   return float2x3(asfloat(buffer.Load3((offset + 0u))), asfloat(buffer.Load3((offset + 16u))));
 }
 
-float3x2 tint_symbol_9(ByteAddressBuffer buffer, uint offset) {
+float2x4 tint_symbol_16(ByteAddressBuffer buffer, uint offset) {
+  return float2x4(asfloat(buffer.Load4((offset + 0u))), asfloat(buffer.Load4((offset + 16u))));
+}
+
+float3x2 tint_symbol_17(ByteAddressBuffer buffer, uint offset) {
   return float3x2(asfloat(buffer.Load2((offset + 0u))), asfloat(buffer.Load2((offset + 8u))), asfloat(buffer.Load2((offset + 16u))));
 }
 
-typedef int4 tint_symbol_11_ret[4];
-tint_symbol_11_ret tint_symbol_11(ByteAddressBuffer buffer, uint offset) {
-  int4 arr_1[4] = (int4[4])0;
+float3x3 tint_symbol_18(ByteAddressBuffer buffer, uint offset) {
+  return float3x3(asfloat(buffer.Load3((offset + 0u))), asfloat(buffer.Load3((offset + 16u))), asfloat(buffer.Load3((offset + 32u))));
+}
+
+float3x4 tint_symbol_19(ByteAddressBuffer buffer, uint offset) {
+  return float3x4(asfloat(buffer.Load4((offset + 0u))), asfloat(buffer.Load4((offset + 16u))), asfloat(buffer.Load4((offset + 32u))));
+}
+
+float4x2 tint_symbol_20(ByteAddressBuffer buffer, uint offset) {
+  return float4x2(asfloat(buffer.Load2((offset + 0u))), asfloat(buffer.Load2((offset + 8u))), asfloat(buffer.Load2((offset + 16u))), asfloat(buffer.Load2((offset + 24u))));
+}
+
+float4x3 tint_symbol_21(ByteAddressBuffer buffer, uint offset) {
+  return float4x3(asfloat(buffer.Load3((offset + 0u))), asfloat(buffer.Load3((offset + 16u))), asfloat(buffer.Load3((offset + 32u))), asfloat(buffer.Load3((offset + 48u))));
+}
+
+float4x4 tint_symbol_22(ByteAddressBuffer buffer, uint offset) {
+  return float4x4(asfloat(buffer.Load4((offset + 0u))), asfloat(buffer.Load4((offset + 16u))), asfloat(buffer.Load4((offset + 32u))), asfloat(buffer.Load4((offset + 48u))));
+}
+
+typedef float3 tint_symbol_23_ret[2];
+tint_symbol_23_ret tint_symbol_23(ByteAddressBuffer buffer, uint offset) {
+  float3 arr_1[2] = (float3[2])0;
   {
-    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
-      arr_1[i_1] = asint(buffer.Load4((offset + (i_1 * 16u))));
+    for(uint i = 0u; (i < 2u); i = (i + 1u)) {
+      arr_1[i] = asfloat(buffer.Load3((offset + (i * 16u))));
     }
   }
   return arr_1;
 }
 
 void main_inner(uint idx) {
-  const int3 a = asint(s.Load3((176u * idx)));
-  const int b = asint(s.Load(((176u * idx) + 12u)));
-  const uint3 c = s.Load3(((176u * idx) + 16u));
-  const uint d = s.Load(((176u * idx) + 28u));
-  const float3 e = asfloat(s.Load3(((176u * idx) + 32u)));
-  const float f = asfloat(s.Load(((176u * idx) + 44u)));
-  const float2x3 g = tint_symbol_8(s, ((176u * idx) + 48u));
-  const float3x2 h = tint_symbol_9(s, ((176u * idx) + 80u));
-  const int4 i[4] = tint_symbol_11(s, ((176u * idx) + 112u));
+  const float scalar_f32 = asfloat(sb.Load((544u * idx)));
+  const int scalar_i32 = asint(sb.Load(((544u * idx) + 4u)));
+  const uint scalar_u32 = sb.Load(((544u * idx) + 8u));
+  const float2 vec2_f32 = asfloat(sb.Load2(((544u * idx) + 16u)));
+  const int2 vec2_i32 = asint(sb.Load2(((544u * idx) + 24u)));
+  const uint2 vec2_u32 = sb.Load2(((544u * idx) + 32u));
+  const float3 vec3_f32 = asfloat(sb.Load3(((544u * idx) + 48u)));
+  const int3 vec3_i32 = asint(sb.Load3(((544u * idx) + 64u)));
+  const uint3 vec3_u32 = sb.Load3(((544u * idx) + 80u));
+  const float4 vec4_f32 = asfloat(sb.Load4(((544u * idx) + 96u)));
+  const int4 vec4_i32 = asint(sb.Load4(((544u * idx) + 112u)));
+  const uint4 vec4_u32 = sb.Load4(((544u * idx) + 128u));
+  const float2x2 mat2x2_f32 = tint_symbol_14(sb, ((544u * idx) + 144u));
+  const float2x3 mat2x3_f32 = tint_symbol_15(sb, ((544u * idx) + 160u));
+  const float2x4 mat2x4_f32 = tint_symbol_16(sb, ((544u * idx) + 192u));
+  const float3x2 mat3x2_f32 = tint_symbol_17(sb, ((544u * idx) + 224u));
+  const float3x3 mat3x3_f32 = tint_symbol_18(sb, ((544u * idx) + 256u));
+  const float3x4 mat3x4_f32 = tint_symbol_19(sb, ((544u * idx) + 304u));
+  const float4x2 mat4x2_f32 = tint_symbol_20(sb, ((544u * idx) + 352u));
+  const float4x3 mat4x3_f32 = tint_symbol_21(sb, ((544u * idx) + 384u));
+  const float4x4 mat4x4_f32 = tint_symbol_22(sb, ((544u * idx) + 448u));
+  const float3 arr2_vec3_f32[2] = tint_symbol_23(sb, ((544u * idx) + 512u));
 }
 
 [numthreads(1, 1, 1)]

diff --git a/test/tint/buffer/storage/dynamic_index/read.wgsl.expected.fxc.hlsl b/test/tint/buffer/storage/dynamic_index/read.wgsl.expected.fxc.hlsl
index 789126b..17abe57 100644
--- a/test/tint/buffer/storage/dynamic_index/read.wgsl.expected.fxc.hlsl
+++ b/test/tint/buffer/storage/dynamic_index/read.wgsl.expected.fxc.hlsl

@@ -1,38 +1,79 @@
-ByteAddressBuffer s : register(t0, space0);
+ByteAddressBuffer sb : register(t0, space0);
 
 struct tint_symbol_1 {
   uint idx : SV_GroupIndex;
 };
 
-float2x3 tint_symbol_8(ByteAddressBuffer buffer, uint offset) {
+float2x2 tint_symbol_14(ByteAddressBuffer buffer, uint offset) {
+  return float2x2(asfloat(buffer.Load2((offset + 0u))), asfloat(buffer.Load2((offset + 8u))));
+}
+
+float2x3 tint_symbol_15(ByteAddressBuffer buffer, uint offset) {
   return float2x3(asfloat(buffer.Load3((offset + 0u))), asfloat(buffer.Load3((offset + 16u))));
 }
 
-float3x2 tint_symbol_9(ByteAddressBuffer buffer, uint offset) {
+float2x4 tint_symbol_16(ByteAddressBuffer buffer, uint offset) {
+  return float2x4(asfloat(buffer.Load4((offset + 0u))), asfloat(buffer.Load4((offset + 16u))));
+}
+
+float3x2 tint_symbol_17(ByteAddressBuffer buffer, uint offset) {
   return float3x2(asfloat(buffer.Load2((offset + 0u))), asfloat(buffer.Load2((offset + 8u))), asfloat(buffer.Load2((offset + 16u))));
 }
 
-typedef int4 tint_symbol_11_ret[4];
-tint_symbol_11_ret tint_symbol_11(ByteAddressBuffer buffer, uint offset) {
-  int4 arr_1[4] = (int4[4])0;
+float3x3 tint_symbol_18(ByteAddressBuffer buffer, uint offset) {
+  return float3x3(asfloat(buffer.Load3((offset + 0u))), asfloat(buffer.Load3((offset + 16u))), asfloat(buffer.Load3((offset + 32u))));
+}
+
+float3x4 tint_symbol_19(ByteAddressBuffer buffer, uint offset) {
+  return float3x4(asfloat(buffer.Load4((offset + 0u))), asfloat(buffer.Load4((offset + 16u))), asfloat(buffer.Load4((offset + 32u))));
+}
+
+float4x2 tint_symbol_20(ByteAddressBuffer buffer, uint offset) {
+  return float4x2(asfloat(buffer.Load2((offset + 0u))), asfloat(buffer.Load2((offset + 8u))), asfloat(buffer.Load2((offset + 16u))), asfloat(buffer.Load2((offset + 24u))));
+}
+
+float4x3 tint_symbol_21(ByteAddressBuffer buffer, uint offset) {
+  return float4x3(asfloat(buffer.Load3((offset + 0u))), asfloat(buffer.Load3((offset + 16u))), asfloat(buffer.Load3((offset + 32u))), asfloat(buffer.Load3((offset + 48u))));
+}
+
+float4x4 tint_symbol_22(ByteAddressBuffer buffer, uint offset) {
+  return float4x4(asfloat(buffer.Load4((offset + 0u))), asfloat(buffer.Load4((offset + 16u))), asfloat(buffer.Load4((offset + 32u))), asfloat(buffer.Load4((offset + 48u))));
+}
+
+typedef float3 tint_symbol_23_ret[2];
+tint_symbol_23_ret tint_symbol_23(ByteAddressBuffer buffer, uint offset) {
+  float3 arr_1[2] = (float3[2])0;
   {
-    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
-      arr_1[i_1] = asint(buffer.Load4((offset + (i_1 * 16u))));
+    for(uint i = 0u; (i < 2u); i = (i + 1u)) {
+      arr_1[i] = asfloat(buffer.Load3((offset + (i * 16u))));
     }
   }
   return arr_1;
 }
 
 void main_inner(uint idx) {
-  const int3 a = asint(s.Load3((176u * idx)));
-  const int b = asint(s.Load(((176u * idx) + 12u)));
-  const uint3 c = s.Load3(((176u * idx) + 16u));
-  const uint d = s.Load(((176u * idx) + 28u));
-  const float3 e = asfloat(s.Load3(((176u * idx) + 32u)));
-  const float f = asfloat(s.Load(((176u * idx) + 44u)));
-  const float2x3 g = tint_symbol_8(s, ((176u * idx) + 48u));
-  const float3x2 h = tint_symbol_9(s, ((176u * idx) + 80u));
-  const int4 i[4] = tint_symbol_11(s, ((176u * idx) + 112u));
+  const float scalar_f32 = asfloat(sb.Load((544u * idx)));
+  const int scalar_i32 = asint(sb.Load(((544u * idx) + 4u)));
+  const uint scalar_u32 = sb.Load(((544u * idx) + 8u));
+  const float2 vec2_f32 = asfloat(sb.Load2(((544u * idx) + 16u)));
+  const int2 vec2_i32 = asint(sb.Load2(((544u * idx) + 24u)));
+  const uint2 vec2_u32 = sb.Load2(((544u * idx) + 32u));
+  const float3 vec3_f32 = asfloat(sb.Load3(((544u * idx) + 48u)));
+  const int3 vec3_i32 = asint(sb.Load3(((544u * idx) + 64u)));
+  const uint3 vec3_u32 = sb.Load3(((544u * idx) + 80u));
+  const float4 vec4_f32 = asfloat(sb.Load4(((544u * idx) + 96u)));
+  const int4 vec4_i32 = asint(sb.Load4(((544u * idx) + 112u)));
+  const uint4 vec4_u32 = sb.Load4(((544u * idx) + 128u));
+  const float2x2 mat2x2_f32 = tint_symbol_14(sb, ((544u * idx) + 144u));
+  const float2x3 mat2x3_f32 = tint_symbol_15(sb, ((544u * idx) + 160u));
+  const float2x4 mat2x4_f32 = tint_symbol_16(sb, ((544u * idx) + 192u));
+  const float3x2 mat3x2_f32 = tint_symbol_17(sb, ((544u * idx) + 224u));
+  const float3x3 mat3x3_f32 = tint_symbol_18(sb, ((544u * idx) + 256u));
+  const float3x4 mat3x4_f32 = tint_symbol_19(sb, ((544u * idx) + 304u));
+  const float4x2 mat4x2_f32 = tint_symbol_20(sb, ((544u * idx) + 352u));
+  const float4x3 mat4x3_f32 = tint_symbol_21(sb, ((544u * idx) + 384u));
+  const float4x4 mat4x4_f32 = tint_symbol_22(sb, ((544u * idx) + 448u));
+  const float3 arr2_vec3_f32[2] = tint_symbol_23(sb, ((544u * idx) + 512u));
 }
 
 [numthreads(1, 1, 1)]

diff --git a/test/tint/buffer/storage/dynamic_index/read.wgsl.expected.glsl b/test/tint/buffer/storage/dynamic_index/read.wgsl.expected.glsl
index 704afa3..b497053 100644
--- a/test/tint/buffer/storage/dynamic_index/read.wgsl.expected.glsl
+++ b/test/tint/buffer/storage/dynamic_index/read.wgsl.expected.glsl

@@ -1,33 +1,65 @@
 #version 310 es
 
 struct Inner {
-  ivec3 a;
-  int b;
-  uvec3 c;
-  uint d;
-  vec3 e;
-  float f;
-  mat2x3 g;
-  mat3x2 h;
+  float scalar_f32;
+  int scalar_i32;
+  uint scalar_u32;
   uint pad;
+  vec2 vec2_f32;
+  ivec2 vec2_i32;
+  uvec2 vec2_u32;
   uint pad_1;
-  ivec4 i[4];
+  uint pad_2;
+  vec3 vec3_f32;
+  uint pad_3;
+  ivec3 vec3_i32;
+  uint pad_4;
+  uvec3 vec3_u32;
+  uint pad_5;
+  vec4 vec4_f32;
+  ivec4 vec4_i32;
+  uvec4 vec4_u32;
+  mat2 mat2x2_f32;
+  mat2x3 mat2x3_f32;
+  mat2x4 mat2x4_f32;
+  mat3x2 mat3x2_f32;
+  uint pad_6;
+  uint pad_7;
+  mat3 mat3x3_f32;
+  mat3x4 mat3x4_f32;
+  mat4x2 mat4x2_f32;
+  mat4x3 mat4x3_f32;
+  mat4 mat4x4_f32;
+  vec3 arr2_vec3_f32[2];
 };
 
 layout(binding = 0, std430) buffer S_ssbo {
   Inner arr[];
-} s;
+} sb;
 
 void tint_symbol(uint idx) {
-  ivec3 a = s.arr[idx].a;
-  int b = s.arr[idx].b;
-  uvec3 c = s.arr[idx].c;
-  uint d = s.arr[idx].d;
-  vec3 e = s.arr[idx].e;
-  float f = s.arr[idx].f;
-  mat2x3 g = s.arr[idx].g;
-  mat3x2 h = s.arr[idx].h;
-  ivec4 i[4] = s.arr[idx].i;
+  float scalar_f32 = sb.arr[idx].scalar_f32;
+  int scalar_i32 = sb.arr[idx].scalar_i32;
+  uint scalar_u32 = sb.arr[idx].scalar_u32;
+  vec2 vec2_f32 = sb.arr[idx].vec2_f32;
+  ivec2 vec2_i32 = sb.arr[idx].vec2_i32;
+  uvec2 vec2_u32 = sb.arr[idx].vec2_u32;
+  vec3 vec3_f32 = sb.arr[idx].vec3_f32;
+  ivec3 vec3_i32 = sb.arr[idx].vec3_i32;
+  uvec3 vec3_u32 = sb.arr[idx].vec3_u32;
+  vec4 vec4_f32 = sb.arr[idx].vec4_f32;
+  ivec4 vec4_i32 = sb.arr[idx].vec4_i32;
+  uvec4 vec4_u32 = sb.arr[idx].vec4_u32;
+  mat2 mat2x2_f32 = sb.arr[idx].mat2x2_f32;
+  mat2x3 mat2x3_f32 = sb.arr[idx].mat2x3_f32;
+  mat2x4 mat2x4_f32 = sb.arr[idx].mat2x4_f32;
+  mat3x2 mat3x2_f32 = sb.arr[idx].mat3x2_f32;
+  mat3 mat3x3_f32 = sb.arr[idx].mat3x3_f32;
+  mat3x4 mat3x4_f32 = sb.arr[idx].mat3x4_f32;
+  mat4x2 mat4x2_f32 = sb.arr[idx].mat4x2_f32;
+  mat4x3 mat4x3_f32 = sb.arr[idx].mat4x3_f32;
+  mat4 mat4x4_f32 = sb.arr[idx].mat4x4_f32;
+  vec3 arr2_vec3_f32[2] = sb.arr[idx].arr2_vec3_f32;
 }
 
 layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;

diff --git a/test/tint/buffer/storage/dynamic_index/read.wgsl.expected.msl b/test/tint/buffer/storage/dynamic_index/read.wgsl.expected.msl
index 52e5e7d..06cc9d2 100644
--- a/test/tint/buffer/storage/dynamic_index/read.wgsl.expected.msl
+++ b/test/tint/buffer/storage/dynamic_index/read.wgsl.expected.msl

@@ -15,16 +15,34 @@
 };
 
 struct Inner {
-  /* 0x0000 */ packed_int3 a;
-  /* 0x000c */ int b;
-  /* 0x0010 */ packed_uint3 c;
-  /* 0x001c */ uint d;
-  /* 0x0020 */ packed_float3 e;
-  /* 0x002c */ float f;
-  /* 0x0030 */ float2x3 g;
-  /* 0x0050 */ float3x2 h;
-  /* 0x0068 */ tint_array<int8_t, 8> tint_pad;
-  /* 0x0070 */ tint_array<int4, 4> i;
+  /* 0x0000 */ float scalar_f32;
+  /* 0x0004 */ int scalar_i32;
+  /* 0x0008 */ uint scalar_u32;
+  /* 0x000c */ tint_array<int8_t, 4> tint_pad;
+  /* 0x0010 */ float2 vec2_f32;
+  /* 0x0018 */ int2 vec2_i32;
+  /* 0x0020 */ uint2 vec2_u32;
+  /* 0x0028 */ tint_array<int8_t, 8> tint_pad_1;
+  /* 0x0030 */ packed_float3 vec3_f32;
+  /* 0x003c */ tint_array<int8_t, 4> tint_pad_2;
+  /* 0x0040 */ packed_int3 vec3_i32;
+  /* 0x004c */ tint_array<int8_t, 4> tint_pad_3;
+  /* 0x0050 */ packed_uint3 vec3_u32;
+  /* 0x005c */ tint_array<int8_t, 4> tint_pad_4;
+  /* 0x0060 */ float4 vec4_f32;
+  /* 0x0070 */ int4 vec4_i32;
+  /* 0x0080 */ uint4 vec4_u32;
+  /* 0x0090 */ float2x2 mat2x2_f32;
+  /* 0x00a0 */ float2x3 mat2x3_f32;
+  /* 0x00c0 */ float2x4 mat2x4_f32;
+  /* 0x00e0 */ float3x2 mat3x2_f32;
+  /* 0x00f8 */ tint_array<int8_t, 8> tint_pad_5;
+  /* 0x0100 */ float3x3 mat3x3_f32;
+  /* 0x0130 */ float3x4 mat3x4_f32;
+  /* 0x0160 */ float4x2 mat4x2_f32;
+  /* 0x0180 */ float4x3 mat4x3_f32;
+  /* 0x01c0 */ float4x4 mat4x4_f32;
+  /* 0x0200 */ tint_array<float3, 2> arr2_vec3_f32;
 };
 
 struct S {
@@ -32,15 +50,28 @@
 };
 
 void tint_symbol_inner(uint idx, const device S* const tint_symbol_1) {
-  int3 const a = int3((*(tint_symbol_1)).arr[idx].a);
-  int const b = (*(tint_symbol_1)).arr[idx].b;
-  uint3 const c = uint3((*(tint_symbol_1)).arr[idx].c);
-  uint const d = (*(tint_symbol_1)).arr[idx].d;
-  float3 const e = float3((*(tint_symbol_1)).arr[idx].e);
-  float const f = (*(tint_symbol_1)).arr[idx].f;
-  float2x3 const g = (*(tint_symbol_1)).arr[idx].g;
-  float3x2 const h = (*(tint_symbol_1)).arr[idx].h;
-  tint_array<int4, 4> const i = (*(tint_symbol_1)).arr[idx].i;
+  float const scalar_f32 = (*(tint_symbol_1)).arr[idx].scalar_f32;
+  int const scalar_i32 = (*(tint_symbol_1)).arr[idx].scalar_i32;
+  uint const scalar_u32 = (*(tint_symbol_1)).arr[idx].scalar_u32;
+  float2 const vec2_f32 = (*(tint_symbol_1)).arr[idx].vec2_f32;
+  int2 const vec2_i32 = (*(tint_symbol_1)).arr[idx].vec2_i32;
+  uint2 const vec2_u32 = (*(tint_symbol_1)).arr[idx].vec2_u32;
+  float3 const vec3_f32 = float3((*(tint_symbol_1)).arr[idx].vec3_f32);
+  int3 const vec3_i32 = int3((*(tint_symbol_1)).arr[idx].vec3_i32);
+  uint3 const vec3_u32 = uint3((*(tint_symbol_1)).arr[idx].vec3_u32);
+  float4 const vec4_f32 = (*(tint_symbol_1)).arr[idx].vec4_f32;
+  int4 const vec4_i32 = (*(tint_symbol_1)).arr[idx].vec4_i32;
+  uint4 const vec4_u32 = (*(tint_symbol_1)).arr[idx].vec4_u32;
+  float2x2 const mat2x2_f32 = (*(tint_symbol_1)).arr[idx].mat2x2_f32;
+  float2x3 const mat2x3_f32 = (*(tint_symbol_1)).arr[idx].mat2x3_f32;
+  float2x4 const mat2x4_f32 = (*(tint_symbol_1)).arr[idx].mat2x4_f32;
+  float3x2 const mat3x2_f32 = (*(tint_symbol_1)).arr[idx].mat3x2_f32;
+  float3x3 const mat3x3_f32 = (*(tint_symbol_1)).arr[idx].mat3x3_f32;
+  float3x4 const mat3x4_f32 = (*(tint_symbol_1)).arr[idx].mat3x4_f32;
+  float4x2 const mat4x2_f32 = (*(tint_symbol_1)).arr[idx].mat4x2_f32;
+  float4x3 const mat4x3_f32 = (*(tint_symbol_1)).arr[idx].mat4x3_f32;
+  float4x4 const mat4x4_f32 = (*(tint_symbol_1)).arr[idx].mat4x4_f32;
+  tint_array<float3, 2> const arr2_vec3_f32 = (*(tint_symbol_1)).arr[idx].arr2_vec3_f32;
 }
 
 kernel void tint_symbol(const device S* tint_symbol_2 [[buffer(0)]], uint idx [[thread_index_in_threadgroup]]) {

diff --git a/test/tint/buffer/storage/dynamic_index/read.wgsl.expected.spvasm b/test/tint/buffer/storage/dynamic_index/read.wgsl.expected.spvasm
index 22d8bab..ea7441a 100644
--- a/test/tint/buffer/storage/dynamic_index/read.wgsl.expected.spvasm
+++ b/test/tint/buffer/storage/dynamic_index/read.wgsl.expected.spvasm

@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 0
-; Bound: 65
+; Bound: 128
 ; Schema: 0
                OpCapability Shader
                OpMemoryModel Logical GLSL450
@@ -11,16 +11,29 @@
                OpName %S "S"
                OpMemberName %S 0 "arr"
                OpName %Inner "Inner"
-               OpMemberName %Inner 0 "a"
-               OpMemberName %Inner 1 "b"
-               OpMemberName %Inner 2 "c"
-               OpMemberName %Inner 3 "d"
-               OpMemberName %Inner 4 "e"
-               OpMemberName %Inner 5 "f"
-               OpMemberName %Inner 6 "g"
-               OpMemberName %Inner 7 "h"
-               OpMemberName %Inner 8 "i"
-               OpName %s "s"
+               OpMemberName %Inner 0 "scalar_f32"
+               OpMemberName %Inner 1 "scalar_i32"
+               OpMemberName %Inner 2 "scalar_u32"
+               OpMemberName %Inner 3 "vec2_f32"
+               OpMemberName %Inner 4 "vec2_i32"
+               OpMemberName %Inner 5 "vec2_u32"
+               OpMemberName %Inner 6 "vec3_f32"
+               OpMemberName %Inner 7 "vec3_i32"
+               OpMemberName %Inner 8 "vec3_u32"
+               OpMemberName %Inner 9 "vec4_f32"
+               OpMemberName %Inner 10 "vec4_i32"
+               OpMemberName %Inner 11 "vec4_u32"
+               OpMemberName %Inner 12 "mat2x2_f32"
+               OpMemberName %Inner 13 "mat2x3_f32"
+               OpMemberName %Inner 14 "mat2x4_f32"
+               OpMemberName %Inner 15 "mat3x2_f32"
+               OpMemberName %Inner 16 "mat3x3_f32"
+               OpMemberName %Inner 17 "mat3x4_f32"
+               OpMemberName %Inner 18 "mat4x2_f32"
+               OpMemberName %Inner 19 "mat4x3_f32"
+               OpMemberName %Inner 20 "mat4x4_f32"
+               OpMemberName %Inner 21 "arr2_vec3_f32"
+               OpName %sb "sb"
                OpName %main_inner "main_inner"
                OpName %idx "idx"
                OpName %main "main"
@@ -28,88 +41,178 @@
                OpDecorate %S Block
                OpMemberDecorate %S 0 Offset 0
                OpMemberDecorate %Inner 0 Offset 0
-               OpMemberDecorate %Inner 1 Offset 12
-               OpMemberDecorate %Inner 2 Offset 16
-               OpMemberDecorate %Inner 3 Offset 28
-               OpMemberDecorate %Inner 4 Offset 32
-               OpMemberDecorate %Inner 5 Offset 44
+               OpMemberDecorate %Inner 1 Offset 4
+               OpMemberDecorate %Inner 2 Offset 8
+               OpMemberDecorate %Inner 3 Offset 16
+               OpMemberDecorate %Inner 4 Offset 24
+               OpMemberDecorate %Inner 5 Offset 32
                OpMemberDecorate %Inner 6 Offset 48
-               OpMemberDecorate %Inner 6 ColMajor
-               OpMemberDecorate %Inner 6 MatrixStride 16
-               OpMemberDecorate %Inner 7 Offset 80
-               OpMemberDecorate %Inner 7 ColMajor
-               OpMemberDecorate %Inner 7 MatrixStride 8
-               OpMemberDecorate %Inner 8 Offset 112
-               OpDecorate %_arr_v4int_uint_4 ArrayStride 16
-               OpDecorate %_runtimearr_Inner ArrayStride 176
-               OpDecorate %s NonWritable
-               OpDecorate %s Binding 0
-               OpDecorate %s DescriptorSet 0
+               OpMemberDecorate %Inner 7 Offset 64
+               OpMemberDecorate %Inner 8 Offset 80
+               OpMemberDecorate %Inner 9 Offset 96
+               OpMemberDecorate %Inner 10 Offset 112
+               OpMemberDecorate %Inner 11 Offset 128
+               OpMemberDecorate %Inner 12 Offset 144
+               OpMemberDecorate %Inner 12 ColMajor
+               OpMemberDecorate %Inner 12 MatrixStride 8
+               OpMemberDecorate %Inner 13 Offset 160
+               OpMemberDecorate %Inner 13 ColMajor
+               OpMemberDecorate %Inner 13 MatrixStride 16
+               OpMemberDecorate %Inner 14 Offset 192
+               OpMemberDecorate %Inner 14 ColMajor
+               OpMemberDecorate %Inner 14 MatrixStride 16
+               OpMemberDecorate %Inner 15 Offset 224
+               OpMemberDecorate %Inner 15 ColMajor
+               OpMemberDecorate %Inner 15 MatrixStride 8
+               OpMemberDecorate %Inner 16 Offset 256
+               OpMemberDecorate %Inner 16 ColMajor
+               OpMemberDecorate %Inner 16 MatrixStride 16
+               OpMemberDecorate %Inner 17 Offset 304
+               OpMemberDecorate %Inner 17 ColMajor
+               OpMemberDecorate %Inner 17 MatrixStride 16
+               OpMemberDecorate %Inner 18 Offset 352
+               OpMemberDecorate %Inner 18 ColMajor
+               OpMemberDecorate %Inner 18 MatrixStride 8
+               OpMemberDecorate %Inner 19 Offset 384
+               OpMemberDecorate %Inner 19 ColMajor
+               OpMemberDecorate %Inner 19 MatrixStride 16
+               OpMemberDecorate %Inner 20 Offset 448
+               OpMemberDecorate %Inner 20 ColMajor
+               OpMemberDecorate %Inner 20 MatrixStride 16
+               OpMemberDecorate %Inner 21 Offset 512
+               OpDecorate %_arr_v3float_uint_2 ArrayStride 16
+               OpDecorate %_runtimearr_Inner ArrayStride 544
+               OpDecorate %sb NonWritable
+               OpDecorate %sb Binding 0
+               OpDecorate %sb DescriptorSet 0
        %uint = OpTypeInt 32 0
 %_ptr_Input_uint = OpTypePointer Input %uint
       %idx_1 = OpVariable %_ptr_Input_uint Input
+      %float = OpTypeFloat 32
         %int = OpTypeInt 32 1
+    %v2float = OpTypeVector %float 2
+      %v2int = OpTypeVector %int 2
+     %v2uint = OpTypeVector %uint 2
+    %v3float = OpTypeVector %float 3
       %v3int = OpTypeVector %int 3
      %v3uint = OpTypeVector %uint 3
-      %float = OpTypeFloat 32
-    %v3float = OpTypeVector %float 3
-%mat2v3float = OpTypeMatrix %v3float 2
-    %v2float = OpTypeVector %float 2
-%mat3v2float = OpTypeMatrix %v2float 3
+    %v4float = OpTypeVector %float 4
       %v4int = OpTypeVector %int 4
-     %uint_4 = OpConstant %uint 4
-%_arr_v4int_uint_4 = OpTypeArray %v4int %uint_4
-      %Inner = OpTypeStruct %v3int %int %v3uint %uint %v3float %float %mat2v3float %mat3v2float %_arr_v4int_uint_4
+     %v4uint = OpTypeVector %uint 4
+%mat2v2float = OpTypeMatrix %v2float 2
+%mat2v3float = OpTypeMatrix %v3float 2
+%mat2v4float = OpTypeMatrix %v4float 2
+%mat3v2float = OpTypeMatrix %v2float 3
+%mat3v3float = OpTypeMatrix %v3float 3
+%mat3v4float = OpTypeMatrix %v4float 3
+%mat4v2float = OpTypeMatrix %v2float 4
+%mat4v3float = OpTypeMatrix %v3float 4
+%mat4v4float = OpTypeMatrix %v4float 4
+     %uint_2 = OpConstant %uint 2
+%_arr_v3float_uint_2 = OpTypeArray %v3float %uint_2
+      %Inner = OpTypeStruct %float %int %uint %v2float %v2int %v2uint %v3float %v3int %v3uint %v4float %v4int %v4uint %mat2v2float %mat2v3float %mat2v4float %mat3v2float %mat3v3float %mat3v4float %mat4v2float %mat4v3float %mat4v4float %_arr_v3float_uint_2
 %_runtimearr_Inner = OpTypeRuntimeArray %Inner
           %S = OpTypeStruct %_runtimearr_Inner
 %_ptr_StorageBuffer_S = OpTypePointer StorageBuffer %S
-          %s = OpVariable %_ptr_StorageBuffer_S StorageBuffer
+         %sb = OpVariable %_ptr_StorageBuffer_S StorageBuffer
        %void = OpTypeVoid
-         %20 = OpTypeFunction %void %uint
+         %31 = OpTypeFunction %void %uint
      %uint_0 = OpConstant %uint 0
-%_ptr_StorageBuffer_v3int = OpTypePointer StorageBuffer %v3int
+%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
      %uint_1 = OpConstant %uint 1
 %_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int
-     %uint_2 = OpConstant %uint 2
-%_ptr_StorageBuffer_v3uint = OpTypePointer StorageBuffer %v3uint
-     %uint_3 = OpConstant %uint 3
 %_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
-%_ptr_StorageBuffer_v3float = OpTypePointer StorageBuffer %v3float
+     %uint_3 = OpConstant %uint 3
+%_ptr_StorageBuffer_v2float = OpTypePointer StorageBuffer %v2float
+     %uint_4 = OpConstant %uint 4
+%_ptr_StorageBuffer_v2int = OpTypePointer StorageBuffer %v2int
      %uint_5 = OpConstant %uint 5
-%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
+%_ptr_StorageBuffer_v2uint = OpTypePointer StorageBuffer %v2uint
      %uint_6 = OpConstant %uint 6
-%_ptr_StorageBuffer_mat2v3float = OpTypePointer StorageBuffer %mat2v3float
+%_ptr_StorageBuffer_v3float = OpTypePointer StorageBuffer %v3float
      %uint_7 = OpConstant %uint 7
-%_ptr_StorageBuffer_mat3v2float = OpTypePointer StorageBuffer %mat3v2float
+%_ptr_StorageBuffer_v3int = OpTypePointer StorageBuffer %v3int
      %uint_8 = OpConstant %uint 8
-%_ptr_StorageBuffer__arr_v4int_uint_4 = OpTypePointer StorageBuffer %_arr_v4int_uint_4
-         %60 = OpTypeFunction %void
- %main_inner = OpFunction %void None %20
+%_ptr_StorageBuffer_v3uint = OpTypePointer StorageBuffer %v3uint
+     %uint_9 = OpConstant %uint 9
+%_ptr_StorageBuffer_v4float = OpTypePointer StorageBuffer %v4float
+    %uint_10 = OpConstant %uint 10
+%_ptr_StorageBuffer_v4int = OpTypePointer StorageBuffer %v4int
+    %uint_11 = OpConstant %uint 11
+%_ptr_StorageBuffer_v4uint = OpTypePointer StorageBuffer %v4uint
+    %uint_12 = OpConstant %uint 12
+%_ptr_StorageBuffer_mat2v2float = OpTypePointer StorageBuffer %mat2v2float
+    %uint_13 = OpConstant %uint 13
+%_ptr_StorageBuffer_mat2v3float = OpTypePointer StorageBuffer %mat2v3float
+    %uint_14 = OpConstant %uint 14
+%_ptr_StorageBuffer_mat2v4float = OpTypePointer StorageBuffer %mat2v4float
+    %uint_15 = OpConstant %uint 15
+%_ptr_StorageBuffer_mat3v2float = OpTypePointer StorageBuffer %mat3v2float
+    %uint_16 = OpConstant %uint 16
+%_ptr_StorageBuffer_mat3v3float = OpTypePointer StorageBuffer %mat3v3float
+    %uint_17 = OpConstant %uint 17
+%_ptr_StorageBuffer_mat3v4float = OpTypePointer StorageBuffer %mat3v4float
+    %uint_18 = OpConstant %uint 18
+%_ptr_StorageBuffer_mat4v2float = OpTypePointer StorageBuffer %mat4v2float
+    %uint_19 = OpConstant %uint 19
+%_ptr_StorageBuffer_mat4v3float = OpTypePointer StorageBuffer %mat4v3float
+    %uint_20 = OpConstant %uint 20
+%_ptr_StorageBuffer_mat4v4float = OpTypePointer StorageBuffer %mat4v4float
+    %uint_21 = OpConstant %uint 21
+%_ptr_StorageBuffer__arr_v3float_uint_2 = OpTypePointer StorageBuffer %_arr_v3float_uint_2
+        %123 = OpTypeFunction %void
+ %main_inner = OpFunction %void None %31
         %idx = OpFunctionParameter %uint
-         %24 = OpLabel
-         %27 = OpAccessChain %_ptr_StorageBuffer_v3int %s %uint_0 %idx %uint_0
-         %28 = OpLoad %v3int %27
-         %31 = OpAccessChain %_ptr_StorageBuffer_int %s %uint_0 %idx %uint_1
-         %32 = OpLoad %int %31
-         %35 = OpAccessChain %_ptr_StorageBuffer_v3uint %s %uint_0 %idx %uint_2
-         %36 = OpLoad %v3uint %35
-         %39 = OpAccessChain %_ptr_StorageBuffer_uint %s %uint_0 %idx %uint_3
-         %40 = OpLoad %uint %39
-         %42 = OpAccessChain %_ptr_StorageBuffer_v3float %s %uint_0 %idx %uint_4
-         %43 = OpLoad %v3float %42
-         %46 = OpAccessChain %_ptr_StorageBuffer_float %s %uint_0 %idx %uint_5
-         %47 = OpLoad %float %46
-         %50 = OpAccessChain %_ptr_StorageBuffer_mat2v3float %s %uint_0 %idx %uint_6
-         %51 = OpLoad %mat2v3float %50
-         %54 = OpAccessChain %_ptr_StorageBuffer_mat3v2float %s %uint_0 %idx %uint_7
-         %55 = OpLoad %mat3v2float %54
-         %58 = OpAccessChain %_ptr_StorageBuffer__arr_v4int_uint_4 %s %uint_0 %idx %uint_8
-         %59 = OpLoad %_arr_v4int_uint_4 %58
+         %35 = OpLabel
+         %38 = OpAccessChain %_ptr_StorageBuffer_float %sb %uint_0 %idx %uint_0
+         %39 = OpLoad %float %38
+         %42 = OpAccessChain %_ptr_StorageBuffer_int %sb %uint_0 %idx %uint_1
+         %43 = OpLoad %int %42
+         %45 = OpAccessChain %_ptr_StorageBuffer_uint %sb %uint_0 %idx %uint_2
+         %46 = OpLoad %uint %45
+         %49 = OpAccessChain %_ptr_StorageBuffer_v2float %sb %uint_0 %idx %uint_3
+         %50 = OpLoad %v2float %49
+         %53 = OpAccessChain %_ptr_StorageBuffer_v2int %sb %uint_0 %idx %uint_4
+         %54 = OpLoad %v2int %53
+         %57 = OpAccessChain %_ptr_StorageBuffer_v2uint %sb %uint_0 %idx %uint_5
+         %58 = OpLoad %v2uint %57
+         %61 = OpAccessChain %_ptr_StorageBuffer_v3float %sb %uint_0 %idx %uint_6
+         %62 = OpLoad %v3float %61
+         %65 = OpAccessChain %_ptr_StorageBuffer_v3int %sb %uint_0 %idx %uint_7
+         %66 = OpLoad %v3int %65
+         %69 = OpAccessChain %_ptr_StorageBuffer_v3uint %sb %uint_0 %idx %uint_8
+         %70 = OpLoad %v3uint %69
+         %73 = OpAccessChain %_ptr_StorageBuffer_v4float %sb %uint_0 %idx %uint_9
+         %74 = OpLoad %v4float %73
+         %77 = OpAccessChain %_ptr_StorageBuffer_v4int %sb %uint_0 %idx %uint_10
+         %78 = OpLoad %v4int %77
+         %81 = OpAccessChain %_ptr_StorageBuffer_v4uint %sb %uint_0 %idx %uint_11
+         %82 = OpLoad %v4uint %81
+         %85 = OpAccessChain %_ptr_StorageBuffer_mat2v2float %sb %uint_0 %idx %uint_12
+         %86 = OpLoad %mat2v2float %85
+         %89 = OpAccessChain %_ptr_StorageBuffer_mat2v3float %sb %uint_0 %idx %uint_13
+         %90 = OpLoad %mat2v3float %89
+         %93 = OpAccessChain %_ptr_StorageBuffer_mat2v4float %sb %uint_0 %idx %uint_14
+         %94 = OpLoad %mat2v4float %93
+         %97 = OpAccessChain %_ptr_StorageBuffer_mat3v2float %sb %uint_0 %idx %uint_15
+         %98 = OpLoad %mat3v2float %97
+        %101 = OpAccessChain %_ptr_StorageBuffer_mat3v3float %sb %uint_0 %idx %uint_16
+        %102 = OpLoad %mat3v3float %101
+        %105 = OpAccessChain %_ptr_StorageBuffer_mat3v4float %sb %uint_0 %idx %uint_17
+        %106 = OpLoad %mat3v4float %105
+        %109 = OpAccessChain %_ptr_StorageBuffer_mat4v2float %sb %uint_0 %idx %uint_18
+        %110 = OpLoad %mat4v2float %109
+        %113 = OpAccessChain %_ptr_StorageBuffer_mat4v3float %sb %uint_0 %idx %uint_19
+        %114 = OpLoad %mat4v3float %113
+        %117 = OpAccessChain %_ptr_StorageBuffer_mat4v4float %sb %uint_0 %idx %uint_20
+        %118 = OpLoad %mat4v4float %117
+        %121 = OpAccessChain %_ptr_StorageBuffer__arr_v3float_uint_2 %sb %uint_0 %idx %uint_21
+        %122 = OpLoad %_arr_v3float_uint_2 %121
                OpReturn
                OpFunctionEnd
-       %main = OpFunction %void None %60
-         %62 = OpLabel
-         %64 = OpLoad %uint %idx_1
-         %63 = OpFunctionCall %void %main_inner %64
+       %main = OpFunction %void None %123
+        %125 = OpLabel
+        %127 = OpLoad %uint %idx_1
+        %126 = OpFunctionCall %void %main_inner %127
                OpReturn
                OpFunctionEnd

diff --git a/test/tint/buffer/storage/dynamic_index/read.wgsl.expected.wgsl b/test/tint/buffer/storage/dynamic_index/read.wgsl.expected.wgsl
index f082887..6e7b7b7 100644
--- a/test/tint/buffer/storage/dynamic_index/read.wgsl.expected.wgsl
+++ b/test/tint/buffer/storage/dynamic_index/read.wgsl.expected.wgsl

@@ -1,30 +1,56 @@
 struct Inner {
-  a : vec3<i32>,
-  b : i32,
-  c : vec3<u32>,
-  d : u32,
-  e : vec3<f32>,
-  f : f32,
-  g : mat2x3<f32>,
-  h : mat3x2<f32>,
-  i : array<vec4<i32>, 4>,
+  scalar_f32 : f32,
+  scalar_i32 : i32,
+  scalar_u32 : u32,
+  vec2_f32 : vec2<f32>,
+  vec2_i32 : vec2<i32>,
+  vec2_u32 : vec2<u32>,
+  vec3_f32 : vec3<f32>,
+  vec3_i32 : vec3<i32>,
+  vec3_u32 : vec3<u32>,
+  vec4_f32 : vec4<f32>,
+  vec4_i32 : vec4<i32>,
+  vec4_u32 : vec4<u32>,
+  mat2x2_f32 : mat2x2<f32>,
+  mat2x3_f32 : mat2x3<f32>,
+  mat2x4_f32 : mat2x4<f32>,
+  mat3x2_f32 : mat3x2<f32>,
+  mat3x3_f32 : mat3x3<f32>,
+  mat3x4_f32 : mat3x4<f32>,
+  mat4x2_f32 : mat4x2<f32>,
+  mat4x3_f32 : mat4x3<f32>,
+  mat4x4_f32 : mat4x4<f32>,
+  arr2_vec3_f32 : array<vec3<f32>, 2>,
 }
 
 struct S {
   arr : array<Inner>,
 }
 
-@binding(0) @group(0) var<storage, read> s : S;
+@binding(0) @group(0) var<storage, read> sb : S;
 
 @compute @workgroup_size(1)
 fn main(@builtin(local_invocation_index) idx : u32) {
-  let a = s.arr[idx].a;
-  let b = s.arr[idx].b;
-  let c = s.arr[idx].c;
-  let d = s.arr[idx].d;
-  let e = s.arr[idx].e;
-  let f = s.arr[idx].f;
-  let g = s.arr[idx].g;
-  let h = s.arr[idx].h;
-  let i = s.arr[idx].i;
+  let scalar_f32 : f32 = sb.arr[idx].scalar_f32;
+  let scalar_i32 : i32 = sb.arr[idx].scalar_i32;
+  let scalar_u32 : u32 = sb.arr[idx].scalar_u32;
+  let vec2_f32 : vec2<f32> = sb.arr[idx].vec2_f32;
+  let vec2_i32 : vec2<i32> = sb.arr[idx].vec2_i32;
+  let vec2_u32 : vec2<u32> = sb.arr[idx].vec2_u32;
+  let vec3_f32 : vec3<f32> = sb.arr[idx].vec3_f32;
+  let vec3_i32 : vec3<i32> = sb.arr[idx].vec3_i32;
+  let vec3_u32 : vec3<u32> = sb.arr[idx].vec3_u32;
+  let vec4_f32 : vec4<f32> = sb.arr[idx].vec4_f32;
+  let vec4_i32 : vec4<i32> = sb.arr[idx].vec4_i32;
+  let vec4_u32 : vec4<u32> = sb.arr[idx].vec4_u32;
+  let mat2x2_f32 : mat2x2<f32> = sb.arr[idx].mat2x2_f32;
+  let mat2x3_f32 : mat2x3<f32> = sb.arr[idx].mat2x3_f32;
+  let mat2x4_f32 : mat2x4<f32> = sb.arr[idx].mat2x4_f32;
+  let mat3x2_f32 : mat3x2<f32> = sb.arr[idx].mat3x2_f32;
+  let mat3x3_f32 : mat3x3<f32> = sb.arr[idx].mat3x3_f32;
+  let mat3x4_f32 : mat3x4<f32> = sb.arr[idx].mat3x4_f32;
+  let mat4x2_f32 : mat4x2<f32> = sb.arr[idx].mat4x2_f32;
+  let mat4x3_f32 : mat4x3<f32> = sb.arr[idx].mat4x3_f32;
+  let mat4x4_f32 : mat4x4<f32> = sb.arr[idx].mat4x4_f32;
+  let arr2_vec3_f32 : array<vec3<f32>, 2> = sb.arr[idx].arr2_vec3_f32;
 }

diff --git a/test/tint/buffer/storage/dynamic_index/read_f16.wgsl b/test/tint/buffer/storage/dynamic_index/read_f16.wgsl
new file mode 100644
index 0000000..9351d4b
--- /dev/null
+++ b/test/tint/buffer/storage/dynamic_index/read_f16.wgsl

@@ -0,0 +1,86 @@
+enable f16;
+
+struct Inner {
+    scalar_f32 : f32,
+    scalar_i32 : i32,
+    scalar_u32 : u32,
+    scalar_f16 : f16,
+    vec2_f32 : vec2<f32>,
+    vec2_i32 : vec2<i32>,
+    vec2_u32 : vec2<u32>,
+    vec2_f16 : vec2<f16>,
+    vec3_f32 : vec3<f32>,
+    vec3_i32 : vec3<i32>,
+    vec3_u32 : vec3<u32>,
+    vec3_f16 : vec3<f16>,
+    vec4_f32 : vec4<f32>,
+    vec4_i32 : vec4<i32>,
+    vec4_u32 : vec4<u32>,
+    vec4_f16 : vec4<f16>,
+    mat2x2_f32 : mat2x2<f32>,
+    mat2x3_f32 : mat2x3<f32>,
+    mat2x4_f32 : mat2x4<f32>,
+    mat3x2_f32 : mat3x2<f32>,
+    mat3x3_f32 : mat3x3<f32>,
+    mat3x4_f32 : mat3x4<f32>,
+    mat4x2_f32 : mat4x2<f32>,
+    mat4x3_f32 : mat4x3<f32>,
+    mat4x4_f32 : mat4x4<f32>,
+    mat2x2_f16 : mat2x2<f16>,
+    mat2x3_f16 : mat2x3<f16>,
+    mat2x4_f16 : mat2x4<f16>,
+    mat3x2_f16 : mat3x2<f16>,
+    mat3x3_f16 : mat3x3<f16>,
+    mat3x4_f16 : mat3x4<f16>,
+    mat4x2_f16 : mat4x2<f16>,
+    mat4x3_f16 : mat4x3<f16>,
+    mat4x4_f16 : mat4x4<f16>,
+    arr2_vec3_f32 : array<vec3<f32>, 2>,
+    arr2_mat4x2_f16 : array<mat4x2<f16>, 2>,
+};
+
+struct S {
+    arr : array<Inner>,
+};
+
+@binding(0) @group(0) var<storage, read> sb : S;
+
+@compute @workgroup_size(1)
+fn main(@builtin(local_invocation_index) idx : u32) {
+    let scalar_f32 : f32 = sb.arr[idx].scalar_f32;
+    let scalar_i32 : i32 = sb.arr[idx].scalar_i32;
+    let scalar_u32 : u32 = sb.arr[idx].scalar_u32;
+    let scalar_f16 : f16 = sb.arr[idx].scalar_f16;
+    let vec2_f32 : vec2<f32> = sb.arr[idx].vec2_f32;
+    let vec2_i32 : vec2<i32> = sb.arr[idx].vec2_i32;
+    let vec2_u32 : vec2<u32> = sb.arr[idx].vec2_u32;
+    let vec2_f16 : vec2<f16> = sb.arr[idx].vec2_f16;
+    let vec3_f32 : vec3<f32> = sb.arr[idx].vec3_f32;
+    let vec3_i32 : vec3<i32> = sb.arr[idx].vec3_i32;
+    let vec3_u32 : vec3<u32> = sb.arr[idx].vec3_u32;
+    let vec3_f16 : vec3<f16> = sb.arr[idx].vec3_f16;
+    let vec4_f32 : vec4<f32> = sb.arr[idx].vec4_f32;
+    let vec4_i32 : vec4<i32> = sb.arr[idx].vec4_i32;
+    let vec4_u32 : vec4<u32> = sb.arr[idx].vec4_u32;
+    let vec4_f16 : vec4<f16> = sb.arr[idx].vec4_f16;
+    let mat2x2_f32 : mat2x2<f32> = sb.arr[idx].mat2x2_f32;
+    let mat2x3_f32 : mat2x3<f32> = sb.arr[idx].mat2x3_f32;
+    let mat2x4_f32 : mat2x4<f32> = sb.arr[idx].mat2x4_f32;
+    let mat3x2_f32 : mat3x2<f32> = sb.arr[idx].mat3x2_f32;
+    let mat3x3_f32 : mat3x3<f32> = sb.arr[idx].mat3x3_f32;
+    let mat3x4_f32 : mat3x4<f32> = sb.arr[idx].mat3x4_f32;
+    let mat4x2_f32 : mat4x2<f32> = sb.arr[idx].mat4x2_f32;
+    let mat4x3_f32 : mat4x3<f32> = sb.arr[idx].mat4x3_f32;
+    let mat4x4_f32 : mat4x4<f32> = sb.arr[idx].mat4x4_f32;
+    let mat2x2_f16 : mat2x2<f16> = sb.arr[idx].mat2x2_f16;
+    let mat2x3_f16 : mat2x3<f16> = sb.arr[idx].mat2x3_f16;
+    let mat2x4_f16 : mat2x4<f16> = sb.arr[idx].mat2x4_f16;
+    let mat3x2_f16 : mat3x2<f16> = sb.arr[idx].mat3x2_f16;
+    let mat3x3_f16 : mat3x3<f16> = sb.arr[idx].mat3x3_f16;
+    let mat3x4_f16 : mat3x4<f16> = sb.arr[idx].mat3x4_f16;
+    let mat4x2_f16 : mat4x2<f16> = sb.arr[idx].mat4x2_f16;
+    let mat4x3_f16 : mat4x3<f16> = sb.arr[idx].mat4x3_f16;
+    let mat4x4_f16 : mat4x4<f16> = sb.arr[idx].mat4x4_f16;
+    let arr2_vec3_f32 : array<vec3<f32>, 2> = sb.arr[idx].arr2_vec3_f32;
+    let arr2_mat4x2_f16 : array<mat4x2<f16>, 2> = sb.arr[idx].arr2_mat4x2_f16;
+}

diff --git a/test/tint/buffer/storage/dynamic_index/read_f16.wgsl.expected.dxc.hlsl b/test/tint/buffer/storage/dynamic_index/read_f16.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..e14b161
--- /dev/null
+++ b/test/tint/buffer/storage/dynamic_index/read_f16.wgsl.expected.dxc.hlsl

@@ -0,0 +1,144 @@
+ByteAddressBuffer sb : register(t0, space0);
+
+struct tint_symbol_1 {
+  uint idx : SV_GroupIndex;
+};
+
+float2x2 tint_symbol_18(ByteAddressBuffer buffer, uint offset) {
+  return float2x2(asfloat(buffer.Load2((offset + 0u))), asfloat(buffer.Load2((offset + 8u))));
+}
+
+float2x3 tint_symbol_19(ByteAddressBuffer buffer, uint offset) {
+  return float2x3(asfloat(buffer.Load3((offset + 0u))), asfloat(buffer.Load3((offset + 16u))));
+}
+
+float2x4 tint_symbol_20(ByteAddressBuffer buffer, uint offset) {
+  return float2x4(asfloat(buffer.Load4((offset + 0u))), asfloat(buffer.Load4((offset + 16u))));
+}
+
+float3x2 tint_symbol_21(ByteAddressBuffer buffer, uint offset) {
+  return float3x2(asfloat(buffer.Load2((offset + 0u))), asfloat(buffer.Load2((offset + 8u))), asfloat(buffer.Load2((offset + 16u))));
+}
+
+float3x3 tint_symbol_22(ByteAddressBuffer buffer, uint offset) {
+  return float3x3(asfloat(buffer.Load3((offset + 0u))), asfloat(buffer.Load3((offset + 16u))), asfloat(buffer.Load3((offset + 32u))));
+}
+
+float3x4 tint_symbol_23(ByteAddressBuffer buffer, uint offset) {
+  return float3x4(asfloat(buffer.Load4((offset + 0u))), asfloat(buffer.Load4((offset + 16u))), asfloat(buffer.Load4((offset + 32u))));
+}
+
+float4x2 tint_symbol_24(ByteAddressBuffer buffer, uint offset) {
+  return float4x2(asfloat(buffer.Load2((offset + 0u))), asfloat(buffer.Load2((offset + 8u))), asfloat(buffer.Load2((offset + 16u))), asfloat(buffer.Load2((offset + 24u))));
+}
+
+float4x3 tint_symbol_25(ByteAddressBuffer buffer, uint offset) {
+  return float4x3(asfloat(buffer.Load3((offset + 0u))), asfloat(buffer.Load3((offset + 16u))), asfloat(buffer.Load3((offset + 32u))), asfloat(buffer.Load3((offset + 48u))));
+}
+
+float4x4 tint_symbol_26(ByteAddressBuffer buffer, uint offset) {
+  return float4x4(asfloat(buffer.Load4((offset + 0u))), asfloat(buffer.Load4((offset + 16u))), asfloat(buffer.Load4((offset + 32u))), asfloat(buffer.Load4((offset + 48u))));
+}
+
+matrix<float16_t, 2, 2> tint_symbol_27(ByteAddressBuffer buffer, uint offset) {
+  return matrix<float16_t, 2, 2>(buffer.Load<vector<float16_t, 2> >((offset + 0u)), buffer.Load<vector<float16_t, 2> >((offset + 4u)));
+}
+
+matrix<float16_t, 2, 3> tint_symbol_28(ByteAddressBuffer buffer, uint offset) {
+  return matrix<float16_t, 2, 3>(buffer.Load<vector<float16_t, 3> >((offset + 0u)), buffer.Load<vector<float16_t, 3> >((offset + 8u)));
+}
+
+matrix<float16_t, 2, 4> tint_symbol_29(ByteAddressBuffer buffer, uint offset) {
+  return matrix<float16_t, 2, 4>(buffer.Load<vector<float16_t, 4> >((offset + 0u)), buffer.Load<vector<float16_t, 4> >((offset + 8u)));
+}
+
+matrix<float16_t, 3, 2> tint_symbol_30(ByteAddressBuffer buffer, uint offset) {
+  return matrix<float16_t, 3, 2>(buffer.Load<vector<float16_t, 2> >((offset + 0u)), buffer.Load<vector<float16_t, 2> >((offset + 4u)), buffer.Load<vector<float16_t, 2> >((offset + 8u)));
+}
+
+matrix<float16_t, 3, 3> tint_symbol_31(ByteAddressBuffer buffer, uint offset) {
+  return matrix<float16_t, 3, 3>(buffer.Load<vector<float16_t, 3> >((offset + 0u)), buffer.Load<vector<float16_t, 3> >((offset + 8u)), buffer.Load<vector<float16_t, 3> >((offset + 16u)));
+}
+
+matrix<float16_t, 3, 4> tint_symbol_32(ByteAddressBuffer buffer, uint offset) {
+  return matrix<float16_t, 3, 4>(buffer.Load<vector<float16_t, 4> >((offset + 0u)), buffer.Load<vector<float16_t, 4> >((offset + 8u)), buffer.Load<vector<float16_t, 4> >((offset + 16u)));
+}
+
+matrix<float16_t, 4, 2> tint_symbol_33(ByteAddressBuffer buffer, uint offset) {
+  return matrix<float16_t, 4, 2>(buffer.Load<vector<float16_t, 2> >((offset + 0u)), buffer.Load<vector<float16_t, 2> >((offset + 4u)), buffer.Load<vector<float16_t, 2> >((offset + 8u)), buffer.Load<vector<float16_t, 2> >((offset + 12u)));
+}
+
+matrix<float16_t, 4, 3> tint_symbol_34(ByteAddressBuffer buffer, uint offset) {
+  return matrix<float16_t, 4, 3>(buffer.Load<vector<float16_t, 3> >((offset + 0u)), buffer.Load<vector<float16_t, 3> >((offset + 8u)), buffer.Load<vector<float16_t, 3> >((offset + 16u)), buffer.Load<vector<float16_t, 3> >((offset + 24u)));
+}
+
+matrix<float16_t, 4, 4> tint_symbol_35(ByteAddressBuffer buffer, uint offset) {
+  return matrix<float16_t, 4, 4>(buffer.Load<vector<float16_t, 4> >((offset + 0u)), buffer.Load<vector<float16_t, 4> >((offset + 8u)), buffer.Load<vector<float16_t, 4> >((offset + 16u)), buffer.Load<vector<float16_t, 4> >((offset + 24u)));
+}
+
+typedef float3 tint_symbol_36_ret[2];
+tint_symbol_36_ret tint_symbol_36(ByteAddressBuffer buffer, uint offset) {
+  float3 arr_1[2] = (float3[2])0;
+  {
+    for(uint i = 0u; (i < 2u); i = (i + 1u)) {
+      arr_1[i] = asfloat(buffer.Load3((offset + (i * 16u))));
+    }
+  }
+  return arr_1;
+}
+
+typedef matrix<float16_t, 4, 2> tint_symbol_37_ret[2];
+tint_symbol_37_ret tint_symbol_37(ByteAddressBuffer buffer, uint offset) {
+  matrix<float16_t, 4, 2> arr_2[2] = (matrix<float16_t, 4, 2>[2])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 2u); i_1 = (i_1 + 1u)) {
+      arr_2[i_1] = tint_symbol_33(buffer, (offset + (i_1 * 16u)));
+    }
+  }
+  return arr_2;
+}
+
+void main_inner(uint idx) {
+  const float scalar_f32 = asfloat(sb.Load((800u * idx)));
+  const int scalar_i32 = asint(sb.Load(((800u * idx) + 4u)));
+  const uint scalar_u32 = sb.Load(((800u * idx) + 8u));
+  const float16_t scalar_f16 = sb.Load<float16_t>(((800u * idx) + 12u));
+  const float2 vec2_f32 = asfloat(sb.Load2(((800u * idx) + 16u)));
+  const int2 vec2_i32 = asint(sb.Load2(((800u * idx) + 24u)));
+  const uint2 vec2_u32 = sb.Load2(((800u * idx) + 32u));
+  const vector<float16_t, 2> vec2_f16 = sb.Load<vector<float16_t, 2> >(((800u * idx) + 40u));
+  const float3 vec3_f32 = asfloat(sb.Load3(((800u * idx) + 48u)));
+  const int3 vec3_i32 = asint(sb.Load3(((800u * idx) + 64u)));
+  const uint3 vec3_u32 = sb.Load3(((800u * idx) + 80u));
+  const vector<float16_t, 3> vec3_f16 = sb.Load<vector<float16_t, 3> >(((800u * idx) + 96u));
+  const float4 vec4_f32 = asfloat(sb.Load4(((800u * idx) + 112u)));
+  const int4 vec4_i32 = asint(sb.Load4(((800u * idx) + 128u)));
+  const uint4 vec4_u32 = sb.Load4(((800u * idx) + 144u));
+  const vector<float16_t, 4> vec4_f16 = sb.Load<vector<float16_t, 4> >(((800u * idx) + 160u));
+  const float2x2 mat2x2_f32 = tint_symbol_18(sb, ((800u * idx) + 168u));
+  const float2x3 mat2x3_f32 = tint_symbol_19(sb, ((800u * idx) + 192u));
+  const float2x4 mat2x4_f32 = tint_symbol_20(sb, ((800u * idx) + 224u));
+  const float3x2 mat3x2_f32 = tint_symbol_21(sb, ((800u * idx) + 256u));
+  const float3x3 mat3x3_f32 = tint_symbol_22(sb, ((800u * idx) + 288u));
+  const float3x4 mat3x4_f32 = tint_symbol_23(sb, ((800u * idx) + 336u));
+  const float4x2 mat4x2_f32 = tint_symbol_24(sb, ((800u * idx) + 384u));
+  const float4x3 mat4x3_f32 = tint_symbol_25(sb, ((800u * idx) + 416u));
+  const float4x4 mat4x4_f32 = tint_symbol_26(sb, ((800u * idx) + 480u));
+  const matrix<float16_t, 2, 2> mat2x2_f16 = tint_symbol_27(sb, ((800u * idx) + 544u));
+  const matrix<float16_t, 2, 3> mat2x3_f16 = tint_symbol_28(sb, ((800u * idx) + 552u));
+  const matrix<float16_t, 2, 4> mat2x4_f16 = tint_symbol_29(sb, ((800u * idx) + 568u));
+  const matrix<float16_t, 3, 2> mat3x2_f16 = tint_symbol_30(sb, ((800u * idx) + 584u));
+  const matrix<float16_t, 3, 3> mat3x3_f16 = tint_symbol_31(sb, ((800u * idx) + 600u));
+  const matrix<float16_t, 3, 4> mat3x4_f16 = tint_symbol_32(sb, ((800u * idx) + 624u));
+  const matrix<float16_t, 4, 2> mat4x2_f16 = tint_symbol_33(sb, ((800u * idx) + 648u));
+  const matrix<float16_t, 4, 3> mat4x3_f16 = tint_symbol_34(sb, ((800u * idx) + 664u));
+  const matrix<float16_t, 4, 4> mat4x4_f16 = tint_symbol_35(sb, ((800u * idx) + 696u));
+  const float3 arr2_vec3_f32[2] = tint_symbol_36(sb, ((800u * idx) + 736u));
+  const matrix<float16_t, 4, 2> arr2_mat4x2_f16[2] = tint_symbol_37(sb, ((800u * idx) + 768u));
+}
+
+[numthreads(1, 1, 1)]
+void main(tint_symbol_1 tint_symbol) {
+  main_inner(tint_symbol.idx);
+  return;
+}

diff --git a/test/tint/buffer/storage/dynamic_index/read_f16.wgsl.expected.fxc.hlsl b/test/tint/buffer/storage/dynamic_index/read_f16.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..220cc92
--- /dev/null
+++ b/test/tint/buffer/storage/dynamic_index/read_f16.wgsl.expected.fxc.hlsl

@@ -0,0 +1,149 @@
+SKIP: FAILED
+
+ByteAddressBuffer sb : register(t0, space0);
+
+struct tint_symbol_1 {
+  uint idx : SV_GroupIndex;
+};
+
+float2x2 tint_symbol_18(ByteAddressBuffer buffer, uint offset) {
+  return float2x2(asfloat(buffer.Load2((offset + 0u))), asfloat(buffer.Load2((offset + 8u))));
+}
+
+float2x3 tint_symbol_19(ByteAddressBuffer buffer, uint offset) {
+  return float2x3(asfloat(buffer.Load3((offset + 0u))), asfloat(buffer.Load3((offset + 16u))));
+}
+
+float2x4 tint_symbol_20(ByteAddressBuffer buffer, uint offset) {
+  return float2x4(asfloat(buffer.Load4((offset + 0u))), asfloat(buffer.Load4((offset + 16u))));
+}
+
+float3x2 tint_symbol_21(ByteAddressBuffer buffer, uint offset) {
+  return float3x2(asfloat(buffer.Load2((offset + 0u))), asfloat(buffer.Load2((offset + 8u))), asfloat(buffer.Load2((offset + 16u))));
+}
+
+float3x3 tint_symbol_22(ByteAddressBuffer buffer, uint offset) {
+  return float3x3(asfloat(buffer.Load3((offset + 0u))), asfloat(buffer.Load3((offset + 16u))), asfloat(buffer.Load3((offset + 32u))));
+}
+
+float3x4 tint_symbol_23(ByteAddressBuffer buffer, uint offset) {
+  return float3x4(asfloat(buffer.Load4((offset + 0u))), asfloat(buffer.Load4((offset + 16u))), asfloat(buffer.Load4((offset + 32u))));
+}
+
+float4x2 tint_symbol_24(ByteAddressBuffer buffer, uint offset) {
+  return float4x2(asfloat(buffer.Load2((offset + 0u))), asfloat(buffer.Load2((offset + 8u))), asfloat(buffer.Load2((offset + 16u))), asfloat(buffer.Load2((offset + 24u))));
+}
+
+float4x3 tint_symbol_25(ByteAddressBuffer buffer, uint offset) {
+  return float4x3(asfloat(buffer.Load3((offset + 0u))), asfloat(buffer.Load3((offset + 16u))), asfloat(buffer.Load3((offset + 32u))), asfloat(buffer.Load3((offset + 48u))));
+}
+
+float4x4 tint_symbol_26(ByteAddressBuffer buffer, uint offset) {
+  return float4x4(asfloat(buffer.Load4((offset + 0u))), asfloat(buffer.Load4((offset + 16u))), asfloat(buffer.Load4((offset + 32u))), asfloat(buffer.Load4((offset + 48u))));
+}
+
+matrix<float16_t, 2, 2> tint_symbol_27(ByteAddressBuffer buffer, uint offset) {
+  return matrix<float16_t, 2, 2>(buffer.Load<vector<float16_t, 2> >((offset + 0u)), buffer.Load<vector<float16_t, 2> >((offset + 4u)));
+}
+
+matrix<float16_t, 2, 3> tint_symbol_28(ByteAddressBuffer buffer, uint offset) {
+  return matrix<float16_t, 2, 3>(buffer.Load<vector<float16_t, 3> >((offset + 0u)), buffer.Load<vector<float16_t, 3> >((offset + 8u)));
+}
+
+matrix<float16_t, 2, 4> tint_symbol_29(ByteAddressBuffer buffer, uint offset) {
+  return matrix<float16_t, 2, 4>(buffer.Load<vector<float16_t, 4> >((offset + 0u)), buffer.Load<vector<float16_t, 4> >((offset + 8u)));
+}
+
+matrix<float16_t, 3, 2> tint_symbol_30(ByteAddressBuffer buffer, uint offset) {
+  return matrix<float16_t, 3, 2>(buffer.Load<vector<float16_t, 2> >((offset + 0u)), buffer.Load<vector<float16_t, 2> >((offset + 4u)), buffer.Load<vector<float16_t, 2> >((offset + 8u)));
+}
+
+matrix<float16_t, 3, 3> tint_symbol_31(ByteAddressBuffer buffer, uint offset) {
+  return matrix<float16_t, 3, 3>(buffer.Load<vector<float16_t, 3> >((offset + 0u)), buffer.Load<vector<float16_t, 3> >((offset + 8u)), buffer.Load<vector<float16_t, 3> >((offset + 16u)));
+}
+
+matrix<float16_t, 3, 4> tint_symbol_32(ByteAddressBuffer buffer, uint offset) {
+  return matrix<float16_t, 3, 4>(buffer.Load<vector<float16_t, 4> >((offset + 0u)), buffer.Load<vector<float16_t, 4> >((offset + 8u)), buffer.Load<vector<float16_t, 4> >((offset + 16u)));
+}
+
+matrix<float16_t, 4, 2> tint_symbol_33(ByteAddressBuffer buffer, uint offset) {
+  return matrix<float16_t, 4, 2>(buffer.Load<vector<float16_t, 2> >((offset + 0u)), buffer.Load<vector<float16_t, 2> >((offset + 4u)), buffer.Load<vector<float16_t, 2> >((offset + 8u)), buffer.Load<vector<float16_t, 2> >((offset + 12u)));
+}
+
+matrix<float16_t, 4, 3> tint_symbol_34(ByteAddressBuffer buffer, uint offset) {
+  return matrix<float16_t, 4, 3>(buffer.Load<vector<float16_t, 3> >((offset + 0u)), buffer.Load<vector<float16_t, 3> >((offset + 8u)), buffer.Load<vector<float16_t, 3> >((offset + 16u)), buffer.Load<vector<float16_t, 3> >((offset + 24u)));
+}
+
+matrix<float16_t, 4, 4> tint_symbol_35(ByteAddressBuffer buffer, uint offset) {
+  return matrix<float16_t, 4, 4>(buffer.Load<vector<float16_t, 4> >((offset + 0u)), buffer.Load<vector<float16_t, 4> >((offset + 8u)), buffer.Load<vector<float16_t, 4> >((offset + 16u)), buffer.Load<vector<float16_t, 4> >((offset + 24u)));
+}
+
+typedef float3 tint_symbol_36_ret[2];
+tint_symbol_36_ret tint_symbol_36(ByteAddressBuffer buffer, uint offset) {
+  float3 arr_1[2] = (float3[2])0;
+  {
+    for(uint i = 0u; (i < 2u); i = (i + 1u)) {
+      arr_1[i] = asfloat(buffer.Load3((offset + (i * 16u))));
+    }
+  }
+  return arr_1;
+}
+
+typedef matrix<float16_t, 4, 2> tint_symbol_37_ret[2];
+tint_symbol_37_ret tint_symbol_37(ByteAddressBuffer buffer, uint offset) {
+  matrix<float16_t, 4, 2> arr_2[2] = (matrix<float16_t, 4, 2>[2])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 2u); i_1 = (i_1 + 1u)) {
+      arr_2[i_1] = tint_symbol_33(buffer, (offset + (i_1 * 16u)));
+    }
+  }
+  return arr_2;
+}
+
+void main_inner(uint idx) {
+  const float scalar_f32 = asfloat(sb.Load((800u * idx)));
+  const int scalar_i32 = asint(sb.Load(((800u * idx) + 4u)));
+  const uint scalar_u32 = sb.Load(((800u * idx) + 8u));
+  const float16_t scalar_f16 = sb.Load<float16_t>(((800u * idx) + 12u));
+  const float2 vec2_f32 = asfloat(sb.Load2(((800u * idx) + 16u)));
+  const int2 vec2_i32 = asint(sb.Load2(((800u * idx) + 24u)));
+  const uint2 vec2_u32 = sb.Load2(((800u * idx) + 32u));
+  const vector<float16_t, 2> vec2_f16 = sb.Load<vector<float16_t, 2> >(((800u * idx) + 40u));
+  const float3 vec3_f32 = asfloat(sb.Load3(((800u * idx) + 48u)));
+  const int3 vec3_i32 = asint(sb.Load3(((800u * idx) + 64u)));
+  const uint3 vec3_u32 = sb.Load3(((800u * idx) + 80u));
+  const vector<float16_t, 3> vec3_f16 = sb.Load<vector<float16_t, 3> >(((800u * idx) + 96u));
+  const float4 vec4_f32 = asfloat(sb.Load4(((800u * idx) + 112u)));
+  const int4 vec4_i32 = asint(sb.Load4(((800u * idx) + 128u)));
+  const uint4 vec4_u32 = sb.Load4(((800u * idx) + 144u));
+  const vector<float16_t, 4> vec4_f16 = sb.Load<vector<float16_t, 4> >(((800u * idx) + 160u));
+  const float2x2 mat2x2_f32 = tint_symbol_18(sb, ((800u * idx) + 168u));
+  const float2x3 mat2x3_f32 = tint_symbol_19(sb, ((800u * idx) + 192u));
+  const float2x4 mat2x4_f32 = tint_symbol_20(sb, ((800u * idx) + 224u));
+  const float3x2 mat3x2_f32 = tint_symbol_21(sb, ((800u * idx) + 256u));
+  const float3x3 mat3x3_f32 = tint_symbol_22(sb, ((800u * idx) + 288u));
+  const float3x4 mat3x4_f32 = tint_symbol_23(sb, ((800u * idx) + 336u));
+  const float4x2 mat4x2_f32 = tint_symbol_24(sb, ((800u * idx) + 384u));
+  const float4x3 mat4x3_f32 = tint_symbol_25(sb, ((800u * idx) + 416u));
+  const float4x4 mat4x4_f32 = tint_symbol_26(sb, ((800u * idx) + 480u));
+  const matrix<float16_t, 2, 2> mat2x2_f16 = tint_symbol_27(sb, ((800u * idx) + 544u));
+  const matrix<float16_t, 2, 3> mat2x3_f16 = tint_symbol_28(sb, ((800u * idx) + 552u));
+  const matrix<float16_t, 2, 4> mat2x4_f16 = tint_symbol_29(sb, ((800u * idx) + 568u));
+  const matrix<float16_t, 3, 2> mat3x2_f16 = tint_symbol_30(sb, ((800u * idx) + 584u));
+  const matrix<float16_t, 3, 3> mat3x3_f16 = tint_symbol_31(sb, ((800u * idx) + 600u));
+  const matrix<float16_t, 3, 4> mat3x4_f16 = tint_symbol_32(sb, ((800u * idx) + 624u));
+  const matrix<float16_t, 4, 2> mat4x2_f16 = tint_symbol_33(sb, ((800u * idx) + 648u));
+  const matrix<float16_t, 4, 3> mat4x3_f16 = tint_symbol_34(sb, ((800u * idx) + 664u));
+  const matrix<float16_t, 4, 4> mat4x4_f16 = tint_symbol_35(sb, ((800u * idx) + 696u));
+  const float3 arr2_vec3_f32[2] = tint_symbol_36(sb, ((800u * idx) + 736u));
+  const matrix<float16_t, 4, 2> arr2_mat4x2_f16[2] = tint_symbol_37(sb, ((800u * idx) + 768u));
+}
+
+[numthreads(1, 1, 1)]
+void main(tint_symbol_1 tint_symbol) {
+  main_inner(tint_symbol.idx);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000018A2D5DB890(43,8-16): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/storage/dynamic_index/read_f16.wgsl.expected.glsl b/test/tint/buffer/storage/dynamic_index/read_f16.wgsl.expected.glsl
new file mode 100644
index 0000000..d150509
--- /dev/null
+++ b/test/tint/buffer/storage/dynamic_index/read_f16.wgsl.expected.glsl

@@ -0,0 +1,103 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct Inner {
+  float scalar_f32;
+  int scalar_i32;
+  uint scalar_u32;
+  float16_t scalar_f16;
+  vec2 vec2_f32;
+  ivec2 vec2_i32;
+  uvec2 vec2_u32;
+  f16vec2 vec2_f16;
+  uint pad;
+  vec3 vec3_f32;
+  uint pad_1;
+  ivec3 vec3_i32;
+  uint pad_2;
+  uvec3 vec3_u32;
+  uint pad_3;
+  f16vec3 vec3_f16;
+  uint pad_4;
+  uint pad_5;
+  vec4 vec4_f32;
+  ivec4 vec4_i32;
+  uvec4 vec4_u32;
+  f16vec4 vec4_f16;
+  mat2 mat2x2_f32;
+  uint pad_6;
+  uint pad_7;
+  mat2x3 mat2x3_f32;
+  mat2x4 mat2x4_f32;
+  mat3x2 mat3x2_f32;
+  uint pad_8;
+  uint pad_9;
+  mat3 mat3x3_f32;
+  mat3x4 mat3x4_f32;
+  mat4x2 mat4x2_f32;
+  mat4x3 mat4x3_f32;
+  mat4 mat4x4_f32;
+  f16mat2 mat2x2_f16;
+  f16mat2x3 mat2x3_f16;
+  f16mat2x4 mat2x4_f16;
+  f16mat3x2 mat3x2_f16;
+  uint pad_10;
+  f16mat3 mat3x3_f16;
+  f16mat3x4 mat3x4_f16;
+  f16mat4x2 mat4x2_f16;
+  f16mat4x3 mat4x3_f16;
+  f16mat4 mat4x4_f16;
+  uint pad_11;
+  uint pad_12;
+  vec3 arr2_vec3_f32[2];
+  f16mat4x2 arr2_mat4x2_f16[2];
+};
+
+layout(binding = 0, std430) buffer S_ssbo {
+  Inner arr[];
+} sb;
+
+void tint_symbol(uint idx) {
+  float scalar_f32 = sb.arr[idx].scalar_f32;
+  int scalar_i32 = sb.arr[idx].scalar_i32;
+  uint scalar_u32 = sb.arr[idx].scalar_u32;
+  float16_t scalar_f16 = sb.arr[idx].scalar_f16;
+  vec2 vec2_f32 = sb.arr[idx].vec2_f32;
+  ivec2 vec2_i32 = sb.arr[idx].vec2_i32;
+  uvec2 vec2_u32 = sb.arr[idx].vec2_u32;
+  f16vec2 vec2_f16 = sb.arr[idx].vec2_f16;
+  vec3 vec3_f32 = sb.arr[idx].vec3_f32;
+  ivec3 vec3_i32 = sb.arr[idx].vec3_i32;
+  uvec3 vec3_u32 = sb.arr[idx].vec3_u32;
+  f16vec3 vec3_f16 = sb.arr[idx].vec3_f16;
+  vec4 vec4_f32 = sb.arr[idx].vec4_f32;
+  ivec4 vec4_i32 = sb.arr[idx].vec4_i32;
+  uvec4 vec4_u32 = sb.arr[idx].vec4_u32;
+  f16vec4 vec4_f16 = sb.arr[idx].vec4_f16;
+  mat2 mat2x2_f32 = sb.arr[idx].mat2x2_f32;
+  mat2x3 mat2x3_f32 = sb.arr[idx].mat2x3_f32;
+  mat2x4 mat2x4_f32 = sb.arr[idx].mat2x4_f32;
+  mat3x2 mat3x2_f32 = sb.arr[idx].mat3x2_f32;
+  mat3 mat3x3_f32 = sb.arr[idx].mat3x3_f32;
+  mat3x4 mat3x4_f32 = sb.arr[idx].mat3x4_f32;
+  mat4x2 mat4x2_f32 = sb.arr[idx].mat4x2_f32;
+  mat4x3 mat4x3_f32 = sb.arr[idx].mat4x3_f32;
+  mat4 mat4x4_f32 = sb.arr[idx].mat4x4_f32;
+  f16mat2 mat2x2_f16 = sb.arr[idx].mat2x2_f16;
+  f16mat2x3 mat2x3_f16 = sb.arr[idx].mat2x3_f16;
+  f16mat2x4 mat2x4_f16 = sb.arr[idx].mat2x4_f16;
+  f16mat3x2 mat3x2_f16 = sb.arr[idx].mat3x2_f16;
+  f16mat3 mat3x3_f16 = sb.arr[idx].mat3x3_f16;
+  f16mat3x4 mat3x4_f16 = sb.arr[idx].mat3x4_f16;
+  f16mat4x2 mat4x2_f16 = sb.arr[idx].mat4x2_f16;
+  f16mat4x3 mat4x3_f16 = sb.arr[idx].mat4x3_f16;
+  f16mat4 mat4x4_f16 = sb.arr[idx].mat4x4_f16;
+  vec3 arr2_vec3_f32[2] = sb.arr[idx].arr2_vec3_f32;
+  f16mat4x2 arr2_mat4x2_f16[2] = sb.arr[idx].arr2_mat4x2_f16;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  tint_symbol(gl_LocalInvocationIndex);
+  return;
+}

diff --git a/test/tint/buffer/storage/dynamic_index/read_f16.wgsl.expected.msl b/test/tint/buffer/storage/dynamic_index/read_f16.wgsl.expected.msl
new file mode 100644
index 0000000..ee0138a
--- /dev/null
+++ b/test/tint/buffer/storage/dynamic_index/read_f16.wgsl.expected.msl

@@ -0,0 +1,113 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct Inner {
+  /* 0x0000 */ float scalar_f32;
+  /* 0x0004 */ int scalar_i32;
+  /* 0x0008 */ uint scalar_u32;
+  /* 0x000c */ half scalar_f16;
+  /* 0x000e */ tint_array<int8_t, 2> tint_pad;
+  /* 0x0010 */ float2 vec2_f32;
+  /* 0x0018 */ int2 vec2_i32;
+  /* 0x0020 */ uint2 vec2_u32;
+  /* 0x0028 */ half2 vec2_f16;
+  /* 0x002c */ tint_array<int8_t, 4> tint_pad_1;
+  /* 0x0030 */ packed_float3 vec3_f32;
+  /* 0x003c */ tint_array<int8_t, 4> tint_pad_2;
+  /* 0x0040 */ packed_int3 vec3_i32;
+  /* 0x004c */ tint_array<int8_t, 4> tint_pad_3;
+  /* 0x0050 */ packed_uint3 vec3_u32;
+  /* 0x005c */ tint_array<int8_t, 4> tint_pad_4;
+  /* 0x0060 */ packed_half3 vec3_f16;
+  /* 0x0066 */ tint_array<int8_t, 10> tint_pad_5;
+  /* 0x0070 */ float4 vec4_f32;
+  /* 0x0080 */ int4 vec4_i32;
+  /* 0x0090 */ uint4 vec4_u32;
+  /* 0x00a0 */ half4 vec4_f16;
+  /* 0x00a8 */ float2x2 mat2x2_f32;
+  /* 0x00b8 */ tint_array<int8_t, 8> tint_pad_6;
+  /* 0x00c0 */ float2x3 mat2x3_f32;
+  /* 0x00e0 */ float2x4 mat2x4_f32;
+  /* 0x0100 */ float3x2 mat3x2_f32;
+  /* 0x0118 */ tint_array<int8_t, 8> tint_pad_7;
+  /* 0x0120 */ float3x3 mat3x3_f32;
+  /* 0x0150 */ float3x4 mat3x4_f32;
+  /* 0x0180 */ float4x2 mat4x2_f32;
+  /* 0x01a0 */ float4x3 mat4x3_f32;
+  /* 0x01e0 */ float4x4 mat4x4_f32;
+  /* 0x0220 */ half2x2 mat2x2_f16;
+  /* 0x0228 */ half2x3 mat2x3_f16;
+  /* 0x0238 */ half2x4 mat2x4_f16;
+  /* 0x0248 */ half3x2 mat3x2_f16;
+  /* 0x0254 */ tint_array<int8_t, 4> tint_pad_8;
+  /* 0x0258 */ half3x3 mat3x3_f16;
+  /* 0x0270 */ half3x4 mat3x4_f16;
+  /* 0x0288 */ half4x2 mat4x2_f16;
+  /* 0x0298 */ half4x3 mat4x3_f16;
+  /* 0x02b8 */ half4x4 mat4x4_f16;
+  /* 0x02d8 */ tint_array<int8_t, 8> tint_pad_9;
+  /* 0x02e0 */ tint_array<float3, 2> arr2_vec3_f32;
+  /* 0x0300 */ tint_array<half4x2, 2> arr2_mat4x2_f16;
+};
+
+struct S {
+  /* 0x0000 */ tint_array<Inner, 1> arr;
+};
+
+void tint_symbol_inner(uint idx, const device S* const tint_symbol_1) {
+  float const scalar_f32 = (*(tint_symbol_1)).arr[idx].scalar_f32;
+  int const scalar_i32 = (*(tint_symbol_1)).arr[idx].scalar_i32;
+  uint const scalar_u32 = (*(tint_symbol_1)).arr[idx].scalar_u32;
+  half const scalar_f16 = (*(tint_symbol_1)).arr[idx].scalar_f16;
+  float2 const vec2_f32 = (*(tint_symbol_1)).arr[idx].vec2_f32;
+  int2 const vec2_i32 = (*(tint_symbol_1)).arr[idx].vec2_i32;
+  uint2 const vec2_u32 = (*(tint_symbol_1)).arr[idx].vec2_u32;
+  half2 const vec2_f16 = (*(tint_symbol_1)).arr[idx].vec2_f16;
+  float3 const vec3_f32 = float3((*(tint_symbol_1)).arr[idx].vec3_f32);
+  int3 const vec3_i32 = int3((*(tint_symbol_1)).arr[idx].vec3_i32);
+  uint3 const vec3_u32 = uint3((*(tint_symbol_1)).arr[idx].vec3_u32);
+  half3 const vec3_f16 = half3((*(tint_symbol_1)).arr[idx].vec3_f16);
+  float4 const vec4_f32 = (*(tint_symbol_1)).arr[idx].vec4_f32;
+  int4 const vec4_i32 = (*(tint_symbol_1)).arr[idx].vec4_i32;
+  uint4 const vec4_u32 = (*(tint_symbol_1)).arr[idx].vec4_u32;
+  half4 const vec4_f16 = (*(tint_symbol_1)).arr[idx].vec4_f16;
+  float2x2 const mat2x2_f32 = (*(tint_symbol_1)).arr[idx].mat2x2_f32;
+  float2x3 const mat2x3_f32 = (*(tint_symbol_1)).arr[idx].mat2x3_f32;
+  float2x4 const mat2x4_f32 = (*(tint_symbol_1)).arr[idx].mat2x4_f32;
+  float3x2 const mat3x2_f32 = (*(tint_symbol_1)).arr[idx].mat3x2_f32;
+  float3x3 const mat3x3_f32 = (*(tint_symbol_1)).arr[idx].mat3x3_f32;
+  float3x4 const mat3x4_f32 = (*(tint_symbol_1)).arr[idx].mat3x4_f32;
+  float4x2 const mat4x2_f32 = (*(tint_symbol_1)).arr[idx].mat4x2_f32;
+  float4x3 const mat4x3_f32 = (*(tint_symbol_1)).arr[idx].mat4x3_f32;
+  float4x4 const mat4x4_f32 = (*(tint_symbol_1)).arr[idx].mat4x4_f32;
+  half2x2 const mat2x2_f16 = (*(tint_symbol_1)).arr[idx].mat2x2_f16;
+  half2x3 const mat2x3_f16 = (*(tint_symbol_1)).arr[idx].mat2x3_f16;
+  half2x4 const mat2x4_f16 = (*(tint_symbol_1)).arr[idx].mat2x4_f16;
+  half3x2 const mat3x2_f16 = (*(tint_symbol_1)).arr[idx].mat3x2_f16;
+  half3x3 const mat3x3_f16 = (*(tint_symbol_1)).arr[idx].mat3x3_f16;
+  half3x4 const mat3x4_f16 = (*(tint_symbol_1)).arr[idx].mat3x4_f16;
+  half4x2 const mat4x2_f16 = (*(tint_symbol_1)).arr[idx].mat4x2_f16;
+  half4x3 const mat4x3_f16 = (*(tint_symbol_1)).arr[idx].mat4x3_f16;
+  half4x4 const mat4x4_f16 = (*(tint_symbol_1)).arr[idx].mat4x4_f16;
+  tint_array<float3, 2> const arr2_vec3_f32 = (*(tint_symbol_1)).arr[idx].arr2_vec3_f32;
+  tint_array<half4x2, 2> const arr2_mat4x2_f16 = (*(tint_symbol_1)).arr[idx].arr2_mat4x2_f16;
+}
+
+kernel void tint_symbol(const device S* tint_symbol_2 [[buffer(0)]], uint idx [[thread_index_in_threadgroup]]) {
+  tint_symbol_inner(idx, tint_symbol_2);
+  return;
+}
+

diff --git a/test/tint/buffer/storage/dynamic_index/read_f16.wgsl.expected.spvasm b/test/tint/buffer/storage/dynamic_index/read_f16.wgsl.expected.spvasm
new file mode 100644
index 0000000..d5f4b75
--- /dev/null
+++ b/test/tint/buffer/storage/dynamic_index/read_f16.wgsl.expected.spvasm

@@ -0,0 +1,341 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 198
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main" %idx_1
+               OpExecutionMode %main LocalSize 1 1 1
+               OpName %idx_1 "idx_1"
+               OpName %S "S"
+               OpMemberName %S 0 "arr"
+               OpName %Inner "Inner"
+               OpMemberName %Inner 0 "scalar_f32"
+               OpMemberName %Inner 1 "scalar_i32"
+               OpMemberName %Inner 2 "scalar_u32"
+               OpMemberName %Inner 3 "scalar_f16"
+               OpMemberName %Inner 4 "vec2_f32"
+               OpMemberName %Inner 5 "vec2_i32"
+               OpMemberName %Inner 6 "vec2_u32"
+               OpMemberName %Inner 7 "vec2_f16"
+               OpMemberName %Inner 8 "vec3_f32"
+               OpMemberName %Inner 9 "vec3_i32"
+               OpMemberName %Inner 10 "vec3_u32"
+               OpMemberName %Inner 11 "vec3_f16"
+               OpMemberName %Inner 12 "vec4_f32"
+               OpMemberName %Inner 13 "vec4_i32"
+               OpMemberName %Inner 14 "vec4_u32"
+               OpMemberName %Inner 15 "vec4_f16"
+               OpMemberName %Inner 16 "mat2x2_f32"
+               OpMemberName %Inner 17 "mat2x3_f32"
+               OpMemberName %Inner 18 "mat2x4_f32"
+               OpMemberName %Inner 19 "mat3x2_f32"
+               OpMemberName %Inner 20 "mat3x3_f32"
+               OpMemberName %Inner 21 "mat3x4_f32"
+               OpMemberName %Inner 22 "mat4x2_f32"
+               OpMemberName %Inner 23 "mat4x3_f32"
+               OpMemberName %Inner 24 "mat4x4_f32"
+               OpMemberName %Inner 25 "mat2x2_f16"
+               OpMemberName %Inner 26 "mat2x3_f16"
+               OpMemberName %Inner 27 "mat2x4_f16"
+               OpMemberName %Inner 28 "mat3x2_f16"
+               OpMemberName %Inner 29 "mat3x3_f16"
+               OpMemberName %Inner 30 "mat3x4_f16"
+               OpMemberName %Inner 31 "mat4x2_f16"
+               OpMemberName %Inner 32 "mat4x3_f16"
+               OpMemberName %Inner 33 "mat4x4_f16"
+               OpMemberName %Inner 34 "arr2_vec3_f32"
+               OpMemberName %Inner 35 "arr2_mat4x2_f16"
+               OpName %sb "sb"
+               OpName %main_inner "main_inner"
+               OpName %idx "idx"
+               OpName %main "main"
+               OpDecorate %idx_1 BuiltIn LocalInvocationIndex
+               OpDecorate %S Block
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %Inner 0 Offset 0
+               OpMemberDecorate %Inner 1 Offset 4
+               OpMemberDecorate %Inner 2 Offset 8
+               OpMemberDecorate %Inner 3 Offset 12
+               OpMemberDecorate %Inner 4 Offset 16
+               OpMemberDecorate %Inner 5 Offset 24
+               OpMemberDecorate %Inner 6 Offset 32
+               OpMemberDecorate %Inner 7 Offset 40
+               OpMemberDecorate %Inner 8 Offset 48
+               OpMemberDecorate %Inner 9 Offset 64
+               OpMemberDecorate %Inner 10 Offset 80
+               OpMemberDecorate %Inner 11 Offset 96
+               OpMemberDecorate %Inner 12 Offset 112
+               OpMemberDecorate %Inner 13 Offset 128
+               OpMemberDecorate %Inner 14 Offset 144
+               OpMemberDecorate %Inner 15 Offset 160
+               OpMemberDecorate %Inner 16 Offset 168
+               OpMemberDecorate %Inner 16 ColMajor
+               OpMemberDecorate %Inner 16 MatrixStride 8
+               OpMemberDecorate %Inner 17 Offset 192
+               OpMemberDecorate %Inner 17 ColMajor
+               OpMemberDecorate %Inner 17 MatrixStride 16
+               OpMemberDecorate %Inner 18 Offset 224
+               OpMemberDecorate %Inner 18 ColMajor
+               OpMemberDecorate %Inner 18 MatrixStride 16
+               OpMemberDecorate %Inner 19 Offset 256
+               OpMemberDecorate %Inner 19 ColMajor
+               OpMemberDecorate %Inner 19 MatrixStride 8
+               OpMemberDecorate %Inner 20 Offset 288
+               OpMemberDecorate %Inner 20 ColMajor
+               OpMemberDecorate %Inner 20 MatrixStride 16
+               OpMemberDecorate %Inner 21 Offset 336
+               OpMemberDecorate %Inner 21 ColMajor
+               OpMemberDecorate %Inner 21 MatrixStride 16
+               OpMemberDecorate %Inner 22 Offset 384
+               OpMemberDecorate %Inner 22 ColMajor
+               OpMemberDecorate %Inner 22 MatrixStride 8
+               OpMemberDecorate %Inner 23 Offset 416
+               OpMemberDecorate %Inner 23 ColMajor
+               OpMemberDecorate %Inner 23 MatrixStride 16
+               OpMemberDecorate %Inner 24 Offset 480
+               OpMemberDecorate %Inner 24 ColMajor
+               OpMemberDecorate %Inner 24 MatrixStride 16
+               OpMemberDecorate %Inner 25 Offset 544
+               OpMemberDecorate %Inner 25 ColMajor
+               OpMemberDecorate %Inner 25 MatrixStride 4
+               OpMemberDecorate %Inner 26 Offset 552
+               OpMemberDecorate %Inner 26 ColMajor
+               OpMemberDecorate %Inner 26 MatrixStride 8
+               OpMemberDecorate %Inner 27 Offset 568
+               OpMemberDecorate %Inner 27 ColMajor
+               OpMemberDecorate %Inner 27 MatrixStride 8
+               OpMemberDecorate %Inner 28 Offset 584
+               OpMemberDecorate %Inner 28 ColMajor
+               OpMemberDecorate %Inner 28 MatrixStride 4
+               OpMemberDecorate %Inner 29 Offset 600
+               OpMemberDecorate %Inner 29 ColMajor
+               OpMemberDecorate %Inner 29 MatrixStride 8
+               OpMemberDecorate %Inner 30 Offset 624
+               OpMemberDecorate %Inner 30 ColMajor
+               OpMemberDecorate %Inner 30 MatrixStride 8
+               OpMemberDecorate %Inner 31 Offset 648
+               OpMemberDecorate %Inner 31 ColMajor
+               OpMemberDecorate %Inner 31 MatrixStride 4
+               OpMemberDecorate %Inner 32 Offset 664
+               OpMemberDecorate %Inner 32 ColMajor
+               OpMemberDecorate %Inner 32 MatrixStride 8
+               OpMemberDecorate %Inner 33 Offset 696
+               OpMemberDecorate %Inner 33 ColMajor
+               OpMemberDecorate %Inner 33 MatrixStride 8
+               OpMemberDecorate %Inner 34 Offset 736
+               OpDecorate %_arr_v3float_uint_2 ArrayStride 16
+               OpMemberDecorate %Inner 35 Offset 768
+               OpMemberDecorate %Inner 35 ColMajor
+               OpMemberDecorate %Inner 35 MatrixStride 4
+               OpDecorate %_arr_mat4v2half_uint_2 ArrayStride 16
+               OpDecorate %_runtimearr_Inner ArrayStride 800
+               OpDecorate %sb NonWritable
+               OpDecorate %sb Binding 0
+               OpDecorate %sb DescriptorSet 0
+       %uint = OpTypeInt 32 0
+%_ptr_Input_uint = OpTypePointer Input %uint
+      %idx_1 = OpVariable %_ptr_Input_uint Input
+      %float = OpTypeFloat 32
+        %int = OpTypeInt 32 1
+       %half = OpTypeFloat 16
+    %v2float = OpTypeVector %float 2
+      %v2int = OpTypeVector %int 2
+     %v2uint = OpTypeVector %uint 2
+     %v2half = OpTypeVector %half 2
+    %v3float = OpTypeVector %float 3
+      %v3int = OpTypeVector %int 3
+     %v3uint = OpTypeVector %uint 3
+     %v3half = OpTypeVector %half 3
+    %v4float = OpTypeVector %float 4
+      %v4int = OpTypeVector %int 4
+     %v4uint = OpTypeVector %uint 4
+     %v4half = OpTypeVector %half 4
+%mat2v2float = OpTypeMatrix %v2float 2
+%mat2v3float = OpTypeMatrix %v3float 2
+%mat2v4float = OpTypeMatrix %v4float 2
+%mat3v2float = OpTypeMatrix %v2float 3
+%mat3v3float = OpTypeMatrix %v3float 3
+%mat3v4float = OpTypeMatrix %v4float 3
+%mat4v2float = OpTypeMatrix %v2float 4
+%mat4v3float = OpTypeMatrix %v3float 4
+%mat4v4float = OpTypeMatrix %v4float 4
+ %mat2v2half = OpTypeMatrix %v2half 2
+ %mat2v3half = OpTypeMatrix %v3half 2
+ %mat2v4half = OpTypeMatrix %v4half 2
+ %mat3v2half = OpTypeMatrix %v2half 3
+ %mat3v3half = OpTypeMatrix %v3half 3
+ %mat3v4half = OpTypeMatrix %v4half 3
+ %mat4v2half = OpTypeMatrix %v2half 4
+ %mat4v3half = OpTypeMatrix %v3half 4
+ %mat4v4half = OpTypeMatrix %v4half 4
+     %uint_2 = OpConstant %uint 2
+%_arr_v3float_uint_2 = OpTypeArray %v3float %uint_2
+%_arr_mat4v2half_uint_2 = OpTypeArray %mat4v2half %uint_2
+      %Inner = OpTypeStruct %float %int %uint %half %v2float %v2int %v2uint %v2half %v3float %v3int %v3uint %v3half %v4float %v4int %v4uint %v4half %mat2v2float %mat2v3float %mat2v4float %mat3v2float %mat3v3float %mat3v4float %mat4v2float %mat4v3float %mat4v4float %mat2v2half %mat2v3half %mat2v4half %mat3v2half %mat3v3half %mat3v4half %mat4v2half %mat4v3half %mat4v4half %_arr_v3float_uint_2 %_arr_mat4v2half_uint_2
+%_runtimearr_Inner = OpTypeRuntimeArray %Inner
+          %S = OpTypeStruct %_runtimearr_Inner
+%_ptr_StorageBuffer_S = OpTypePointer StorageBuffer %S
+         %sb = OpVariable %_ptr_StorageBuffer_S StorageBuffer
+       %void = OpTypeVoid
+         %45 = OpTypeFunction %void %uint
+     %uint_0 = OpConstant %uint 0
+%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
+     %uint_1 = OpConstant %uint 1
+%_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int
+%_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
+     %uint_3 = OpConstant %uint 3
+%_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
+     %uint_4 = OpConstant %uint 4
+%_ptr_StorageBuffer_v2float = OpTypePointer StorageBuffer %v2float
+     %uint_5 = OpConstant %uint 5
+%_ptr_StorageBuffer_v2int = OpTypePointer StorageBuffer %v2int
+     %uint_6 = OpConstant %uint 6
+%_ptr_StorageBuffer_v2uint = OpTypePointer StorageBuffer %v2uint
+     %uint_7 = OpConstant %uint 7
+%_ptr_StorageBuffer_v2half = OpTypePointer StorageBuffer %v2half
+     %uint_8 = OpConstant %uint 8
+%_ptr_StorageBuffer_v3float = OpTypePointer StorageBuffer %v3float
+     %uint_9 = OpConstant %uint 9
+%_ptr_StorageBuffer_v3int = OpTypePointer StorageBuffer %v3int
+    %uint_10 = OpConstant %uint 10
+%_ptr_StorageBuffer_v3uint = OpTypePointer StorageBuffer %v3uint
+    %uint_11 = OpConstant %uint 11
+%_ptr_StorageBuffer_v3half = OpTypePointer StorageBuffer %v3half
+    %uint_12 = OpConstant %uint 12
+%_ptr_StorageBuffer_v4float = OpTypePointer StorageBuffer %v4float
+    %uint_13 = OpConstant %uint 13
+%_ptr_StorageBuffer_v4int = OpTypePointer StorageBuffer %v4int
+    %uint_14 = OpConstant %uint 14
+%_ptr_StorageBuffer_v4uint = OpTypePointer StorageBuffer %v4uint
+    %uint_15 = OpConstant %uint 15
+%_ptr_StorageBuffer_v4half = OpTypePointer StorageBuffer %v4half
+    %uint_16 = OpConstant %uint 16
+%_ptr_StorageBuffer_mat2v2float = OpTypePointer StorageBuffer %mat2v2float
+    %uint_17 = OpConstant %uint 17
+%_ptr_StorageBuffer_mat2v3float = OpTypePointer StorageBuffer %mat2v3float
+    %uint_18 = OpConstant %uint 18
+%_ptr_StorageBuffer_mat2v4float = OpTypePointer StorageBuffer %mat2v4float
+    %uint_19 = OpConstant %uint 19
+%_ptr_StorageBuffer_mat3v2float = OpTypePointer StorageBuffer %mat3v2float
+    %uint_20 = OpConstant %uint 20
+%_ptr_StorageBuffer_mat3v3float = OpTypePointer StorageBuffer %mat3v3float
+    %uint_21 = OpConstant %uint 21
+%_ptr_StorageBuffer_mat3v4float = OpTypePointer StorageBuffer %mat3v4float
+    %uint_22 = OpConstant %uint 22
+%_ptr_StorageBuffer_mat4v2float = OpTypePointer StorageBuffer %mat4v2float
+    %uint_23 = OpConstant %uint 23
+%_ptr_StorageBuffer_mat4v3float = OpTypePointer StorageBuffer %mat4v3float
+    %uint_24 = OpConstant %uint 24
+%_ptr_StorageBuffer_mat4v4float = OpTypePointer StorageBuffer %mat4v4float
+    %uint_25 = OpConstant %uint 25
+%_ptr_StorageBuffer_mat2v2half = OpTypePointer StorageBuffer %mat2v2half
+    %uint_26 = OpConstant %uint 26
+%_ptr_StorageBuffer_mat2v3half = OpTypePointer StorageBuffer %mat2v3half
+    %uint_27 = OpConstant %uint 27
+%_ptr_StorageBuffer_mat2v4half = OpTypePointer StorageBuffer %mat2v4half
+    %uint_28 = OpConstant %uint 28
+%_ptr_StorageBuffer_mat3v2half = OpTypePointer StorageBuffer %mat3v2half
+    %uint_29 = OpConstant %uint 29
+%_ptr_StorageBuffer_mat3v3half = OpTypePointer StorageBuffer %mat3v3half
+    %uint_30 = OpConstant %uint 30
+%_ptr_StorageBuffer_mat3v4half = OpTypePointer StorageBuffer %mat3v4half
+    %uint_31 = OpConstant %uint 31
+%_ptr_StorageBuffer_mat4v2half = OpTypePointer StorageBuffer %mat4v2half
+    %uint_32 = OpConstant %uint 32
+%_ptr_StorageBuffer_mat4v3half = OpTypePointer StorageBuffer %mat4v3half
+    %uint_33 = OpConstant %uint 33
+%_ptr_StorageBuffer_mat4v4half = OpTypePointer StorageBuffer %mat4v4half
+    %uint_34 = OpConstant %uint 34
+%_ptr_StorageBuffer__arr_v3float_uint_2 = OpTypePointer StorageBuffer %_arr_v3float_uint_2
+    %uint_35 = OpConstant %uint 35
+%_ptr_StorageBuffer__arr_mat4v2half_uint_2 = OpTypePointer StorageBuffer %_arr_mat4v2half_uint_2
+        %193 = OpTypeFunction %void
+ %main_inner = OpFunction %void None %45
+        %idx = OpFunctionParameter %uint
+         %49 = OpLabel
+         %52 = OpAccessChain %_ptr_StorageBuffer_float %sb %uint_0 %idx %uint_0
+         %53 = OpLoad %float %52
+         %56 = OpAccessChain %_ptr_StorageBuffer_int %sb %uint_0 %idx %uint_1
+         %57 = OpLoad %int %56
+         %59 = OpAccessChain %_ptr_StorageBuffer_uint %sb %uint_0 %idx %uint_2
+         %60 = OpLoad %uint %59
+         %63 = OpAccessChain %_ptr_StorageBuffer_half %sb %uint_0 %idx %uint_3
+         %64 = OpLoad %half %63
+         %67 = OpAccessChain %_ptr_StorageBuffer_v2float %sb %uint_0 %idx %uint_4
+         %68 = OpLoad %v2float %67
+         %71 = OpAccessChain %_ptr_StorageBuffer_v2int %sb %uint_0 %idx %uint_5
+         %72 = OpLoad %v2int %71
+         %75 = OpAccessChain %_ptr_StorageBuffer_v2uint %sb %uint_0 %idx %uint_6
+         %76 = OpLoad %v2uint %75
+         %79 = OpAccessChain %_ptr_StorageBuffer_v2half %sb %uint_0 %idx %uint_7
+         %80 = OpLoad %v2half %79
+         %83 = OpAccessChain %_ptr_StorageBuffer_v3float %sb %uint_0 %idx %uint_8
+         %84 = OpLoad %v3float %83
+         %87 = OpAccessChain %_ptr_StorageBuffer_v3int %sb %uint_0 %idx %uint_9
+         %88 = OpLoad %v3int %87
+         %91 = OpAccessChain %_ptr_StorageBuffer_v3uint %sb %uint_0 %idx %uint_10
+         %92 = OpLoad %v3uint %91
+         %95 = OpAccessChain %_ptr_StorageBuffer_v3half %sb %uint_0 %idx %uint_11
+         %96 = OpLoad %v3half %95
+         %99 = OpAccessChain %_ptr_StorageBuffer_v4float %sb %uint_0 %idx %uint_12
+        %100 = OpLoad %v4float %99
+        %103 = OpAccessChain %_ptr_StorageBuffer_v4int %sb %uint_0 %idx %uint_13
+        %104 = OpLoad %v4int %103
+        %107 = OpAccessChain %_ptr_StorageBuffer_v4uint %sb %uint_0 %idx %uint_14
+        %108 = OpLoad %v4uint %107
+        %111 = OpAccessChain %_ptr_StorageBuffer_v4half %sb %uint_0 %idx %uint_15
+        %112 = OpLoad %v4half %111
+        %115 = OpAccessChain %_ptr_StorageBuffer_mat2v2float %sb %uint_0 %idx %uint_16
+        %116 = OpLoad %mat2v2float %115
+        %119 = OpAccessChain %_ptr_StorageBuffer_mat2v3float %sb %uint_0 %idx %uint_17
+        %120 = OpLoad %mat2v3float %119
+        %123 = OpAccessChain %_ptr_StorageBuffer_mat2v4float %sb %uint_0 %idx %uint_18
+        %124 = OpLoad %mat2v4float %123
+        %127 = OpAccessChain %_ptr_StorageBuffer_mat3v2float %sb %uint_0 %idx %uint_19
+        %128 = OpLoad %mat3v2float %127
+        %131 = OpAccessChain %_ptr_StorageBuffer_mat3v3float %sb %uint_0 %idx %uint_20
+        %132 = OpLoad %mat3v3float %131
+        %135 = OpAccessChain %_ptr_StorageBuffer_mat3v4float %sb %uint_0 %idx %uint_21
+        %136 = OpLoad %mat3v4float %135
+        %139 = OpAccessChain %_ptr_StorageBuffer_mat4v2float %sb %uint_0 %idx %uint_22
+        %140 = OpLoad %mat4v2float %139
+        %143 = OpAccessChain %_ptr_StorageBuffer_mat4v3float %sb %uint_0 %idx %uint_23
+        %144 = OpLoad %mat4v3float %143
+        %147 = OpAccessChain %_ptr_StorageBuffer_mat4v4float %sb %uint_0 %idx %uint_24
+        %148 = OpLoad %mat4v4float %147
+        %151 = OpAccessChain %_ptr_StorageBuffer_mat2v2half %sb %uint_0 %idx %uint_25
+        %152 = OpLoad %mat2v2half %151
+        %155 = OpAccessChain %_ptr_StorageBuffer_mat2v3half %sb %uint_0 %idx %uint_26
+        %156 = OpLoad %mat2v3half %155
+        %159 = OpAccessChain %_ptr_StorageBuffer_mat2v4half %sb %uint_0 %idx %uint_27
+        %160 = OpLoad %mat2v4half %159
+        %163 = OpAccessChain %_ptr_StorageBuffer_mat3v2half %sb %uint_0 %idx %uint_28
+        %164 = OpLoad %mat3v2half %163
+        %167 = OpAccessChain %_ptr_StorageBuffer_mat3v3half %sb %uint_0 %idx %uint_29
+        %168 = OpLoad %mat3v3half %167
+        %171 = OpAccessChain %_ptr_StorageBuffer_mat3v4half %sb %uint_0 %idx %uint_30
+        %172 = OpLoad %mat3v4half %171
+        %175 = OpAccessChain %_ptr_StorageBuffer_mat4v2half %sb %uint_0 %idx %uint_31
+        %176 = OpLoad %mat4v2half %175
+        %179 = OpAccessChain %_ptr_StorageBuffer_mat4v3half %sb %uint_0 %idx %uint_32
+        %180 = OpLoad %mat4v3half %179
+        %183 = OpAccessChain %_ptr_StorageBuffer_mat4v4half %sb %uint_0 %idx %uint_33
+        %184 = OpLoad %mat4v4half %183
+        %187 = OpAccessChain %_ptr_StorageBuffer__arr_v3float_uint_2 %sb %uint_0 %idx %uint_34
+        %188 = OpLoad %_arr_v3float_uint_2 %187
+        %191 = OpAccessChain %_ptr_StorageBuffer__arr_mat4v2half_uint_2 %sb %uint_0 %idx %uint_35
+        %192 = OpLoad %_arr_mat4v2half_uint_2 %191
+               OpReturn
+               OpFunctionEnd
+       %main = OpFunction %void None %193
+        %195 = OpLabel
+        %197 = OpLoad %uint %idx_1
+        %196 = OpFunctionCall %void %main_inner %197
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/storage/dynamic_index/read_f16.wgsl.expected.wgsl b/test/tint/buffer/storage/dynamic_index/read_f16.wgsl.expected.wgsl
new file mode 100644
index 0000000..5a5e16b
--- /dev/null
+++ b/test/tint/buffer/storage/dynamic_index/read_f16.wgsl.expected.wgsl

@@ -0,0 +1,86 @@
+enable f16;
+
+struct Inner {
+  scalar_f32 : f32,
+  scalar_i32 : i32,
+  scalar_u32 : u32,
+  scalar_f16 : f16,
+  vec2_f32 : vec2<f32>,
+  vec2_i32 : vec2<i32>,
+  vec2_u32 : vec2<u32>,
+  vec2_f16 : vec2<f16>,
+  vec3_f32 : vec3<f32>,
+  vec3_i32 : vec3<i32>,
+  vec3_u32 : vec3<u32>,
+  vec3_f16 : vec3<f16>,
+  vec4_f32 : vec4<f32>,
+  vec4_i32 : vec4<i32>,
+  vec4_u32 : vec4<u32>,
+  vec4_f16 : vec4<f16>,
+  mat2x2_f32 : mat2x2<f32>,
+  mat2x3_f32 : mat2x3<f32>,
+  mat2x4_f32 : mat2x4<f32>,
+  mat3x2_f32 : mat3x2<f32>,
+  mat3x3_f32 : mat3x3<f32>,
+  mat3x4_f32 : mat3x4<f32>,
+  mat4x2_f32 : mat4x2<f32>,
+  mat4x3_f32 : mat4x3<f32>,
+  mat4x4_f32 : mat4x4<f32>,
+  mat2x2_f16 : mat2x2<f16>,
+  mat2x3_f16 : mat2x3<f16>,
+  mat2x4_f16 : mat2x4<f16>,
+  mat3x2_f16 : mat3x2<f16>,
+  mat3x3_f16 : mat3x3<f16>,
+  mat3x4_f16 : mat3x4<f16>,
+  mat4x2_f16 : mat4x2<f16>,
+  mat4x3_f16 : mat4x3<f16>,
+  mat4x4_f16 : mat4x4<f16>,
+  arr2_vec3_f32 : array<vec3<f32>, 2>,
+  arr2_mat4x2_f16 : array<mat4x2<f16>, 2>,
+}
+
+struct S {
+  arr : array<Inner>,
+}
+
+@binding(0) @group(0) var<storage, read> sb : S;
+
+@compute @workgroup_size(1)
+fn main(@builtin(local_invocation_index) idx : u32) {
+  let scalar_f32 : f32 = sb.arr[idx].scalar_f32;
+  let scalar_i32 : i32 = sb.arr[idx].scalar_i32;
+  let scalar_u32 : u32 = sb.arr[idx].scalar_u32;
+  let scalar_f16 : f16 = sb.arr[idx].scalar_f16;
+  let vec2_f32 : vec2<f32> = sb.arr[idx].vec2_f32;
+  let vec2_i32 : vec2<i32> = sb.arr[idx].vec2_i32;
+  let vec2_u32 : vec2<u32> = sb.arr[idx].vec2_u32;
+  let vec2_f16 : vec2<f16> = sb.arr[idx].vec2_f16;
+  let vec3_f32 : vec3<f32> = sb.arr[idx].vec3_f32;
+  let vec3_i32 : vec3<i32> = sb.arr[idx].vec3_i32;
+  let vec3_u32 : vec3<u32> = sb.arr[idx].vec3_u32;
+  let vec3_f16 : vec3<f16> = sb.arr[idx].vec3_f16;
+  let vec4_f32 : vec4<f32> = sb.arr[idx].vec4_f32;
+  let vec4_i32 : vec4<i32> = sb.arr[idx].vec4_i32;
+  let vec4_u32 : vec4<u32> = sb.arr[idx].vec4_u32;
+  let vec4_f16 : vec4<f16> = sb.arr[idx].vec4_f16;
+  let mat2x2_f32 : mat2x2<f32> = sb.arr[idx].mat2x2_f32;
+  let mat2x3_f32 : mat2x3<f32> = sb.arr[idx].mat2x3_f32;
+  let mat2x4_f32 : mat2x4<f32> = sb.arr[idx].mat2x4_f32;
+  let mat3x2_f32 : mat3x2<f32> = sb.arr[idx].mat3x2_f32;
+  let mat3x3_f32 : mat3x3<f32> = sb.arr[idx].mat3x3_f32;
+  let mat3x4_f32 : mat3x4<f32> = sb.arr[idx].mat3x4_f32;
+  let mat4x2_f32 : mat4x2<f32> = sb.arr[idx].mat4x2_f32;
+  let mat4x3_f32 : mat4x3<f32> = sb.arr[idx].mat4x3_f32;
+  let mat4x4_f32 : mat4x4<f32> = sb.arr[idx].mat4x4_f32;
+  let mat2x2_f16 : mat2x2<f16> = sb.arr[idx].mat2x2_f16;
+  let mat2x3_f16 : mat2x3<f16> = sb.arr[idx].mat2x3_f16;
+  let mat2x4_f16 : mat2x4<f16> = sb.arr[idx].mat2x4_f16;
+  let mat3x2_f16 : mat3x2<f16> = sb.arr[idx].mat3x2_f16;
+  let mat3x3_f16 : mat3x3<f16> = sb.arr[idx].mat3x3_f16;
+  let mat3x4_f16 : mat3x4<f16> = sb.arr[idx].mat3x4_f16;
+  let mat4x2_f16 : mat4x2<f16> = sb.arr[idx].mat4x2_f16;
+  let mat4x3_f16 : mat4x3<f16> = sb.arr[idx].mat4x3_f16;
+  let mat4x4_f16 : mat4x4<f16> = sb.arr[idx].mat4x4_f16;
+  let arr2_vec3_f32 : array<vec3<f32>, 2> = sb.arr[idx].arr2_vec3_f32;
+  let arr2_mat4x2_f16 : array<mat4x2<f16>, 2> = sb.arr[idx].arr2_mat4x2_f16;
+}

diff --git a/test/tint/buffer/storage/dynamic_index/write.wgsl b/test/tint/buffer/storage/dynamic_index/write.wgsl
index 38f202c..454a809 100644
--- a/test/tint/buffer/storage/dynamic_index/write.wgsl
+++ b/test/tint/buffer/storage/dynamic_index/write.wgsl

@@ -1,30 +1,57 @@
 struct Inner {
-    a : vec3<i32>,
-    b : i32,
-    c : vec3<u32>,
-    d : u32,
-    e : vec3<f32>,
-    f : f32,
-    g : mat2x3<f32>,
-    h : mat3x2<f32>,
-    i : array<vec4<i32>, 4>,
+    scalar_f32 : f32,
+    scalar_i32 : i32,
+    scalar_u32 : u32,
+    vec2_f32 : vec2<f32>,
+    vec2_i32 : vec2<i32>,
+    vec2_u32 : vec2<u32>,
+    vec3_f32 : vec3<f32>,
+    vec3_i32 : vec3<i32>,
+    vec3_u32 : vec3<u32>,
+    vec4_f32 : vec4<f32>,
+    vec4_i32 : vec4<i32>,
+    vec4_u32 : vec4<u32>,
+    mat2x2_f32 : mat2x2<f32>,
+    mat2x3_f32 : mat2x3<f32>,
+    mat2x4_f32 : mat2x4<f32>,
+    mat3x2_f32 : mat3x2<f32>,
+    mat3x3_f32 : mat3x3<f32>,
+    mat3x4_f32 : mat3x4<f32>,
+    mat4x2_f32 : mat4x2<f32>,
+    mat4x3_f32 : mat4x3<f32>,
+    mat4x4_f32 : mat4x4<f32>,
+    arr2_vec3_f32 : array<vec3<f32>, 2>,
+
 };
 
 struct S {
     arr : array<Inner>,
 };
 
-@binding(0) @group(0) var<storage, read_write> s : S;
+@binding(0) @group(0) var<storage, read_write> sb : S;
 
 @compute @workgroup_size(1)
 fn main(@builtin(local_invocation_index) idx : u32) {
-    s.arr[idx].a = vec3<i32>();
-    s.arr[idx].b = i32();
-    s.arr[idx].c = vec3<u32>();
-    s.arr[idx].d = u32();
-    s.arr[idx].e = vec3<f32>();
-    s.arr[idx].f = f32();
-    s.arr[idx].g = mat2x3<f32>();
-    s.arr[idx].h = mat3x2<f32>();
-    s.arr[idx].i = array<vec4<i32>, 4>();
+    sb.arr[idx].scalar_f32 = f32();
+    sb.arr[idx].scalar_i32 = i32();
+    sb.arr[idx].scalar_u32 = u32();
+    sb.arr[idx].vec2_f32 = vec2<f32>();
+    sb.arr[idx].vec2_i32 = vec2<i32>();
+    sb.arr[idx].vec2_u32 = vec2<u32>();
+    sb.arr[idx].vec3_f32 = vec3<f32>();
+    sb.arr[idx].vec3_i32 = vec3<i32>();
+    sb.arr[idx].vec3_u32 = vec3<u32>();
+    sb.arr[idx].vec4_f32 = vec4<f32>();
+    sb.arr[idx].vec4_i32 = vec4<i32>();
+    sb.arr[idx].vec4_u32 = vec4<u32>();
+    sb.arr[idx].mat2x2_f32 = mat2x2<f32>();
+    sb.arr[idx].mat2x3_f32 = mat2x3<f32>();
+    sb.arr[idx].mat2x4_f32 = mat2x4<f32>();
+    sb.arr[idx].mat3x2_f32 = mat3x2<f32>();
+    sb.arr[idx].mat3x3_f32 = mat3x3<f32>();
+    sb.arr[idx].mat3x4_f32 = mat3x4<f32>();
+    sb.arr[idx].mat4x2_f32 = mat4x2<f32>();
+    sb.arr[idx].mat4x3_f32 = mat4x3<f32>();
+    sb.arr[idx].mat4x4_f32 = mat4x4<f32>();
+    sb.arr[idx].arr2_vec3_f32 = array<vec3<f32>, 2>();
 }

diff --git a/test/tint/buffer/storage/dynamic_index/write.wgsl.expected.dxc.hlsl b/test/tint/buffer/storage/dynamic_index/write.wgsl.expected.dxc.hlsl
index 87148c2..4bcc56d 100644
--- a/test/tint/buffer/storage/dynamic_index/write.wgsl.expected.dxc.hlsl
+++ b/test/tint/buffer/storage/dynamic_index/write.wgsl.expected.dxc.hlsl

@@ -1,40 +1,96 @@
-RWByteAddressBuffer s : register(u0, space0);
+RWByteAddressBuffer sb : register(u0, space0);
 
 struct tint_symbol_1 {
   uint idx : SV_GroupIndex;
 };
 
-void tint_symbol_8(RWByteAddressBuffer buffer, uint offset, float2x3 value) {
+void tint_symbol_14(RWByteAddressBuffer buffer, uint offset, float2x2 value) {
+  buffer.Store2((offset + 0u), asuint(value[0u]));
+  buffer.Store2((offset + 8u), asuint(value[1u]));
+}
+
+void tint_symbol_15(RWByteAddressBuffer buffer, uint offset, float2x3 value) {
   buffer.Store3((offset + 0u), asuint(value[0u]));
   buffer.Store3((offset + 16u), asuint(value[1u]));
 }
 
-void tint_symbol_9(RWByteAddressBuffer buffer, uint offset, float3x2 value) {
+void tint_symbol_16(RWByteAddressBuffer buffer, uint offset, float2x4 value) {
+  buffer.Store4((offset + 0u), asuint(value[0u]));
+  buffer.Store4((offset + 16u), asuint(value[1u]));
+}
+
+void tint_symbol_17(RWByteAddressBuffer buffer, uint offset, float3x2 value) {
   buffer.Store2((offset + 0u), asuint(value[0u]));
   buffer.Store2((offset + 8u), asuint(value[1u]));
   buffer.Store2((offset + 16u), asuint(value[2u]));
 }
 
-void tint_symbol_11(RWByteAddressBuffer buffer, uint offset, int4 value[4]) {
-  int4 array[4] = value;
+void tint_symbol_18(RWByteAddressBuffer buffer, uint offset, float3x3 value) {
+  buffer.Store3((offset + 0u), asuint(value[0u]));
+  buffer.Store3((offset + 16u), asuint(value[1u]));
+  buffer.Store3((offset + 32u), asuint(value[2u]));
+}
+
+void tint_symbol_19(RWByteAddressBuffer buffer, uint offset, float3x4 value) {
+  buffer.Store4((offset + 0u), asuint(value[0u]));
+  buffer.Store4((offset + 16u), asuint(value[1u]));
+  buffer.Store4((offset + 32u), asuint(value[2u]));
+}
+
+void tint_symbol_20(RWByteAddressBuffer buffer, uint offset, float4x2 value) {
+  buffer.Store2((offset + 0u), asuint(value[0u]));
+  buffer.Store2((offset + 8u), asuint(value[1u]));
+  buffer.Store2((offset + 16u), asuint(value[2u]));
+  buffer.Store2((offset + 24u), asuint(value[3u]));
+}
+
+void tint_symbol_21(RWByteAddressBuffer buffer, uint offset, float4x3 value) {
+  buffer.Store3((offset + 0u), asuint(value[0u]));
+  buffer.Store3((offset + 16u), asuint(value[1u]));
+  buffer.Store3((offset + 32u), asuint(value[2u]));
+  buffer.Store3((offset + 48u), asuint(value[3u]));
+}
+
+void tint_symbol_22(RWByteAddressBuffer buffer, uint offset, float4x4 value) {
+  buffer.Store4((offset + 0u), asuint(value[0u]));
+  buffer.Store4((offset + 16u), asuint(value[1u]));
+  buffer.Store4((offset + 32u), asuint(value[2u]));
+  buffer.Store4((offset + 48u), asuint(value[3u]));
+}
+
+void tint_symbol_23(RWByteAddressBuffer buffer, uint offset, float3 value[2]) {
+  float3 array[2] = value;
   {
-    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
-      buffer.Store4((offset + (i_1 * 16u)), asuint(array[i_1]));
+    for(uint i = 0u; (i < 2u); i = (i + 1u)) {
+      buffer.Store3((offset + (i * 16u)), asuint(array[i]));
     }
   }
 }
 
 void main_inner(uint idx) {
-  s.Store3((176u * idx), asuint((0).xxx));
-  s.Store(((176u * idx) + 12u), asuint(0));
-  s.Store3(((176u * idx) + 16u), asuint((0u).xxx));
-  s.Store(((176u * idx) + 28u), asuint(0u));
-  s.Store3(((176u * idx) + 32u), asuint((0.0f).xxx));
-  s.Store(((176u * idx) + 44u), asuint(0.0f));
-  tint_symbol_8(s, ((176u * idx) + 48u), float2x3((0.0f).xxx, (0.0f).xxx));
-  tint_symbol_9(s, ((176u * idx) + 80u), float3x2((0.0f).xx, (0.0f).xx, (0.0f).xx));
-  const int4 tint_symbol_13[4] = (int4[4])0;
-  tint_symbol_11(s, ((176u * idx) + 112u), tint_symbol_13);
+  sb.Store((544u * idx), asuint(0.0f));
+  sb.Store(((544u * idx) + 4u), asuint(0));
+  sb.Store(((544u * idx) + 8u), asuint(0u));
+  sb.Store2(((544u * idx) + 16u), asuint((0.0f).xx));
+  sb.Store2(((544u * idx) + 24u), asuint((0).xx));
+  sb.Store2(((544u * idx) + 32u), asuint((0u).xx));
+  sb.Store3(((544u * idx) + 48u), asuint((0.0f).xxx));
+  sb.Store3(((544u * idx) + 64u), asuint((0).xxx));
+  sb.Store3(((544u * idx) + 80u), asuint((0u).xxx));
+  sb.Store4(((544u * idx) + 96u), asuint((0.0f).xxxx));
+  sb.Store4(((544u * idx) + 112u), asuint((0).xxxx));
+  sb.Store4(((544u * idx) + 128u), asuint((0u).xxxx));
+  tint_symbol_14(sb, ((544u * idx) + 144u), float2x2((0.0f).xx, (0.0f).xx));
+  tint_symbol_15(sb, ((544u * idx) + 160u), float2x3((0.0f).xxx, (0.0f).xxx));
+  tint_symbol_16(sb, ((544u * idx) + 192u), float2x4((0.0f).xxxx, (0.0f).xxxx));
+  tint_symbol_17(sb, ((544u * idx) + 224u), float3x2((0.0f).xx, (0.0f).xx, (0.0f).xx));
+  tint_symbol_18(sb, ((544u * idx) + 256u), float3x3((0.0f).xxx, (0.0f).xxx, (0.0f).xxx));
+  tint_symbol_19(sb, ((544u * idx) + 304u), float3x4((0.0f).xxxx, (0.0f).xxxx, (0.0f).xxxx));
+  tint_symbol_20(sb, ((544u * idx) + 352u), float4x2((0.0f).xx, (0.0f).xx, (0.0f).xx, (0.0f).xx));
+  tint_symbol_21(sb, ((544u * idx) + 384u), float4x3((0.0f).xxx, (0.0f).xxx, (0.0f).xxx, (0.0f).xxx));
+  tint_symbol_22(sb, ((544u * idx) + 448u), float4x4((0.0f).xxxx, (0.0f).xxxx, (0.0f).xxxx, (0.0f).xxxx));
+  const float3 tint_symbol_24[2] = (float3[2])0;
+  tint_symbol_23(sb, ((544u * idx) + 512u), tint_symbol_24);
 }
 
 [numthreads(1, 1, 1)]

diff --git a/test/tint/buffer/storage/dynamic_index/write.wgsl.expected.fxc.hlsl b/test/tint/buffer/storage/dynamic_index/write.wgsl.expected.fxc.hlsl
index 87148c2..4bcc56d 100644
--- a/test/tint/buffer/storage/dynamic_index/write.wgsl.expected.fxc.hlsl
+++ b/test/tint/buffer/storage/dynamic_index/write.wgsl.expected.fxc.hlsl

@@ -1,40 +1,96 @@
-RWByteAddressBuffer s : register(u0, space0);
+RWByteAddressBuffer sb : register(u0, space0);
 
 struct tint_symbol_1 {
   uint idx : SV_GroupIndex;
 };
 
-void tint_symbol_8(RWByteAddressBuffer buffer, uint offset, float2x3 value) {
+void tint_symbol_14(RWByteAddressBuffer buffer, uint offset, float2x2 value) {
+  buffer.Store2((offset + 0u), asuint(value[0u]));
+  buffer.Store2((offset + 8u), asuint(value[1u]));
+}
+
+void tint_symbol_15(RWByteAddressBuffer buffer, uint offset, float2x3 value) {
   buffer.Store3((offset + 0u), asuint(value[0u]));
   buffer.Store3((offset + 16u), asuint(value[1u]));
 }
 
-void tint_symbol_9(RWByteAddressBuffer buffer, uint offset, float3x2 value) {
+void tint_symbol_16(RWByteAddressBuffer buffer, uint offset, float2x4 value) {
+  buffer.Store4((offset + 0u), asuint(value[0u]));
+  buffer.Store4((offset + 16u), asuint(value[1u]));
+}
+
+void tint_symbol_17(RWByteAddressBuffer buffer, uint offset, float3x2 value) {
   buffer.Store2((offset + 0u), asuint(value[0u]));
   buffer.Store2((offset + 8u), asuint(value[1u]));
   buffer.Store2((offset + 16u), asuint(value[2u]));
 }
 
-void tint_symbol_11(RWByteAddressBuffer buffer, uint offset, int4 value[4]) {
-  int4 array[4] = value;
+void tint_symbol_18(RWByteAddressBuffer buffer, uint offset, float3x3 value) {
+  buffer.Store3((offset + 0u), asuint(value[0u]));
+  buffer.Store3((offset + 16u), asuint(value[1u]));
+  buffer.Store3((offset + 32u), asuint(value[2u]));
+}
+
+void tint_symbol_19(RWByteAddressBuffer buffer, uint offset, float3x4 value) {
+  buffer.Store4((offset + 0u), asuint(value[0u]));
+  buffer.Store4((offset + 16u), asuint(value[1u]));
+  buffer.Store4((offset + 32u), asuint(value[2u]));
+}
+
+void tint_symbol_20(RWByteAddressBuffer buffer, uint offset, float4x2 value) {
+  buffer.Store2((offset + 0u), asuint(value[0u]));
+  buffer.Store2((offset + 8u), asuint(value[1u]));
+  buffer.Store2((offset + 16u), asuint(value[2u]));
+  buffer.Store2((offset + 24u), asuint(value[3u]));
+}
+
+void tint_symbol_21(RWByteAddressBuffer buffer, uint offset, float4x3 value) {
+  buffer.Store3((offset + 0u), asuint(value[0u]));
+  buffer.Store3((offset + 16u), asuint(value[1u]));
+  buffer.Store3((offset + 32u), asuint(value[2u]));
+  buffer.Store3((offset + 48u), asuint(value[3u]));
+}
+
+void tint_symbol_22(RWByteAddressBuffer buffer, uint offset, float4x4 value) {
+  buffer.Store4((offset + 0u), asuint(value[0u]));
+  buffer.Store4((offset + 16u), asuint(value[1u]));
+  buffer.Store4((offset + 32u), asuint(value[2u]));
+  buffer.Store4((offset + 48u), asuint(value[3u]));
+}
+
+void tint_symbol_23(RWByteAddressBuffer buffer, uint offset, float3 value[2]) {
+  float3 array[2] = value;
   {
-    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
-      buffer.Store4((offset + (i_1 * 16u)), asuint(array[i_1]));
+    for(uint i = 0u; (i < 2u); i = (i + 1u)) {
+      buffer.Store3((offset + (i * 16u)), asuint(array[i]));
     }
   }
 }
 
 void main_inner(uint idx) {
-  s.Store3((176u * idx), asuint((0).xxx));
-  s.Store(((176u * idx) + 12u), asuint(0));
-  s.Store3(((176u * idx) + 16u), asuint((0u).xxx));
-  s.Store(((176u * idx) + 28u), asuint(0u));
-  s.Store3(((176u * idx) + 32u), asuint((0.0f).xxx));
-  s.Store(((176u * idx) + 44u), asuint(0.0f));
-  tint_symbol_8(s, ((176u * idx) + 48u), float2x3((0.0f).xxx, (0.0f).xxx));
-  tint_symbol_9(s, ((176u * idx) + 80u), float3x2((0.0f).xx, (0.0f).xx, (0.0f).xx));
-  const int4 tint_symbol_13[4] = (int4[4])0;
-  tint_symbol_11(s, ((176u * idx) + 112u), tint_symbol_13);
+  sb.Store((544u * idx), asuint(0.0f));
+  sb.Store(((544u * idx) + 4u), asuint(0));
+  sb.Store(((544u * idx) + 8u), asuint(0u));
+  sb.Store2(((544u * idx) + 16u), asuint((0.0f).xx));
+  sb.Store2(((544u * idx) + 24u), asuint((0).xx));
+  sb.Store2(((544u * idx) + 32u), asuint((0u).xx));
+  sb.Store3(((544u * idx) + 48u), asuint((0.0f).xxx));
+  sb.Store3(((544u * idx) + 64u), asuint((0).xxx));
+  sb.Store3(((544u * idx) + 80u), asuint((0u).xxx));
+  sb.Store4(((544u * idx) + 96u), asuint((0.0f).xxxx));
+  sb.Store4(((544u * idx) + 112u), asuint((0).xxxx));
+  sb.Store4(((544u * idx) + 128u), asuint((0u).xxxx));
+  tint_symbol_14(sb, ((544u * idx) + 144u), float2x2((0.0f).xx, (0.0f).xx));
+  tint_symbol_15(sb, ((544u * idx) + 160u), float2x3((0.0f).xxx, (0.0f).xxx));
+  tint_symbol_16(sb, ((544u * idx) + 192u), float2x4((0.0f).xxxx, (0.0f).xxxx));
+  tint_symbol_17(sb, ((544u * idx) + 224u), float3x2((0.0f).xx, (0.0f).xx, (0.0f).xx));
+  tint_symbol_18(sb, ((544u * idx) + 256u), float3x3((0.0f).xxx, (0.0f).xxx, (0.0f).xxx));
+  tint_symbol_19(sb, ((544u * idx) + 304u), float3x4((0.0f).xxxx, (0.0f).xxxx, (0.0f).xxxx));
+  tint_symbol_20(sb, ((544u * idx) + 352u), float4x2((0.0f).xx, (0.0f).xx, (0.0f).xx, (0.0f).xx));
+  tint_symbol_21(sb, ((544u * idx) + 384u), float4x3((0.0f).xxx, (0.0f).xxx, (0.0f).xxx, (0.0f).xxx));
+  tint_symbol_22(sb, ((544u * idx) + 448u), float4x4((0.0f).xxxx, (0.0f).xxxx, (0.0f).xxxx, (0.0f).xxxx));
+  const float3 tint_symbol_24[2] = (float3[2])0;
+  tint_symbol_23(sb, ((544u * idx) + 512u), tint_symbol_24);
 }
 
 [numthreads(1, 1, 1)]

diff --git a/test/tint/buffer/storage/dynamic_index/write.wgsl.expected.glsl b/test/tint/buffer/storage/dynamic_index/write.wgsl.expected.glsl
index f26e111..5b3652e 100644
--- a/test/tint/buffer/storage/dynamic_index/write.wgsl.expected.glsl
+++ b/test/tint/buffer/storage/dynamic_index/write.wgsl.expected.glsl

@@ -1,34 +1,66 @@
 #version 310 es
 
 struct Inner {
-  ivec3 a;
-  int b;
-  uvec3 c;
-  uint d;
-  vec3 e;
-  float f;
-  mat2x3 g;
-  mat3x2 h;
+  float scalar_f32;
+  int scalar_i32;
+  uint scalar_u32;
   uint pad;
+  vec2 vec2_f32;
+  ivec2 vec2_i32;
+  uvec2 vec2_u32;
   uint pad_1;
-  ivec4 i[4];
+  uint pad_2;
+  vec3 vec3_f32;
+  uint pad_3;
+  ivec3 vec3_i32;
+  uint pad_4;
+  uvec3 vec3_u32;
+  uint pad_5;
+  vec4 vec4_f32;
+  ivec4 vec4_i32;
+  uvec4 vec4_u32;
+  mat2 mat2x2_f32;
+  mat2x3 mat2x3_f32;
+  mat2x4 mat2x4_f32;
+  mat3x2 mat3x2_f32;
+  uint pad_6;
+  uint pad_7;
+  mat3 mat3x3_f32;
+  mat3x4 mat3x4_f32;
+  mat4x2 mat4x2_f32;
+  mat4x3 mat4x3_f32;
+  mat4 mat4x4_f32;
+  vec3 arr2_vec3_f32[2];
 };
 
 layout(binding = 0, std430) buffer S_ssbo {
   Inner arr[];
-} s;
+} sb;
 
 void tint_symbol(uint idx) {
-  s.arr[idx].a = ivec3(0);
-  s.arr[idx].b = 0;
-  s.arr[idx].c = uvec3(0u);
-  s.arr[idx].d = 0u;
-  s.arr[idx].e = vec3(0.0f);
-  s.arr[idx].f = 0.0f;
-  s.arr[idx].g = mat2x3(vec3(0.0f), vec3(0.0f));
-  s.arr[idx].h = mat3x2(vec2(0.0f), vec2(0.0f), vec2(0.0f));
-  ivec4 tint_symbol_1[4] = ivec4[4](ivec4(0), ivec4(0), ivec4(0), ivec4(0));
-  s.arr[idx].i = tint_symbol_1;
+  sb.arr[idx].scalar_f32 = 0.0f;
+  sb.arr[idx].scalar_i32 = 0;
+  sb.arr[idx].scalar_u32 = 0u;
+  sb.arr[idx].vec2_f32 = vec2(0.0f);
+  sb.arr[idx].vec2_i32 = ivec2(0);
+  sb.arr[idx].vec2_u32 = uvec2(0u);
+  sb.arr[idx].vec3_f32 = vec3(0.0f);
+  sb.arr[idx].vec3_i32 = ivec3(0);
+  sb.arr[idx].vec3_u32 = uvec3(0u);
+  sb.arr[idx].vec4_f32 = vec4(0.0f);
+  sb.arr[idx].vec4_i32 = ivec4(0);
+  sb.arr[idx].vec4_u32 = uvec4(0u);
+  sb.arr[idx].mat2x2_f32 = mat2(vec2(0.0f), vec2(0.0f));
+  sb.arr[idx].mat2x3_f32 = mat2x3(vec3(0.0f), vec3(0.0f));
+  sb.arr[idx].mat2x4_f32 = mat2x4(vec4(0.0f), vec4(0.0f));
+  sb.arr[idx].mat3x2_f32 = mat3x2(vec2(0.0f), vec2(0.0f), vec2(0.0f));
+  sb.arr[idx].mat3x3_f32 = mat3(vec3(0.0f), vec3(0.0f), vec3(0.0f));
+  sb.arr[idx].mat3x4_f32 = mat3x4(vec4(0.0f), vec4(0.0f), vec4(0.0f));
+  sb.arr[idx].mat4x2_f32 = mat4x2(vec2(0.0f), vec2(0.0f), vec2(0.0f), vec2(0.0f));
+  sb.arr[idx].mat4x3_f32 = mat4x3(vec3(0.0f), vec3(0.0f), vec3(0.0f), vec3(0.0f));
+  sb.arr[idx].mat4x4_f32 = mat4(vec4(0.0f), vec4(0.0f), vec4(0.0f), vec4(0.0f));
+  vec3 tint_symbol_1[2] = vec3[2](vec3(0.0f), vec3(0.0f));
+  sb.arr[idx].arr2_vec3_f32 = tint_symbol_1;
 }
 
 layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;

diff --git a/test/tint/buffer/storage/dynamic_index/write.wgsl.expected.msl b/test/tint/buffer/storage/dynamic_index/write.wgsl.expected.msl
index bb82069..4d2823b 100644
--- a/test/tint/buffer/storage/dynamic_index/write.wgsl.expected.msl
+++ b/test/tint/buffer/storage/dynamic_index/write.wgsl.expected.msl

@@ -15,16 +15,34 @@
 };
 
 struct Inner {
-  /* 0x0000 */ packed_int3 a;
-  /* 0x000c */ int b;
-  /* 0x0010 */ packed_uint3 c;
-  /* 0x001c */ uint d;
-  /* 0x0020 */ packed_float3 e;
-  /* 0x002c */ float f;
-  /* 0x0030 */ float2x3 g;
-  /* 0x0050 */ float3x2 h;
-  /* 0x0068 */ tint_array<int8_t, 8> tint_pad;
-  /* 0x0070 */ tint_array<int4, 4> i;
+  /* 0x0000 */ float scalar_f32;
+  /* 0x0004 */ int scalar_i32;
+  /* 0x0008 */ uint scalar_u32;
+  /* 0x000c */ tint_array<int8_t, 4> tint_pad;
+  /* 0x0010 */ float2 vec2_f32;
+  /* 0x0018 */ int2 vec2_i32;
+  /* 0x0020 */ uint2 vec2_u32;
+  /* 0x0028 */ tint_array<int8_t, 8> tint_pad_1;
+  /* 0x0030 */ packed_float3 vec3_f32;
+  /* 0x003c */ tint_array<int8_t, 4> tint_pad_2;
+  /* 0x0040 */ packed_int3 vec3_i32;
+  /* 0x004c */ tint_array<int8_t, 4> tint_pad_3;
+  /* 0x0050 */ packed_uint3 vec3_u32;
+  /* 0x005c */ tint_array<int8_t, 4> tint_pad_4;
+  /* 0x0060 */ float4 vec4_f32;
+  /* 0x0070 */ int4 vec4_i32;
+  /* 0x0080 */ uint4 vec4_u32;
+  /* 0x0090 */ float2x2 mat2x2_f32;
+  /* 0x00a0 */ float2x3 mat2x3_f32;
+  /* 0x00c0 */ float2x4 mat2x4_f32;
+  /* 0x00e0 */ float3x2 mat3x2_f32;
+  /* 0x00f8 */ tint_array<int8_t, 8> tint_pad_5;
+  /* 0x0100 */ float3x3 mat3x3_f32;
+  /* 0x0130 */ float3x4 mat3x4_f32;
+  /* 0x0160 */ float4x2 mat4x2_f32;
+  /* 0x0180 */ float4x3 mat4x3_f32;
+  /* 0x01c0 */ float4x4 mat4x4_f32;
+  /* 0x0200 */ tint_array<float3, 2> arr2_vec3_f32;
 };
 
 struct S {
@@ -32,16 +50,29 @@
 };
 
 void tint_symbol_inner(uint idx, device S* const tint_symbol_2) {
-  (*(tint_symbol_2)).arr[idx].a = int3(0);
-  (*(tint_symbol_2)).arr[idx].b = 0;
-  (*(tint_symbol_2)).arr[idx].c = uint3(0u);
-  (*(tint_symbol_2)).arr[idx].d = 0u;
-  (*(tint_symbol_2)).arr[idx].e = float3(0.0f);
-  (*(tint_symbol_2)).arr[idx].f = 0.0f;
-  (*(tint_symbol_2)).arr[idx].g = float2x3(float3(0.0f), float3(0.0f));
-  (*(tint_symbol_2)).arr[idx].h = float3x2(float2(0.0f), float2(0.0f), float2(0.0f));
-  tint_array<int4, 4> const tint_symbol_1 = tint_array<int4, 4>{};
-  (*(tint_symbol_2)).arr[idx].i = tint_symbol_1;
+  (*(tint_symbol_2)).arr[idx].scalar_f32 = 0.0f;
+  (*(tint_symbol_2)).arr[idx].scalar_i32 = 0;
+  (*(tint_symbol_2)).arr[idx].scalar_u32 = 0u;
+  (*(tint_symbol_2)).arr[idx].vec2_f32 = float2(0.0f);
+  (*(tint_symbol_2)).arr[idx].vec2_i32 = int2(0);
+  (*(tint_symbol_2)).arr[idx].vec2_u32 = uint2(0u);
+  (*(tint_symbol_2)).arr[idx].vec3_f32 = float3(0.0f);
+  (*(tint_symbol_2)).arr[idx].vec3_i32 = int3(0);
+  (*(tint_symbol_2)).arr[idx].vec3_u32 = uint3(0u);
+  (*(tint_symbol_2)).arr[idx].vec4_f32 = float4(0.0f);
+  (*(tint_symbol_2)).arr[idx].vec4_i32 = int4(0);
+  (*(tint_symbol_2)).arr[idx].vec4_u32 = uint4(0u);
+  (*(tint_symbol_2)).arr[idx].mat2x2_f32 = float2x2(float2(0.0f), float2(0.0f));
+  (*(tint_symbol_2)).arr[idx].mat2x3_f32 = float2x3(float3(0.0f), float3(0.0f));
+  (*(tint_symbol_2)).arr[idx].mat2x4_f32 = float2x4(float4(0.0f), float4(0.0f));
+  (*(tint_symbol_2)).arr[idx].mat3x2_f32 = float3x2(float2(0.0f), float2(0.0f), float2(0.0f));
+  (*(tint_symbol_2)).arr[idx].mat3x3_f32 = float3x3(float3(0.0f), float3(0.0f), float3(0.0f));
+  (*(tint_symbol_2)).arr[idx].mat3x4_f32 = float3x4(float4(0.0f), float4(0.0f), float4(0.0f));
+  (*(tint_symbol_2)).arr[idx].mat4x2_f32 = float4x2(float2(0.0f), float2(0.0f), float2(0.0f), float2(0.0f));
+  (*(tint_symbol_2)).arr[idx].mat4x3_f32 = float4x3(float3(0.0f), float3(0.0f), float3(0.0f), float3(0.0f));
+  (*(tint_symbol_2)).arr[idx].mat4x4_f32 = float4x4(float4(0.0f), float4(0.0f), float4(0.0f), float4(0.0f));
+  tint_array<float3, 2> const tint_symbol_1 = tint_array<float3, 2>{};
+  (*(tint_symbol_2)).arr[idx].arr2_vec3_f32 = tint_symbol_1;
 }
 
 kernel void tint_symbol(device S* tint_symbol_3 [[buffer(0)]], uint idx [[thread_index_in_threadgroup]]) {

diff --git a/test/tint/buffer/storage/dynamic_index/write.wgsl.expected.spvasm b/test/tint/buffer/storage/dynamic_index/write.wgsl.expected.spvasm
index c7d951c..2ec199c 100644
--- a/test/tint/buffer/storage/dynamic_index/write.wgsl.expected.spvasm
+++ b/test/tint/buffer/storage/dynamic_index/write.wgsl.expected.spvasm

@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 0
-; Bound: 65
+; Bound: 128
 ; Schema: 0
                OpCapability Shader
                OpMemoryModel Logical GLSL450
@@ -11,16 +11,29 @@
                OpName %S "S"
                OpMemberName %S 0 "arr"
                OpName %Inner "Inner"
-               OpMemberName %Inner 0 "a"
-               OpMemberName %Inner 1 "b"
-               OpMemberName %Inner 2 "c"
-               OpMemberName %Inner 3 "d"
-               OpMemberName %Inner 4 "e"
-               OpMemberName %Inner 5 "f"
-               OpMemberName %Inner 6 "g"
-               OpMemberName %Inner 7 "h"
-               OpMemberName %Inner 8 "i"
-               OpName %s "s"
+               OpMemberName %Inner 0 "scalar_f32"
+               OpMemberName %Inner 1 "scalar_i32"
+               OpMemberName %Inner 2 "scalar_u32"
+               OpMemberName %Inner 3 "vec2_f32"
+               OpMemberName %Inner 4 "vec2_i32"
+               OpMemberName %Inner 5 "vec2_u32"
+               OpMemberName %Inner 6 "vec3_f32"
+               OpMemberName %Inner 7 "vec3_i32"
+               OpMemberName %Inner 8 "vec3_u32"
+               OpMemberName %Inner 9 "vec4_f32"
+               OpMemberName %Inner 10 "vec4_i32"
+               OpMemberName %Inner 11 "vec4_u32"
+               OpMemberName %Inner 12 "mat2x2_f32"
+               OpMemberName %Inner 13 "mat2x3_f32"
+               OpMemberName %Inner 14 "mat2x4_f32"
+               OpMemberName %Inner 15 "mat3x2_f32"
+               OpMemberName %Inner 16 "mat3x3_f32"
+               OpMemberName %Inner 17 "mat3x4_f32"
+               OpMemberName %Inner 18 "mat4x2_f32"
+               OpMemberName %Inner 19 "mat4x3_f32"
+               OpMemberName %Inner 20 "mat4x4_f32"
+               OpMemberName %Inner 21 "arr2_vec3_f32"
+               OpName %sb "sb"
                OpName %main_inner "main_inner"
                OpName %idx "idx"
                OpName %main "main"
@@ -28,96 +41,199 @@
                OpDecorate %S Block
                OpMemberDecorate %S 0 Offset 0
                OpMemberDecorate %Inner 0 Offset 0
-               OpMemberDecorate %Inner 1 Offset 12
-               OpMemberDecorate %Inner 2 Offset 16
-               OpMemberDecorate %Inner 3 Offset 28
-               OpMemberDecorate %Inner 4 Offset 32
-               OpMemberDecorate %Inner 5 Offset 44
+               OpMemberDecorate %Inner 1 Offset 4
+               OpMemberDecorate %Inner 2 Offset 8
+               OpMemberDecorate %Inner 3 Offset 16
+               OpMemberDecorate %Inner 4 Offset 24
+               OpMemberDecorate %Inner 5 Offset 32
                OpMemberDecorate %Inner 6 Offset 48
-               OpMemberDecorate %Inner 6 ColMajor
-               OpMemberDecorate %Inner 6 MatrixStride 16
-               OpMemberDecorate %Inner 7 Offset 80
-               OpMemberDecorate %Inner 7 ColMajor
-               OpMemberDecorate %Inner 7 MatrixStride 8
-               OpMemberDecorate %Inner 8 Offset 112
-               OpDecorate %_arr_v4int_uint_4 ArrayStride 16
-               OpDecorate %_runtimearr_Inner ArrayStride 176
-               OpDecorate %s Binding 0
-               OpDecorate %s DescriptorSet 0
+               OpMemberDecorate %Inner 7 Offset 64
+               OpMemberDecorate %Inner 8 Offset 80
+               OpMemberDecorate %Inner 9 Offset 96
+               OpMemberDecorate %Inner 10 Offset 112
+               OpMemberDecorate %Inner 11 Offset 128
+               OpMemberDecorate %Inner 12 Offset 144
+               OpMemberDecorate %Inner 12 ColMajor
+               OpMemberDecorate %Inner 12 MatrixStride 8
+               OpMemberDecorate %Inner 13 Offset 160
+               OpMemberDecorate %Inner 13 ColMajor
+               OpMemberDecorate %Inner 13 MatrixStride 16
+               OpMemberDecorate %Inner 14 Offset 192
+               OpMemberDecorate %Inner 14 ColMajor
+               OpMemberDecorate %Inner 14 MatrixStride 16
+               OpMemberDecorate %Inner 15 Offset 224
+               OpMemberDecorate %Inner 15 ColMajor
+               OpMemberDecorate %Inner 15 MatrixStride 8
+               OpMemberDecorate %Inner 16 Offset 256
+               OpMemberDecorate %Inner 16 ColMajor
+               OpMemberDecorate %Inner 16 MatrixStride 16
+               OpMemberDecorate %Inner 17 Offset 304
+               OpMemberDecorate %Inner 17 ColMajor
+               OpMemberDecorate %Inner 17 MatrixStride 16
+               OpMemberDecorate %Inner 18 Offset 352
+               OpMemberDecorate %Inner 18 ColMajor
+               OpMemberDecorate %Inner 18 MatrixStride 8
+               OpMemberDecorate %Inner 19 Offset 384
+               OpMemberDecorate %Inner 19 ColMajor
+               OpMemberDecorate %Inner 19 MatrixStride 16
+               OpMemberDecorate %Inner 20 Offset 448
+               OpMemberDecorate %Inner 20 ColMajor
+               OpMemberDecorate %Inner 20 MatrixStride 16
+               OpMemberDecorate %Inner 21 Offset 512
+               OpDecorate %_arr_v3float_uint_2 ArrayStride 16
+               OpDecorate %_runtimearr_Inner ArrayStride 544
+               OpDecorate %sb Binding 0
+               OpDecorate %sb DescriptorSet 0
        %uint = OpTypeInt 32 0
 %_ptr_Input_uint = OpTypePointer Input %uint
       %idx_1 = OpVariable %_ptr_Input_uint Input
+      %float = OpTypeFloat 32
         %int = OpTypeInt 32 1
+    %v2float = OpTypeVector %float 2
+      %v2int = OpTypeVector %int 2
+     %v2uint = OpTypeVector %uint 2
+    %v3float = OpTypeVector %float 3
       %v3int = OpTypeVector %int 3
      %v3uint = OpTypeVector %uint 3
-      %float = OpTypeFloat 32
-    %v3float = OpTypeVector %float 3
-%mat2v3float = OpTypeMatrix %v3float 2
-    %v2float = OpTypeVector %float 2
-%mat3v2float = OpTypeMatrix %v2float 3
+    %v4float = OpTypeVector %float 4
       %v4int = OpTypeVector %int 4
-     %uint_4 = OpConstant %uint 4
-%_arr_v4int_uint_4 = OpTypeArray %v4int %uint_4
-      %Inner = OpTypeStruct %v3int %int %v3uint %uint %v3float %float %mat2v3float %mat3v2float %_arr_v4int_uint_4
+     %v4uint = OpTypeVector %uint 4
+%mat2v2float = OpTypeMatrix %v2float 2
+%mat2v3float = OpTypeMatrix %v3float 2
+%mat2v4float = OpTypeMatrix %v4float 2
+%mat3v2float = OpTypeMatrix %v2float 3
+%mat3v3float = OpTypeMatrix %v3float 3
+%mat3v4float = OpTypeMatrix %v4float 3
+%mat4v2float = OpTypeMatrix %v2float 4
+%mat4v3float = OpTypeMatrix %v3float 4
+%mat4v4float = OpTypeMatrix %v4float 4
+     %uint_2 = OpConstant %uint 2
+%_arr_v3float_uint_2 = OpTypeArray %v3float %uint_2
+      %Inner = OpTypeStruct %float %int %uint %v2float %v2int %v2uint %v3float %v3int %v3uint %v4float %v4int %v4uint %mat2v2float %mat2v3float %mat2v4float %mat3v2float %mat3v3float %mat3v4float %mat4v2float %mat4v3float %mat4v4float %_arr_v3float_uint_2
 %_runtimearr_Inner = OpTypeRuntimeArray %Inner
           %S = OpTypeStruct %_runtimearr_Inner
 %_ptr_StorageBuffer_S = OpTypePointer StorageBuffer %S
-          %s = OpVariable %_ptr_StorageBuffer_S StorageBuffer
+         %sb = OpVariable %_ptr_StorageBuffer_S StorageBuffer
        %void = OpTypeVoid
-         %20 = OpTypeFunction %void %uint
+         %31 = OpTypeFunction %void %uint
      %uint_0 = OpConstant %uint 0
-%_ptr_StorageBuffer_v3int = OpTypePointer StorageBuffer %v3int
-         %28 = OpConstantNull %v3int
+%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
+         %39 = OpConstantNull %float
      %uint_1 = OpConstant %uint 1
 %_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int
-         %32 = OpConstantNull %int
-     %uint_2 = OpConstant %uint 2
-%_ptr_StorageBuffer_v3uint = OpTypePointer StorageBuffer %v3uint
-         %36 = OpConstantNull %v3uint
-     %uint_3 = OpConstant %uint 3
+         %43 = OpConstantNull %int
 %_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
-         %40 = OpConstantNull %uint
-%_ptr_StorageBuffer_v3float = OpTypePointer StorageBuffer %v3float
-         %43 = OpConstantNull %v3float
+         %46 = OpConstantNull %uint
+     %uint_3 = OpConstant %uint 3
+%_ptr_StorageBuffer_v2float = OpTypePointer StorageBuffer %v2float
+         %50 = OpConstantNull %v2float
+     %uint_4 = OpConstant %uint 4
+%_ptr_StorageBuffer_v2int = OpTypePointer StorageBuffer %v2int
+         %54 = OpConstantNull %v2int
      %uint_5 = OpConstant %uint 5
-%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
-         %47 = OpConstantNull %float
+%_ptr_StorageBuffer_v2uint = OpTypePointer StorageBuffer %v2uint
+         %58 = OpConstantNull %v2uint
      %uint_6 = OpConstant %uint 6
-%_ptr_StorageBuffer_mat2v3float = OpTypePointer StorageBuffer %mat2v3float
-         %51 = OpConstantNull %mat2v3float
+%_ptr_StorageBuffer_v3float = OpTypePointer StorageBuffer %v3float
+         %62 = OpConstantNull %v3float
      %uint_7 = OpConstant %uint 7
-%_ptr_StorageBuffer_mat3v2float = OpTypePointer StorageBuffer %mat3v2float
-         %55 = OpConstantNull %mat3v2float
+%_ptr_StorageBuffer_v3int = OpTypePointer StorageBuffer %v3int
+         %66 = OpConstantNull %v3int
      %uint_8 = OpConstant %uint 8
-%_ptr_StorageBuffer__arr_v4int_uint_4 = OpTypePointer StorageBuffer %_arr_v4int_uint_4
-         %59 = OpConstantNull %_arr_v4int_uint_4
-         %60 = OpTypeFunction %void
- %main_inner = OpFunction %void None %20
+%_ptr_StorageBuffer_v3uint = OpTypePointer StorageBuffer %v3uint
+         %70 = OpConstantNull %v3uint
+     %uint_9 = OpConstant %uint 9
+%_ptr_StorageBuffer_v4float = OpTypePointer StorageBuffer %v4float
+         %74 = OpConstantNull %v4float
+    %uint_10 = OpConstant %uint 10
+%_ptr_StorageBuffer_v4int = OpTypePointer StorageBuffer %v4int
+         %78 = OpConstantNull %v4int
+    %uint_11 = OpConstant %uint 11
+%_ptr_StorageBuffer_v4uint = OpTypePointer StorageBuffer %v4uint
+         %82 = OpConstantNull %v4uint
+    %uint_12 = OpConstant %uint 12
+%_ptr_StorageBuffer_mat2v2float = OpTypePointer StorageBuffer %mat2v2float
+         %86 = OpConstantNull %mat2v2float
+    %uint_13 = OpConstant %uint 13
+%_ptr_StorageBuffer_mat2v3float = OpTypePointer StorageBuffer %mat2v3float
+         %90 = OpConstantNull %mat2v3float
+    %uint_14 = OpConstant %uint 14
+%_ptr_StorageBuffer_mat2v4float = OpTypePointer StorageBuffer %mat2v4float
+         %94 = OpConstantNull %mat2v4float
+    %uint_15 = OpConstant %uint 15
+%_ptr_StorageBuffer_mat3v2float = OpTypePointer StorageBuffer %mat3v2float
+         %98 = OpConstantNull %mat3v2float
+    %uint_16 = OpConstant %uint 16
+%_ptr_StorageBuffer_mat3v3float = OpTypePointer StorageBuffer %mat3v3float
+        %102 = OpConstantNull %mat3v3float
+    %uint_17 = OpConstant %uint 17
+%_ptr_StorageBuffer_mat3v4float = OpTypePointer StorageBuffer %mat3v4float
+        %106 = OpConstantNull %mat3v4float
+    %uint_18 = OpConstant %uint 18
+%_ptr_StorageBuffer_mat4v2float = OpTypePointer StorageBuffer %mat4v2float
+        %110 = OpConstantNull %mat4v2float
+    %uint_19 = OpConstant %uint 19
+%_ptr_StorageBuffer_mat4v3float = OpTypePointer StorageBuffer %mat4v3float
+        %114 = OpConstantNull %mat4v3float
+    %uint_20 = OpConstant %uint 20
+%_ptr_StorageBuffer_mat4v4float = OpTypePointer StorageBuffer %mat4v4float
+        %118 = OpConstantNull %mat4v4float
+    %uint_21 = OpConstant %uint 21
+%_ptr_StorageBuffer__arr_v3float_uint_2 = OpTypePointer StorageBuffer %_arr_v3float_uint_2
+        %122 = OpConstantNull %_arr_v3float_uint_2
+        %123 = OpTypeFunction %void
+ %main_inner = OpFunction %void None %31
         %idx = OpFunctionParameter %uint
-         %24 = OpLabel
-         %27 = OpAccessChain %_ptr_StorageBuffer_v3int %s %uint_0 %idx %uint_0
-               OpStore %27 %28
-         %31 = OpAccessChain %_ptr_StorageBuffer_int %s %uint_0 %idx %uint_1
-               OpStore %31 %32
-         %35 = OpAccessChain %_ptr_StorageBuffer_v3uint %s %uint_0 %idx %uint_2
-               OpStore %35 %36
-         %39 = OpAccessChain %_ptr_StorageBuffer_uint %s %uint_0 %idx %uint_3
-               OpStore %39 %40
-         %42 = OpAccessChain %_ptr_StorageBuffer_v3float %s %uint_0 %idx %uint_4
+         %35 = OpLabel
+         %38 = OpAccessChain %_ptr_StorageBuffer_float %sb %uint_0 %idx %uint_0
+               OpStore %38 %39
+         %42 = OpAccessChain %_ptr_StorageBuffer_int %sb %uint_0 %idx %uint_1
                OpStore %42 %43
-         %46 = OpAccessChain %_ptr_StorageBuffer_float %s %uint_0 %idx %uint_5
-               OpStore %46 %47
-         %50 = OpAccessChain %_ptr_StorageBuffer_mat2v3float %s %uint_0 %idx %uint_6
-               OpStore %50 %51
-         %54 = OpAccessChain %_ptr_StorageBuffer_mat3v2float %s %uint_0 %idx %uint_7
-               OpStore %54 %55
-         %58 = OpAccessChain %_ptr_StorageBuffer__arr_v4int_uint_4 %s %uint_0 %idx %uint_8
-               OpStore %58 %59
+         %45 = OpAccessChain %_ptr_StorageBuffer_uint %sb %uint_0 %idx %uint_2
+               OpStore %45 %46
+         %49 = OpAccessChain %_ptr_StorageBuffer_v2float %sb %uint_0 %idx %uint_3
+               OpStore %49 %50
+         %53 = OpAccessChain %_ptr_StorageBuffer_v2int %sb %uint_0 %idx %uint_4
+               OpStore %53 %54
+         %57 = OpAccessChain %_ptr_StorageBuffer_v2uint %sb %uint_0 %idx %uint_5
+               OpStore %57 %58
+         %61 = OpAccessChain %_ptr_StorageBuffer_v3float %sb %uint_0 %idx %uint_6
+               OpStore %61 %62
+         %65 = OpAccessChain %_ptr_StorageBuffer_v3int %sb %uint_0 %idx %uint_7
+               OpStore %65 %66
+         %69 = OpAccessChain %_ptr_StorageBuffer_v3uint %sb %uint_0 %idx %uint_8
+               OpStore %69 %70
+         %73 = OpAccessChain %_ptr_StorageBuffer_v4float %sb %uint_0 %idx %uint_9
+               OpStore %73 %74
+         %77 = OpAccessChain %_ptr_StorageBuffer_v4int %sb %uint_0 %idx %uint_10
+               OpStore %77 %78
+         %81 = OpAccessChain %_ptr_StorageBuffer_v4uint %sb %uint_0 %idx %uint_11
+               OpStore %81 %82
+         %85 = OpAccessChain %_ptr_StorageBuffer_mat2v2float %sb %uint_0 %idx %uint_12
+               OpStore %85 %86
+         %89 = OpAccessChain %_ptr_StorageBuffer_mat2v3float %sb %uint_0 %idx %uint_13
+               OpStore %89 %90
+         %93 = OpAccessChain %_ptr_StorageBuffer_mat2v4float %sb %uint_0 %idx %uint_14
+               OpStore %93 %94
+         %97 = OpAccessChain %_ptr_StorageBuffer_mat3v2float %sb %uint_0 %idx %uint_15
+               OpStore %97 %98
+        %101 = OpAccessChain %_ptr_StorageBuffer_mat3v3float %sb %uint_0 %idx %uint_16
+               OpStore %101 %102
+        %105 = OpAccessChain %_ptr_StorageBuffer_mat3v4float %sb %uint_0 %idx %uint_17
+               OpStore %105 %106
+        %109 = OpAccessChain %_ptr_StorageBuffer_mat4v2float %sb %uint_0 %idx %uint_18
+               OpStore %109 %110
+        %113 = OpAccessChain %_ptr_StorageBuffer_mat4v3float %sb %uint_0 %idx %uint_19
+               OpStore %113 %114
+        %117 = OpAccessChain %_ptr_StorageBuffer_mat4v4float %sb %uint_0 %idx %uint_20
+               OpStore %117 %118
+        %121 = OpAccessChain %_ptr_StorageBuffer__arr_v3float_uint_2 %sb %uint_0 %idx %uint_21
+               OpStore %121 %122
                OpReturn
                OpFunctionEnd
-       %main = OpFunction %void None %60
-         %62 = OpLabel
-         %64 = OpLoad %uint %idx_1
-         %63 = OpFunctionCall %void %main_inner %64
+       %main = OpFunction %void None %123
+        %125 = OpLabel
+        %127 = OpLoad %uint %idx_1
+        %126 = OpFunctionCall %void %main_inner %127
                OpReturn
                OpFunctionEnd

diff --git a/test/tint/buffer/storage/dynamic_index/write.wgsl.expected.wgsl b/test/tint/buffer/storage/dynamic_index/write.wgsl.expected.wgsl
index d2fbdff..33c5197 100644
--- a/test/tint/buffer/storage/dynamic_index/write.wgsl.expected.wgsl
+++ b/test/tint/buffer/storage/dynamic_index/write.wgsl.expected.wgsl

@@ -1,30 +1,56 @@
 struct Inner {
-  a : vec3<i32>,
-  b : i32,
-  c : vec3<u32>,
-  d : u32,
-  e : vec3<f32>,
-  f : f32,
-  g : mat2x3<f32>,
-  h : mat3x2<f32>,
-  i : array<vec4<i32>, 4>,
+  scalar_f32 : f32,
+  scalar_i32 : i32,
+  scalar_u32 : u32,
+  vec2_f32 : vec2<f32>,
+  vec2_i32 : vec2<i32>,
+  vec2_u32 : vec2<u32>,
+  vec3_f32 : vec3<f32>,
+  vec3_i32 : vec3<i32>,
+  vec3_u32 : vec3<u32>,
+  vec4_f32 : vec4<f32>,
+  vec4_i32 : vec4<i32>,
+  vec4_u32 : vec4<u32>,
+  mat2x2_f32 : mat2x2<f32>,
+  mat2x3_f32 : mat2x3<f32>,
+  mat2x4_f32 : mat2x4<f32>,
+  mat3x2_f32 : mat3x2<f32>,
+  mat3x3_f32 : mat3x3<f32>,
+  mat3x4_f32 : mat3x4<f32>,
+  mat4x2_f32 : mat4x2<f32>,
+  mat4x3_f32 : mat4x3<f32>,
+  mat4x4_f32 : mat4x4<f32>,
+  arr2_vec3_f32 : array<vec3<f32>, 2>,
 }
 
 struct S {
   arr : array<Inner>,
 }
 
-@binding(0) @group(0) var<storage, read_write> s : S;
+@binding(0) @group(0) var<storage, read_write> sb : S;
 
 @compute @workgroup_size(1)
 fn main(@builtin(local_invocation_index) idx : u32) {
-  s.arr[idx].a = vec3<i32>();
-  s.arr[idx].b = i32();
-  s.arr[idx].c = vec3<u32>();
-  s.arr[idx].d = u32();
-  s.arr[idx].e = vec3<f32>();
-  s.arr[idx].f = f32();
-  s.arr[idx].g = mat2x3<f32>();
-  s.arr[idx].h = mat3x2<f32>();
-  s.arr[idx].i = array<vec4<i32>, 4>();
+  sb.arr[idx].scalar_f32 = f32();
+  sb.arr[idx].scalar_i32 = i32();
+  sb.arr[idx].scalar_u32 = u32();
+  sb.arr[idx].vec2_f32 = vec2<f32>();
+  sb.arr[idx].vec2_i32 = vec2<i32>();
+  sb.arr[idx].vec2_u32 = vec2<u32>();
+  sb.arr[idx].vec3_f32 = vec3<f32>();
+  sb.arr[idx].vec3_i32 = vec3<i32>();
+  sb.arr[idx].vec3_u32 = vec3<u32>();
+  sb.arr[idx].vec4_f32 = vec4<f32>();
+  sb.arr[idx].vec4_i32 = vec4<i32>();
+  sb.arr[idx].vec4_u32 = vec4<u32>();
+  sb.arr[idx].mat2x2_f32 = mat2x2<f32>();
+  sb.arr[idx].mat2x3_f32 = mat2x3<f32>();
+  sb.arr[idx].mat2x4_f32 = mat2x4<f32>();
+  sb.arr[idx].mat3x2_f32 = mat3x2<f32>();
+  sb.arr[idx].mat3x3_f32 = mat3x3<f32>();
+  sb.arr[idx].mat3x4_f32 = mat3x4<f32>();
+  sb.arr[idx].mat4x2_f32 = mat4x2<f32>();
+  sb.arr[idx].mat4x3_f32 = mat4x3<f32>();
+  sb.arr[idx].mat4x4_f32 = mat4x4<f32>();
+  sb.arr[idx].arr2_vec3_f32 = array<vec3<f32>, 2>();
 }

diff --git a/test/tint/buffer/storage/dynamic_index/write_f16.wgsl b/test/tint/buffer/storage/dynamic_index/write_f16.wgsl
new file mode 100644
index 0000000..a683fe2
--- /dev/null
+++ b/test/tint/buffer/storage/dynamic_index/write_f16.wgsl

@@ -0,0 +1,87 @@
+enable f16;
+
+struct Inner {
+    scalar_f32 : f32,
+    scalar_i32 : i32,
+    scalar_u32 : u32,
+    scalar_f16 : f16,
+    vec2_f32 : vec2<f32>,
+    vec2_i32 : vec2<i32>,
+    vec2_u32 : vec2<u32>,
+    vec2_f16 : vec2<f16>,
+    vec3_f32 : vec3<f32>,
+    vec3_i32 : vec3<i32>,
+    vec3_u32 : vec3<u32>,
+    vec3_f16 : vec3<f16>,
+    vec4_f32 : vec4<f32>,
+    vec4_i32 : vec4<i32>,
+    vec4_u32 : vec4<u32>,
+    vec4_f16 : vec4<f16>,
+    mat2x2_f32 : mat2x2<f32>,
+    mat2x3_f32 : mat2x3<f32>,
+    mat2x4_f32 : mat2x4<f32>,
+    mat3x2_f32 : mat3x2<f32>,
+    mat3x3_f32 : mat3x3<f32>,
+    mat3x4_f32 : mat3x4<f32>,
+    mat4x2_f32 : mat4x2<f32>,
+    mat4x3_f32 : mat4x3<f32>,
+    mat4x4_f32 : mat4x4<f32>,
+    mat2x2_f16 : mat2x2<f16>,
+    mat2x3_f16 : mat2x3<f16>,
+    mat2x4_f16 : mat2x4<f16>,
+    mat3x2_f16 : mat3x2<f16>,
+    mat3x3_f16 : mat3x3<f16>,
+    mat3x4_f16 : mat3x4<f16>,
+    mat4x2_f16 : mat4x2<f16>,
+    mat4x3_f16 : mat4x3<f16>,
+    mat4x4_f16 : mat4x4<f16>,
+    arr2_vec3_f32 : array<vec3<f32>, 2>,
+    arr2_mat4x2_f16 : array<mat4x2<f16>, 2>,
+
+};
+
+struct S {
+    arr : array<Inner>,
+};
+
+@binding(0) @group(0) var<storage, read_write> sb : S;
+
+@compute @workgroup_size(1)
+fn main(@builtin(local_invocation_index) idx : u32) {
+    sb.arr[idx].scalar_f32 = f32();
+    sb.arr[idx].scalar_i32 = i32();
+    sb.arr[idx].scalar_u32 = u32();
+    sb.arr[idx].scalar_f16 = f16();
+    sb.arr[idx].vec2_f32 = vec2<f32>();
+    sb.arr[idx].vec2_i32 = vec2<i32>();
+    sb.arr[idx].vec2_u32 = vec2<u32>();
+    sb.arr[idx].vec2_f16 = vec2<f16>();
+    sb.arr[idx].vec3_f32 = vec3<f32>();
+    sb.arr[idx].vec3_i32 = vec3<i32>();
+    sb.arr[idx].vec3_u32 = vec3<u32>();
+    sb.arr[idx].vec3_f16 = vec3<f16>();
+    sb.arr[idx].vec4_f32 = vec4<f32>();
+    sb.arr[idx].vec4_i32 = vec4<i32>();
+    sb.arr[idx].vec4_u32 = vec4<u32>();
+    sb.arr[idx].vec4_f16 = vec4<f16>();
+    sb.arr[idx].mat2x2_f32 = mat2x2<f32>();
+    sb.arr[idx].mat2x3_f32 = mat2x3<f32>();
+    sb.arr[idx].mat2x4_f32 = mat2x4<f32>();
+    sb.arr[idx].mat3x2_f32 = mat3x2<f32>();
+    sb.arr[idx].mat3x3_f32 = mat3x3<f32>();
+    sb.arr[idx].mat3x4_f32 = mat3x4<f32>();
+    sb.arr[idx].mat4x2_f32 = mat4x2<f32>();
+    sb.arr[idx].mat4x3_f32 = mat4x3<f32>();
+    sb.arr[idx].mat4x4_f32 = mat4x4<f32>();
+    sb.arr[idx].mat2x2_f16 = mat2x2<f16>();
+    sb.arr[idx].mat2x3_f16 = mat2x3<f16>();
+    sb.arr[idx].mat2x4_f16 = mat2x4<f16>();
+    sb.arr[idx].mat3x2_f16 = mat3x2<f16>();
+    sb.arr[idx].mat3x3_f16 = mat3x3<f16>();
+    sb.arr[idx].mat3x4_f16 = mat3x4<f16>();
+    sb.arr[idx].mat4x2_f16 = mat4x2<f16>();
+    sb.arr[idx].mat4x3_f16 = mat4x3<f16>();
+    sb.arr[idx].mat4x4_f16 = mat4x4<f16>();
+    sb.arr[idx].arr2_vec3_f32 = array<vec3<f32>, 2>();
+    sb.arr[idx].arr2_mat4x2_f16 = array<mat4x2<f16>, 2>();
+}

diff --git a/test/tint/buffer/storage/dynamic_index/write_f16.wgsl.expected.dxc.hlsl b/test/tint/buffer/storage/dynamic_index/write_f16.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..fe9d9f0
--- /dev/null
+++ b/test/tint/buffer/storage/dynamic_index/write_f16.wgsl.expected.dxc.hlsl

@@ -0,0 +1,178 @@
+RWByteAddressBuffer sb : register(u0, space0);
+
+struct tint_symbol_1 {
+  uint idx : SV_GroupIndex;
+};
+
+void tint_symbol_18(RWByteAddressBuffer buffer, uint offset, float2x2 value) {
+  buffer.Store2((offset + 0u), asuint(value[0u]));
+  buffer.Store2((offset + 8u), asuint(value[1u]));
+}
+
+void tint_symbol_19(RWByteAddressBuffer buffer, uint offset, float2x3 value) {
+  buffer.Store3((offset + 0u), asuint(value[0u]));
+  buffer.Store3((offset + 16u), asuint(value[1u]));
+}
+
+void tint_symbol_20(RWByteAddressBuffer buffer, uint offset, float2x4 value) {
+  buffer.Store4((offset + 0u), asuint(value[0u]));
+  buffer.Store4((offset + 16u), asuint(value[1u]));
+}
+
+void tint_symbol_21(RWByteAddressBuffer buffer, uint offset, float3x2 value) {
+  buffer.Store2((offset + 0u), asuint(value[0u]));
+  buffer.Store2((offset + 8u), asuint(value[1u]));
+  buffer.Store2((offset + 16u), asuint(value[2u]));
+}
+
+void tint_symbol_22(RWByteAddressBuffer buffer, uint offset, float3x3 value) {
+  buffer.Store3((offset + 0u), asuint(value[0u]));
+  buffer.Store3((offset + 16u), asuint(value[1u]));
+  buffer.Store3((offset + 32u), asuint(value[2u]));
+}
+
+void tint_symbol_23(RWByteAddressBuffer buffer, uint offset, float3x4 value) {
+  buffer.Store4((offset + 0u), asuint(value[0u]));
+  buffer.Store4((offset + 16u), asuint(value[1u]));
+  buffer.Store4((offset + 32u), asuint(value[2u]));
+}
+
+void tint_symbol_24(RWByteAddressBuffer buffer, uint offset, float4x2 value) {
+  buffer.Store2((offset + 0u), asuint(value[0u]));
+  buffer.Store2((offset + 8u), asuint(value[1u]));
+  buffer.Store2((offset + 16u), asuint(value[2u]));
+  buffer.Store2((offset + 24u), asuint(value[3u]));
+}
+
+void tint_symbol_25(RWByteAddressBuffer buffer, uint offset, float4x3 value) {
+  buffer.Store3((offset + 0u), asuint(value[0u]));
+  buffer.Store3((offset + 16u), asuint(value[1u]));
+  buffer.Store3((offset + 32u), asuint(value[2u]));
+  buffer.Store3((offset + 48u), asuint(value[3u]));
+}
+
+void tint_symbol_26(RWByteAddressBuffer buffer, uint offset, float4x4 value) {
+  buffer.Store4((offset + 0u), asuint(value[0u]));
+  buffer.Store4((offset + 16u), asuint(value[1u]));
+  buffer.Store4((offset + 32u), asuint(value[2u]));
+  buffer.Store4((offset + 48u), asuint(value[3u]));
+}
+
+void tint_symbol_27(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 2, 2> value) {
+  buffer.Store<vector<float16_t, 2> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 4u), value[1u]);
+}
+
+void tint_symbol_28(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 2, 3> value) {
+  buffer.Store<vector<float16_t, 3> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 8u), value[1u]);
+}
+
+void tint_symbol_29(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 2, 4> value) {
+  buffer.Store<vector<float16_t, 4> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 8u), value[1u]);
+}
+
+void tint_symbol_30(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 3, 2> value) {
+  buffer.Store<vector<float16_t, 2> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 4u), value[1u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 8u), value[2u]);
+}
+
+void tint_symbol_31(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 3, 3> value) {
+  buffer.Store<vector<float16_t, 3> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 8u), value[1u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 16u), value[2u]);
+}
+
+void tint_symbol_32(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 3, 4> value) {
+  buffer.Store<vector<float16_t, 4> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 8u), value[1u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 16u), value[2u]);
+}
+
+void tint_symbol_33(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 4, 2> value) {
+  buffer.Store<vector<float16_t, 2> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 4u), value[1u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 8u), value[2u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 12u), value[3u]);
+}
+
+void tint_symbol_34(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 4, 3> value) {
+  buffer.Store<vector<float16_t, 3> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 8u), value[1u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 16u), value[2u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 24u), value[3u]);
+}
+
+void tint_symbol_35(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 4, 4> value) {
+  buffer.Store<vector<float16_t, 4> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 8u), value[1u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 16u), value[2u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 24u), value[3u]);
+}
+
+void tint_symbol_36(RWByteAddressBuffer buffer, uint offset, float3 value[2]) {
+  float3 array[2] = value;
+  {
+    for(uint i = 0u; (i < 2u); i = (i + 1u)) {
+      buffer.Store3((offset + (i * 16u)), asuint(array[i]));
+    }
+  }
+}
+
+void tint_symbol_37(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 4, 2> value[2]) {
+  matrix<float16_t, 4, 2> array_1[2] = value;
+  {
+    for(uint i_1 = 0u; (i_1 < 2u); i_1 = (i_1 + 1u)) {
+      tint_symbol_33(buffer, (offset + (i_1 * 16u)), array_1[i_1]);
+    }
+  }
+}
+
+void main_inner(uint idx) {
+  sb.Store((800u * idx), asuint(0.0f));
+  sb.Store(((800u * idx) + 4u), asuint(0));
+  sb.Store(((800u * idx) + 8u), asuint(0u));
+  sb.Store<float16_t>(((800u * idx) + 12u), float16_t(0.0h));
+  sb.Store2(((800u * idx) + 16u), asuint((0.0f).xx));
+  sb.Store2(((800u * idx) + 24u), asuint((0).xx));
+  sb.Store2(((800u * idx) + 32u), asuint((0u).xx));
+  sb.Store<vector<float16_t, 2> >(((800u * idx) + 40u), (float16_t(0.0h)).xx);
+  sb.Store3(((800u * idx) + 48u), asuint((0.0f).xxx));
+  sb.Store3(((800u * idx) + 64u), asuint((0).xxx));
+  sb.Store3(((800u * idx) + 80u), asuint((0u).xxx));
+  sb.Store<vector<float16_t, 3> >(((800u * idx) + 96u), (float16_t(0.0h)).xxx);
+  sb.Store4(((800u * idx) + 112u), asuint((0.0f).xxxx));
+  sb.Store4(((800u * idx) + 128u), asuint((0).xxxx));
+  sb.Store4(((800u * idx) + 144u), asuint((0u).xxxx));
+  sb.Store<vector<float16_t, 4> >(((800u * idx) + 160u), (float16_t(0.0h)).xxxx);
+  tint_symbol_18(sb, ((800u * idx) + 168u), float2x2((0.0f).xx, (0.0f).xx));
+  tint_symbol_19(sb, ((800u * idx) + 192u), float2x3((0.0f).xxx, (0.0f).xxx));
+  tint_symbol_20(sb, ((800u * idx) + 224u), float2x4((0.0f).xxxx, (0.0f).xxxx));
+  tint_symbol_21(sb, ((800u * idx) + 256u), float3x2((0.0f).xx, (0.0f).xx, (0.0f).xx));
+  tint_symbol_22(sb, ((800u * idx) + 288u), float3x3((0.0f).xxx, (0.0f).xxx, (0.0f).xxx));
+  tint_symbol_23(sb, ((800u * idx) + 336u), float3x4((0.0f).xxxx, (0.0f).xxxx, (0.0f).xxxx));
+  tint_symbol_24(sb, ((800u * idx) + 384u), float4x2((0.0f).xx, (0.0f).xx, (0.0f).xx, (0.0f).xx));
+  tint_symbol_25(sb, ((800u * idx) + 416u), float4x3((0.0f).xxx, (0.0f).xxx, (0.0f).xxx, (0.0f).xxx));
+  tint_symbol_26(sb, ((800u * idx) + 480u), float4x4((0.0f).xxxx, (0.0f).xxxx, (0.0f).xxxx, (0.0f).xxxx));
+  tint_symbol_27(sb, ((800u * idx) + 544u), matrix<float16_t, 2, 2>((float16_t(0.0h)).xx, (float16_t(0.0h)).xx));
+  tint_symbol_28(sb, ((800u * idx) + 552u), matrix<float16_t, 2, 3>((float16_t(0.0h)).xxx, (float16_t(0.0h)).xxx));
+  tint_symbol_29(sb, ((800u * idx) + 568u), matrix<float16_t, 2, 4>((float16_t(0.0h)).xxxx, (float16_t(0.0h)).xxxx));
+  tint_symbol_30(sb, ((800u * idx) + 584u), matrix<float16_t, 3, 2>((float16_t(0.0h)).xx, (float16_t(0.0h)).xx, (float16_t(0.0h)).xx));
+  tint_symbol_31(sb, ((800u * idx) + 600u), matrix<float16_t, 3, 3>((float16_t(0.0h)).xxx, (float16_t(0.0h)).xxx, (float16_t(0.0h)).xxx));
+  tint_symbol_32(sb, ((800u * idx) + 624u), matrix<float16_t, 3, 4>((float16_t(0.0h)).xxxx, (float16_t(0.0h)).xxxx, (float16_t(0.0h)).xxxx));
+  tint_symbol_33(sb, ((800u * idx) + 648u), matrix<float16_t, 4, 2>((float16_t(0.0h)).xx, (float16_t(0.0h)).xx, (float16_t(0.0h)).xx, (float16_t(0.0h)).xx));
+  tint_symbol_34(sb, ((800u * idx) + 664u), matrix<float16_t, 4, 3>((float16_t(0.0h)).xxx, (float16_t(0.0h)).xxx, (float16_t(0.0h)).xxx, (float16_t(0.0h)).xxx));
+  tint_symbol_35(sb, ((800u * idx) + 696u), matrix<float16_t, 4, 4>((float16_t(0.0h)).xxxx, (float16_t(0.0h)).xxxx, (float16_t(0.0h)).xxxx, (float16_t(0.0h)).xxxx));
+  const float3 tint_symbol_38[2] = (float3[2])0;
+  tint_symbol_36(sb, ((800u * idx) + 736u), tint_symbol_38);
+  const matrix<float16_t, 4, 2> tint_symbol_39[2] = (matrix<float16_t, 4, 2>[2])0;
+  tint_symbol_37(sb, ((800u * idx) + 768u), tint_symbol_39);
+}
+
+[numthreads(1, 1, 1)]
+void main(tint_symbol_1 tint_symbol) {
+  main_inner(tint_symbol.idx);
+  return;
+}

diff --git a/test/tint/buffer/storage/dynamic_index/write_f16.wgsl.expected.fxc.hlsl b/test/tint/buffer/storage/dynamic_index/write_f16.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..8bd57ef
--- /dev/null
+++ b/test/tint/buffer/storage/dynamic_index/write_f16.wgsl.expected.fxc.hlsl

@@ -0,0 +1,184 @@
+SKIP: FAILED
+
+RWByteAddressBuffer sb : register(u0, space0);
+
+struct tint_symbol_1 {
+  uint idx : SV_GroupIndex;
+};
+
+void tint_symbol_18(RWByteAddressBuffer buffer, uint offset, float2x2 value) {
+  buffer.Store2((offset + 0u), asuint(value[0u]));
+  buffer.Store2((offset + 8u), asuint(value[1u]));
+}
+
+void tint_symbol_19(RWByteAddressBuffer buffer, uint offset, float2x3 value) {
+  buffer.Store3((offset + 0u), asuint(value[0u]));
+  buffer.Store3((offset + 16u), asuint(value[1u]));
+}
+
+void tint_symbol_20(RWByteAddressBuffer buffer, uint offset, float2x4 value) {
+  buffer.Store4((offset + 0u), asuint(value[0u]));
+  buffer.Store4((offset + 16u), asuint(value[1u]));
+}
+
+void tint_symbol_21(RWByteAddressBuffer buffer, uint offset, float3x2 value) {
+  buffer.Store2((offset + 0u), asuint(value[0u]));
+  buffer.Store2((offset + 8u), asuint(value[1u]));
+  buffer.Store2((offset + 16u), asuint(value[2u]));
+}
+
+void tint_symbol_22(RWByteAddressBuffer buffer, uint offset, float3x3 value) {
+  buffer.Store3((offset + 0u), asuint(value[0u]));
+  buffer.Store3((offset + 16u), asuint(value[1u]));
+  buffer.Store3((offset + 32u), asuint(value[2u]));
+}
+
+void tint_symbol_23(RWByteAddressBuffer buffer, uint offset, float3x4 value) {
+  buffer.Store4((offset + 0u), asuint(value[0u]));
+  buffer.Store4((offset + 16u), asuint(value[1u]));
+  buffer.Store4((offset + 32u), asuint(value[2u]));
+}
+
+void tint_symbol_24(RWByteAddressBuffer buffer, uint offset, float4x2 value) {
+  buffer.Store2((offset + 0u), asuint(value[0u]));
+  buffer.Store2((offset + 8u), asuint(value[1u]));
+  buffer.Store2((offset + 16u), asuint(value[2u]));
+  buffer.Store2((offset + 24u), asuint(value[3u]));
+}
+
+void tint_symbol_25(RWByteAddressBuffer buffer, uint offset, float4x3 value) {
+  buffer.Store3((offset + 0u), asuint(value[0u]));
+  buffer.Store3((offset + 16u), asuint(value[1u]));
+  buffer.Store3((offset + 32u), asuint(value[2u]));
+  buffer.Store3((offset + 48u), asuint(value[3u]));
+}
+
+void tint_symbol_26(RWByteAddressBuffer buffer, uint offset, float4x4 value) {
+  buffer.Store4((offset + 0u), asuint(value[0u]));
+  buffer.Store4((offset + 16u), asuint(value[1u]));
+  buffer.Store4((offset + 32u), asuint(value[2u]));
+  buffer.Store4((offset + 48u), asuint(value[3u]));
+}
+
+void tint_symbol_27(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 2, 2> value) {
+  buffer.Store<vector<float16_t, 2> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 4u), value[1u]);
+}
+
+void tint_symbol_28(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 2, 3> value) {
+  buffer.Store<vector<float16_t, 3> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 8u), value[1u]);
+}
+
+void tint_symbol_29(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 2, 4> value) {
+  buffer.Store<vector<float16_t, 4> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 8u), value[1u]);
+}
+
+void tint_symbol_30(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 3, 2> value) {
+  buffer.Store<vector<float16_t, 2> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 4u), value[1u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 8u), value[2u]);
+}
+
+void tint_symbol_31(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 3, 3> value) {
+  buffer.Store<vector<float16_t, 3> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 8u), value[1u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 16u), value[2u]);
+}
+
+void tint_symbol_32(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 3, 4> value) {
+  buffer.Store<vector<float16_t, 4> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 8u), value[1u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 16u), value[2u]);
+}
+
+void tint_symbol_33(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 4, 2> value) {
+  buffer.Store<vector<float16_t, 2> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 4u), value[1u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 8u), value[2u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 12u), value[3u]);
+}
+
+void tint_symbol_34(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 4, 3> value) {
+  buffer.Store<vector<float16_t, 3> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 8u), value[1u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 16u), value[2u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 24u), value[3u]);
+}
+
+void tint_symbol_35(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 4, 4> value) {
+  buffer.Store<vector<float16_t, 4> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 8u), value[1u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 16u), value[2u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 24u), value[3u]);
+}
+
+void tint_symbol_36(RWByteAddressBuffer buffer, uint offset, float3 value[2]) {
+  float3 array[2] = value;
+  {
+    for(uint i = 0u; (i < 2u); i = (i + 1u)) {
+      buffer.Store3((offset + (i * 16u)), asuint(array[i]));
+    }
+  }
+}
+
+void tint_symbol_37(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 4, 2> value[2]) {
+  matrix<float16_t, 4, 2> array_1[2] = value;
+  {
+    for(uint i_1 = 0u; (i_1 < 2u); i_1 = (i_1 + 1u)) {
+      tint_symbol_33(buffer, (offset + (i_1 * 16u)), array_1[i_1]);
+    }
+  }
+}
+
+void main_inner(uint idx) {
+  sb.Store((800u * idx), asuint(0.0f));
+  sb.Store(((800u * idx) + 4u), asuint(0));
+  sb.Store(((800u * idx) + 8u), asuint(0u));
+  sb.Store<float16_t>(((800u * idx) + 12u), float16_t(0.0h));
+  sb.Store2(((800u * idx) + 16u), asuint((0.0f).xx));
+  sb.Store2(((800u * idx) + 24u), asuint((0).xx));
+  sb.Store2(((800u * idx) + 32u), asuint((0u).xx));
+  sb.Store<vector<float16_t, 2> >(((800u * idx) + 40u), (float16_t(0.0h)).xx);
+  sb.Store3(((800u * idx) + 48u), asuint((0.0f).xxx));
+  sb.Store3(((800u * idx) + 64u), asuint((0).xxx));
+  sb.Store3(((800u * idx) + 80u), asuint((0u).xxx));
+  sb.Store<vector<float16_t, 3> >(((800u * idx) + 96u), (float16_t(0.0h)).xxx);
+  sb.Store4(((800u * idx) + 112u), asuint((0.0f).xxxx));
+  sb.Store4(((800u * idx) + 128u), asuint((0).xxxx));
+  sb.Store4(((800u * idx) + 144u), asuint((0u).xxxx));
+  sb.Store<vector<float16_t, 4> >(((800u * idx) + 160u), (float16_t(0.0h)).xxxx);
+  tint_symbol_18(sb, ((800u * idx) + 168u), float2x2((0.0f).xx, (0.0f).xx));
+  tint_symbol_19(sb, ((800u * idx) + 192u), float2x3((0.0f).xxx, (0.0f).xxx));
+  tint_symbol_20(sb, ((800u * idx) + 224u), float2x4((0.0f).xxxx, (0.0f).xxxx));
+  tint_symbol_21(sb, ((800u * idx) + 256u), float3x2((0.0f).xx, (0.0f).xx, (0.0f).xx));
+  tint_symbol_22(sb, ((800u * idx) + 288u), float3x3((0.0f).xxx, (0.0f).xxx, (0.0f).xxx));
+  tint_symbol_23(sb, ((800u * idx) + 336u), float3x4((0.0f).xxxx, (0.0f).xxxx, (0.0f).xxxx));
+  tint_symbol_24(sb, ((800u * idx) + 384u), float4x2((0.0f).xx, (0.0f).xx, (0.0f).xx, (0.0f).xx));
+  tint_symbol_25(sb, ((800u * idx) + 416u), float4x3((0.0f).xxx, (0.0f).xxx, (0.0f).xxx, (0.0f).xxx));
+  tint_symbol_26(sb, ((800u * idx) + 480u), float4x4((0.0f).xxxx, (0.0f).xxxx, (0.0f).xxxx, (0.0f).xxxx));
+  tint_symbol_27(sb, ((800u * idx) + 544u), matrix<float16_t, 2, 2>((float16_t(0.0h)).xx, (float16_t(0.0h)).xx));
+  tint_symbol_28(sb, ((800u * idx) + 552u), matrix<float16_t, 2, 3>((float16_t(0.0h)).xxx, (float16_t(0.0h)).xxx));
+  tint_symbol_29(sb, ((800u * idx) + 568u), matrix<float16_t, 2, 4>((float16_t(0.0h)).xxxx, (float16_t(0.0h)).xxxx));
+  tint_symbol_30(sb, ((800u * idx) + 584u), matrix<float16_t, 3, 2>((float16_t(0.0h)).xx, (float16_t(0.0h)).xx, (float16_t(0.0h)).xx));
+  tint_symbol_31(sb, ((800u * idx) + 600u), matrix<float16_t, 3, 3>((float16_t(0.0h)).xxx, (float16_t(0.0h)).xxx, (float16_t(0.0h)).xxx));
+  tint_symbol_32(sb, ((800u * idx) + 624u), matrix<float16_t, 3, 4>((float16_t(0.0h)).xxxx, (float16_t(0.0h)).xxxx, (float16_t(0.0h)).xxxx));
+  tint_symbol_33(sb, ((800u * idx) + 648u), matrix<float16_t, 4, 2>((float16_t(0.0h)).xx, (float16_t(0.0h)).xx, (float16_t(0.0h)).xx, (float16_t(0.0h)).xx));
+  tint_symbol_34(sb, ((800u * idx) + 664u), matrix<float16_t, 4, 3>((float16_t(0.0h)).xxx, (float16_t(0.0h)).xxx, (float16_t(0.0h)).xxx, (float16_t(0.0h)).xxx));
+  tint_symbol_35(sb, ((800u * idx) + 696u), matrix<float16_t, 4, 4>((float16_t(0.0h)).xxxx, (float16_t(0.0h)).xxxx, (float16_t(0.0h)).xxxx, (float16_t(0.0h)).xxxx));
+  const float3 tint_symbol_38[2] = (float3[2])0;
+  tint_symbol_36(sb, ((800u * idx) + 736u), tint_symbol_38);
+  const matrix<float16_t, 4, 2> tint_symbol_39[2] = (matrix<float16_t, 4, 2>[2])0;
+  tint_symbol_37(sb, ((800u * idx) + 768u), tint_symbol_39);
+}
+
+[numthreads(1, 1, 1)]
+void main(tint_symbol_1 tint_symbol) {
+  main_inner(tint_symbol.idx);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000002B00914B0D0(61,69-77): error X3000: syntax error: unexpected token 'float16_t'
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000002B00914B0D0(62,3-14): error X3018: invalid subscript 'Store'
+

diff --git a/test/tint/buffer/storage/dynamic_index/write_f16.wgsl.expected.glsl b/test/tint/buffer/storage/dynamic_index/write_f16.wgsl.expected.glsl
new file mode 100644
index 0000000..0b00be4
--- /dev/null
+++ b/test/tint/buffer/storage/dynamic_index/write_f16.wgsl.expected.glsl

@@ -0,0 +1,105 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct Inner {
+  float scalar_f32;
+  int scalar_i32;
+  uint scalar_u32;
+  float16_t scalar_f16;
+  vec2 vec2_f32;
+  ivec2 vec2_i32;
+  uvec2 vec2_u32;
+  f16vec2 vec2_f16;
+  uint pad;
+  vec3 vec3_f32;
+  uint pad_1;
+  ivec3 vec3_i32;
+  uint pad_2;
+  uvec3 vec3_u32;
+  uint pad_3;
+  f16vec3 vec3_f16;
+  uint pad_4;
+  uint pad_5;
+  vec4 vec4_f32;
+  ivec4 vec4_i32;
+  uvec4 vec4_u32;
+  f16vec4 vec4_f16;
+  mat2 mat2x2_f32;
+  uint pad_6;
+  uint pad_7;
+  mat2x3 mat2x3_f32;
+  mat2x4 mat2x4_f32;
+  mat3x2 mat3x2_f32;
+  uint pad_8;
+  uint pad_9;
+  mat3 mat3x3_f32;
+  mat3x4 mat3x4_f32;
+  mat4x2 mat4x2_f32;
+  mat4x3 mat4x3_f32;
+  mat4 mat4x4_f32;
+  f16mat2 mat2x2_f16;
+  f16mat2x3 mat2x3_f16;
+  f16mat2x4 mat2x4_f16;
+  f16mat3x2 mat3x2_f16;
+  uint pad_10;
+  f16mat3 mat3x3_f16;
+  f16mat3x4 mat3x4_f16;
+  f16mat4x2 mat4x2_f16;
+  f16mat4x3 mat4x3_f16;
+  f16mat4 mat4x4_f16;
+  uint pad_11;
+  uint pad_12;
+  vec3 arr2_vec3_f32[2];
+  f16mat4x2 arr2_mat4x2_f16[2];
+};
+
+layout(binding = 0, std430) buffer S_ssbo {
+  Inner arr[];
+} sb;
+
+void tint_symbol(uint idx) {
+  sb.arr[idx].scalar_f32 = 0.0f;
+  sb.arr[idx].scalar_i32 = 0;
+  sb.arr[idx].scalar_u32 = 0u;
+  sb.arr[idx].scalar_f16 = 0.0hf;
+  sb.arr[idx].vec2_f32 = vec2(0.0f);
+  sb.arr[idx].vec2_i32 = ivec2(0);
+  sb.arr[idx].vec2_u32 = uvec2(0u);
+  sb.arr[idx].vec2_f16 = f16vec2(0.0hf);
+  sb.arr[idx].vec3_f32 = vec3(0.0f);
+  sb.arr[idx].vec3_i32 = ivec3(0);
+  sb.arr[idx].vec3_u32 = uvec3(0u);
+  sb.arr[idx].vec3_f16 = f16vec3(0.0hf);
+  sb.arr[idx].vec4_f32 = vec4(0.0f);
+  sb.arr[idx].vec4_i32 = ivec4(0);
+  sb.arr[idx].vec4_u32 = uvec4(0u);
+  sb.arr[idx].vec4_f16 = f16vec4(0.0hf);
+  sb.arr[idx].mat2x2_f32 = mat2(vec2(0.0f), vec2(0.0f));
+  sb.arr[idx].mat2x3_f32 = mat2x3(vec3(0.0f), vec3(0.0f));
+  sb.arr[idx].mat2x4_f32 = mat2x4(vec4(0.0f), vec4(0.0f));
+  sb.arr[idx].mat3x2_f32 = mat3x2(vec2(0.0f), vec2(0.0f), vec2(0.0f));
+  sb.arr[idx].mat3x3_f32 = mat3(vec3(0.0f), vec3(0.0f), vec3(0.0f));
+  sb.arr[idx].mat3x4_f32 = mat3x4(vec4(0.0f), vec4(0.0f), vec4(0.0f));
+  sb.arr[idx].mat4x2_f32 = mat4x2(vec2(0.0f), vec2(0.0f), vec2(0.0f), vec2(0.0f));
+  sb.arr[idx].mat4x3_f32 = mat4x3(vec3(0.0f), vec3(0.0f), vec3(0.0f), vec3(0.0f));
+  sb.arr[idx].mat4x4_f32 = mat4(vec4(0.0f), vec4(0.0f), vec4(0.0f), vec4(0.0f));
+  sb.arr[idx].mat2x2_f16 = f16mat2(f16vec2(0.0hf), f16vec2(0.0hf));
+  sb.arr[idx].mat2x3_f16 = f16mat2x3(f16vec3(0.0hf), f16vec3(0.0hf));
+  sb.arr[idx].mat2x4_f16 = f16mat2x4(f16vec4(0.0hf), f16vec4(0.0hf));
+  sb.arr[idx].mat3x2_f16 = f16mat3x2(f16vec2(0.0hf), f16vec2(0.0hf), f16vec2(0.0hf));
+  sb.arr[idx].mat3x3_f16 = f16mat3(f16vec3(0.0hf), f16vec3(0.0hf), f16vec3(0.0hf));
+  sb.arr[idx].mat3x4_f16 = f16mat3x4(f16vec4(0.0hf), f16vec4(0.0hf), f16vec4(0.0hf));
+  sb.arr[idx].mat4x2_f16 = f16mat4x2(f16vec2(0.0hf), f16vec2(0.0hf), f16vec2(0.0hf), f16vec2(0.0hf));
+  sb.arr[idx].mat4x3_f16 = f16mat4x3(f16vec3(0.0hf), f16vec3(0.0hf), f16vec3(0.0hf), f16vec3(0.0hf));
+  sb.arr[idx].mat4x4_f16 = f16mat4(f16vec4(0.0hf), f16vec4(0.0hf), f16vec4(0.0hf), f16vec4(0.0hf));
+  vec3 tint_symbol_1[2] = vec3[2](vec3(0.0f), vec3(0.0f));
+  sb.arr[idx].arr2_vec3_f32 = tint_symbol_1;
+  f16mat4x2 tint_symbol_2[2] = f16mat4x2[2](f16mat4x2(f16vec2(0.0hf), f16vec2(0.0hf), f16vec2(0.0hf), f16vec2(0.0hf)), f16mat4x2(f16vec2(0.0hf), f16vec2(0.0hf), f16vec2(0.0hf), f16vec2(0.0hf)));
+  sb.arr[idx].arr2_mat4x2_f16 = tint_symbol_2;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  tint_symbol(gl_LocalInvocationIndex);
+  return;
+}

diff --git a/test/tint/buffer/storage/dynamic_index/write_f16.wgsl.expected.msl b/test/tint/buffer/storage/dynamic_index/write_f16.wgsl.expected.msl
new file mode 100644
index 0000000..eb9bf7c
--- /dev/null
+++ b/test/tint/buffer/storage/dynamic_index/write_f16.wgsl.expected.msl

@@ -0,0 +1,115 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct Inner {
+  /* 0x0000 */ float scalar_f32;
+  /* 0x0004 */ int scalar_i32;
+  /* 0x0008 */ uint scalar_u32;
+  /* 0x000c */ half scalar_f16;
+  /* 0x000e */ tint_array<int8_t, 2> tint_pad;
+  /* 0x0010 */ float2 vec2_f32;
+  /* 0x0018 */ int2 vec2_i32;
+  /* 0x0020 */ uint2 vec2_u32;
+  /* 0x0028 */ half2 vec2_f16;
+  /* 0x002c */ tint_array<int8_t, 4> tint_pad_1;
+  /* 0x0030 */ packed_float3 vec3_f32;
+  /* 0x003c */ tint_array<int8_t, 4> tint_pad_2;
+  /* 0x0040 */ packed_int3 vec3_i32;
+  /* 0x004c */ tint_array<int8_t, 4> tint_pad_3;
+  /* 0x0050 */ packed_uint3 vec3_u32;
+  /* 0x005c */ tint_array<int8_t, 4> tint_pad_4;
+  /* 0x0060 */ packed_half3 vec3_f16;
+  /* 0x0066 */ tint_array<int8_t, 10> tint_pad_5;
+  /* 0x0070 */ float4 vec4_f32;
+  /* 0x0080 */ int4 vec4_i32;
+  /* 0x0090 */ uint4 vec4_u32;
+  /* 0x00a0 */ half4 vec4_f16;
+  /* 0x00a8 */ float2x2 mat2x2_f32;
+  /* 0x00b8 */ tint_array<int8_t, 8> tint_pad_6;
+  /* 0x00c0 */ float2x3 mat2x3_f32;
+  /* 0x00e0 */ float2x4 mat2x4_f32;
+  /* 0x0100 */ float3x2 mat3x2_f32;
+  /* 0x0118 */ tint_array<int8_t, 8> tint_pad_7;
+  /* 0x0120 */ float3x3 mat3x3_f32;
+  /* 0x0150 */ float3x4 mat3x4_f32;
+  /* 0x0180 */ float4x2 mat4x2_f32;
+  /* 0x01a0 */ float4x3 mat4x3_f32;
+  /* 0x01e0 */ float4x4 mat4x4_f32;
+  /* 0x0220 */ half2x2 mat2x2_f16;
+  /* 0x0228 */ half2x3 mat2x3_f16;
+  /* 0x0238 */ half2x4 mat2x4_f16;
+  /* 0x0248 */ half3x2 mat3x2_f16;
+  /* 0x0254 */ tint_array<int8_t, 4> tint_pad_8;
+  /* 0x0258 */ half3x3 mat3x3_f16;
+  /* 0x0270 */ half3x4 mat3x4_f16;
+  /* 0x0288 */ half4x2 mat4x2_f16;
+  /* 0x0298 */ half4x3 mat4x3_f16;
+  /* 0x02b8 */ half4x4 mat4x4_f16;
+  /* 0x02d8 */ tint_array<int8_t, 8> tint_pad_9;
+  /* 0x02e0 */ tint_array<float3, 2> arr2_vec3_f32;
+  /* 0x0300 */ tint_array<half4x2, 2> arr2_mat4x2_f16;
+};
+
+struct S {
+  /* 0x0000 */ tint_array<Inner, 1> arr;
+};
+
+void tint_symbol_inner(uint idx, device S* const tint_symbol_3) {
+  (*(tint_symbol_3)).arr[idx].scalar_f32 = 0.0f;
+  (*(tint_symbol_3)).arr[idx].scalar_i32 = 0;
+  (*(tint_symbol_3)).arr[idx].scalar_u32 = 0u;
+  (*(tint_symbol_3)).arr[idx].scalar_f16 = 0.0h;
+  (*(tint_symbol_3)).arr[idx].vec2_f32 = float2(0.0f);
+  (*(tint_symbol_3)).arr[idx].vec2_i32 = int2(0);
+  (*(tint_symbol_3)).arr[idx].vec2_u32 = uint2(0u);
+  (*(tint_symbol_3)).arr[idx].vec2_f16 = half2(0.0h);
+  (*(tint_symbol_3)).arr[idx].vec3_f32 = float3(0.0f);
+  (*(tint_symbol_3)).arr[idx].vec3_i32 = int3(0);
+  (*(tint_symbol_3)).arr[idx].vec3_u32 = uint3(0u);
+  (*(tint_symbol_3)).arr[idx].vec3_f16 = half3(0.0h);
+  (*(tint_symbol_3)).arr[idx].vec4_f32 = float4(0.0f);
+  (*(tint_symbol_3)).arr[idx].vec4_i32 = int4(0);
+  (*(tint_symbol_3)).arr[idx].vec4_u32 = uint4(0u);
+  (*(tint_symbol_3)).arr[idx].vec4_f16 = half4(0.0h);
+  (*(tint_symbol_3)).arr[idx].mat2x2_f32 = float2x2(float2(0.0f), float2(0.0f));
+  (*(tint_symbol_3)).arr[idx].mat2x3_f32 = float2x3(float3(0.0f), float3(0.0f));
+  (*(tint_symbol_3)).arr[idx].mat2x4_f32 = float2x4(float4(0.0f), float4(0.0f));
+  (*(tint_symbol_3)).arr[idx].mat3x2_f32 = float3x2(float2(0.0f), float2(0.0f), float2(0.0f));
+  (*(tint_symbol_3)).arr[idx].mat3x3_f32 = float3x3(float3(0.0f), float3(0.0f), float3(0.0f));
+  (*(tint_symbol_3)).arr[idx].mat3x4_f32 = float3x4(float4(0.0f), float4(0.0f), float4(0.0f));
+  (*(tint_symbol_3)).arr[idx].mat4x2_f32 = float4x2(float2(0.0f), float2(0.0f), float2(0.0f), float2(0.0f));
+  (*(tint_symbol_3)).arr[idx].mat4x3_f32 = float4x3(float3(0.0f), float3(0.0f), float3(0.0f), float3(0.0f));
+  (*(tint_symbol_3)).arr[idx].mat4x4_f32 = float4x4(float4(0.0f), float4(0.0f), float4(0.0f), float4(0.0f));
+  (*(tint_symbol_3)).arr[idx].mat2x2_f16 = half2x2(half2(0.0h), half2(0.0h));
+  (*(tint_symbol_3)).arr[idx].mat2x3_f16 = half2x3(half3(0.0h), half3(0.0h));
+  (*(tint_symbol_3)).arr[idx].mat2x4_f16 = half2x4(half4(0.0h), half4(0.0h));
+  (*(tint_symbol_3)).arr[idx].mat3x2_f16 = half3x2(half2(0.0h), half2(0.0h), half2(0.0h));
+  (*(tint_symbol_3)).arr[idx].mat3x3_f16 = half3x3(half3(0.0h), half3(0.0h), half3(0.0h));
+  (*(tint_symbol_3)).arr[idx].mat3x4_f16 = half3x4(half4(0.0h), half4(0.0h), half4(0.0h));
+  (*(tint_symbol_3)).arr[idx].mat4x2_f16 = half4x2(half2(0.0h), half2(0.0h), half2(0.0h), half2(0.0h));
+  (*(tint_symbol_3)).arr[idx].mat4x3_f16 = half4x3(half3(0.0h), half3(0.0h), half3(0.0h), half3(0.0h));
+  (*(tint_symbol_3)).arr[idx].mat4x4_f16 = half4x4(half4(0.0h), half4(0.0h), half4(0.0h), half4(0.0h));
+  tint_array<float3, 2> const tint_symbol_1 = tint_array<float3, 2>{};
+  (*(tint_symbol_3)).arr[idx].arr2_vec3_f32 = tint_symbol_1;
+  tint_array<half4x2, 2> const tint_symbol_2 = tint_array<half4x2, 2>{};
+  (*(tint_symbol_3)).arr[idx].arr2_mat4x2_f16 = tint_symbol_2;
+}
+
+kernel void tint_symbol(device S* tint_symbol_4 [[buffer(0)]], uint idx [[thread_index_in_threadgroup]]) {
+  tint_symbol_inner(idx, tint_symbol_4);
+  return;
+}
+

diff --git a/test/tint/buffer/storage/dynamic_index/write_f16.wgsl.expected.spvasm b/test/tint/buffer/storage/dynamic_index/write_f16.wgsl.expected.spvasm
new file mode 100644
index 0000000..dfd8ef7
--- /dev/null
+++ b/test/tint/buffer/storage/dynamic_index/write_f16.wgsl.expected.spvasm

@@ -0,0 +1,376 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 198
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main" %idx_1
+               OpExecutionMode %main LocalSize 1 1 1
+               OpName %idx_1 "idx_1"
+               OpName %S "S"
+               OpMemberName %S 0 "arr"
+               OpName %Inner "Inner"
+               OpMemberName %Inner 0 "scalar_f32"
+               OpMemberName %Inner 1 "scalar_i32"
+               OpMemberName %Inner 2 "scalar_u32"
+               OpMemberName %Inner 3 "scalar_f16"
+               OpMemberName %Inner 4 "vec2_f32"
+               OpMemberName %Inner 5 "vec2_i32"
+               OpMemberName %Inner 6 "vec2_u32"
+               OpMemberName %Inner 7 "vec2_f16"
+               OpMemberName %Inner 8 "vec3_f32"
+               OpMemberName %Inner 9 "vec3_i32"
+               OpMemberName %Inner 10 "vec3_u32"
+               OpMemberName %Inner 11 "vec3_f16"
+               OpMemberName %Inner 12 "vec4_f32"
+               OpMemberName %Inner 13 "vec4_i32"
+               OpMemberName %Inner 14 "vec4_u32"
+               OpMemberName %Inner 15 "vec4_f16"
+               OpMemberName %Inner 16 "mat2x2_f32"
+               OpMemberName %Inner 17 "mat2x3_f32"
+               OpMemberName %Inner 18 "mat2x4_f32"
+               OpMemberName %Inner 19 "mat3x2_f32"
+               OpMemberName %Inner 20 "mat3x3_f32"
+               OpMemberName %Inner 21 "mat3x4_f32"
+               OpMemberName %Inner 22 "mat4x2_f32"
+               OpMemberName %Inner 23 "mat4x3_f32"
+               OpMemberName %Inner 24 "mat4x4_f32"
+               OpMemberName %Inner 25 "mat2x2_f16"
+               OpMemberName %Inner 26 "mat2x3_f16"
+               OpMemberName %Inner 27 "mat2x4_f16"
+               OpMemberName %Inner 28 "mat3x2_f16"
+               OpMemberName %Inner 29 "mat3x3_f16"
+               OpMemberName %Inner 30 "mat3x4_f16"
+               OpMemberName %Inner 31 "mat4x2_f16"
+               OpMemberName %Inner 32 "mat4x3_f16"
+               OpMemberName %Inner 33 "mat4x4_f16"
+               OpMemberName %Inner 34 "arr2_vec3_f32"
+               OpMemberName %Inner 35 "arr2_mat4x2_f16"
+               OpName %sb "sb"
+               OpName %main_inner "main_inner"
+               OpName %idx "idx"
+               OpName %main "main"
+               OpDecorate %idx_1 BuiltIn LocalInvocationIndex
+               OpDecorate %S Block
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %Inner 0 Offset 0
+               OpMemberDecorate %Inner 1 Offset 4
+               OpMemberDecorate %Inner 2 Offset 8
+               OpMemberDecorate %Inner 3 Offset 12
+               OpMemberDecorate %Inner 4 Offset 16
+               OpMemberDecorate %Inner 5 Offset 24
+               OpMemberDecorate %Inner 6 Offset 32
+               OpMemberDecorate %Inner 7 Offset 40
+               OpMemberDecorate %Inner 8 Offset 48
+               OpMemberDecorate %Inner 9 Offset 64
+               OpMemberDecorate %Inner 10 Offset 80
+               OpMemberDecorate %Inner 11 Offset 96
+               OpMemberDecorate %Inner 12 Offset 112
+               OpMemberDecorate %Inner 13 Offset 128
+               OpMemberDecorate %Inner 14 Offset 144
+               OpMemberDecorate %Inner 15 Offset 160
+               OpMemberDecorate %Inner 16 Offset 168
+               OpMemberDecorate %Inner 16 ColMajor
+               OpMemberDecorate %Inner 16 MatrixStride 8
+               OpMemberDecorate %Inner 17 Offset 192
+               OpMemberDecorate %Inner 17 ColMajor
+               OpMemberDecorate %Inner 17 MatrixStride 16
+               OpMemberDecorate %Inner 18 Offset 224
+               OpMemberDecorate %Inner 18 ColMajor
+               OpMemberDecorate %Inner 18 MatrixStride 16
+               OpMemberDecorate %Inner 19 Offset 256
+               OpMemberDecorate %Inner 19 ColMajor
+               OpMemberDecorate %Inner 19 MatrixStride 8
+               OpMemberDecorate %Inner 20 Offset 288
+               OpMemberDecorate %Inner 20 ColMajor
+               OpMemberDecorate %Inner 20 MatrixStride 16
+               OpMemberDecorate %Inner 21 Offset 336
+               OpMemberDecorate %Inner 21 ColMajor
+               OpMemberDecorate %Inner 21 MatrixStride 16
+               OpMemberDecorate %Inner 22 Offset 384
+               OpMemberDecorate %Inner 22 ColMajor
+               OpMemberDecorate %Inner 22 MatrixStride 8
+               OpMemberDecorate %Inner 23 Offset 416
+               OpMemberDecorate %Inner 23 ColMajor
+               OpMemberDecorate %Inner 23 MatrixStride 16
+               OpMemberDecorate %Inner 24 Offset 480
+               OpMemberDecorate %Inner 24 ColMajor
+               OpMemberDecorate %Inner 24 MatrixStride 16
+               OpMemberDecorate %Inner 25 Offset 544
+               OpMemberDecorate %Inner 25 ColMajor
+               OpMemberDecorate %Inner 25 MatrixStride 4
+               OpMemberDecorate %Inner 26 Offset 552
+               OpMemberDecorate %Inner 26 ColMajor
+               OpMemberDecorate %Inner 26 MatrixStride 8
+               OpMemberDecorate %Inner 27 Offset 568
+               OpMemberDecorate %Inner 27 ColMajor
+               OpMemberDecorate %Inner 27 MatrixStride 8
+               OpMemberDecorate %Inner 28 Offset 584
+               OpMemberDecorate %Inner 28 ColMajor
+               OpMemberDecorate %Inner 28 MatrixStride 4
+               OpMemberDecorate %Inner 29 Offset 600
+               OpMemberDecorate %Inner 29 ColMajor
+               OpMemberDecorate %Inner 29 MatrixStride 8
+               OpMemberDecorate %Inner 30 Offset 624
+               OpMemberDecorate %Inner 30 ColMajor
+               OpMemberDecorate %Inner 30 MatrixStride 8
+               OpMemberDecorate %Inner 31 Offset 648
+               OpMemberDecorate %Inner 31 ColMajor
+               OpMemberDecorate %Inner 31 MatrixStride 4
+               OpMemberDecorate %Inner 32 Offset 664
+               OpMemberDecorate %Inner 32 ColMajor
+               OpMemberDecorate %Inner 32 MatrixStride 8
+               OpMemberDecorate %Inner 33 Offset 696
+               OpMemberDecorate %Inner 33 ColMajor
+               OpMemberDecorate %Inner 33 MatrixStride 8
+               OpMemberDecorate %Inner 34 Offset 736
+               OpDecorate %_arr_v3float_uint_2 ArrayStride 16
+               OpMemberDecorate %Inner 35 Offset 768
+               OpMemberDecorate %Inner 35 ColMajor
+               OpMemberDecorate %Inner 35 MatrixStride 4
+               OpDecorate %_arr_mat4v2half_uint_2 ArrayStride 16
+               OpDecorate %_runtimearr_Inner ArrayStride 800
+               OpDecorate %sb Binding 0
+               OpDecorate %sb DescriptorSet 0
+       %uint = OpTypeInt 32 0
+%_ptr_Input_uint = OpTypePointer Input %uint
+      %idx_1 = OpVariable %_ptr_Input_uint Input
+      %float = OpTypeFloat 32
+        %int = OpTypeInt 32 1
+       %half = OpTypeFloat 16
+    %v2float = OpTypeVector %float 2
+      %v2int = OpTypeVector %int 2
+     %v2uint = OpTypeVector %uint 2
+     %v2half = OpTypeVector %half 2
+    %v3float = OpTypeVector %float 3
+      %v3int = OpTypeVector %int 3
+     %v3uint = OpTypeVector %uint 3
+     %v3half = OpTypeVector %half 3
+    %v4float = OpTypeVector %float 4
+      %v4int = OpTypeVector %int 4
+     %v4uint = OpTypeVector %uint 4
+     %v4half = OpTypeVector %half 4
+%mat2v2float = OpTypeMatrix %v2float 2
+%mat2v3float = OpTypeMatrix %v3float 2
+%mat2v4float = OpTypeMatrix %v4float 2
+%mat3v2float = OpTypeMatrix %v2float 3
+%mat3v3float = OpTypeMatrix %v3float 3
+%mat3v4float = OpTypeMatrix %v4float 3
+%mat4v2float = OpTypeMatrix %v2float 4
+%mat4v3float = OpTypeMatrix %v3float 4
+%mat4v4float = OpTypeMatrix %v4float 4
+ %mat2v2half = OpTypeMatrix %v2half 2
+ %mat2v3half = OpTypeMatrix %v3half 2
+ %mat2v4half = OpTypeMatrix %v4half 2
+ %mat3v2half = OpTypeMatrix %v2half 3
+ %mat3v3half = OpTypeMatrix %v3half 3
+ %mat3v4half = OpTypeMatrix %v4half 3
+ %mat4v2half = OpTypeMatrix %v2half 4
+ %mat4v3half = OpTypeMatrix %v3half 4
+ %mat4v4half = OpTypeMatrix %v4half 4
+     %uint_2 = OpConstant %uint 2
+%_arr_v3float_uint_2 = OpTypeArray %v3float %uint_2
+%_arr_mat4v2half_uint_2 = OpTypeArray %mat4v2half %uint_2
+      %Inner = OpTypeStruct %float %int %uint %half %v2float %v2int %v2uint %v2half %v3float %v3int %v3uint %v3half %v4float %v4int %v4uint %v4half %mat2v2float %mat2v3float %mat2v4float %mat3v2float %mat3v3float %mat3v4float %mat4v2float %mat4v3float %mat4v4float %mat2v2half %mat2v3half %mat2v4half %mat3v2half %mat3v3half %mat3v4half %mat4v2half %mat4v3half %mat4v4half %_arr_v3float_uint_2 %_arr_mat4v2half_uint_2
+%_runtimearr_Inner = OpTypeRuntimeArray %Inner
+          %S = OpTypeStruct %_runtimearr_Inner
+%_ptr_StorageBuffer_S = OpTypePointer StorageBuffer %S
+         %sb = OpVariable %_ptr_StorageBuffer_S StorageBuffer
+       %void = OpTypeVoid
+         %45 = OpTypeFunction %void %uint
+     %uint_0 = OpConstant %uint 0
+%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
+         %53 = OpConstantNull %float
+     %uint_1 = OpConstant %uint 1
+%_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int
+         %57 = OpConstantNull %int
+%_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
+         %60 = OpConstantNull %uint
+     %uint_3 = OpConstant %uint 3
+%_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
+         %64 = OpConstantNull %half
+     %uint_4 = OpConstant %uint 4
+%_ptr_StorageBuffer_v2float = OpTypePointer StorageBuffer %v2float
+         %68 = OpConstantNull %v2float
+     %uint_5 = OpConstant %uint 5
+%_ptr_StorageBuffer_v2int = OpTypePointer StorageBuffer %v2int
+         %72 = OpConstantNull %v2int
+     %uint_6 = OpConstant %uint 6
+%_ptr_StorageBuffer_v2uint = OpTypePointer StorageBuffer %v2uint
+         %76 = OpConstantNull %v2uint
+     %uint_7 = OpConstant %uint 7
+%_ptr_StorageBuffer_v2half = OpTypePointer StorageBuffer %v2half
+         %80 = OpConstantNull %v2half
+     %uint_8 = OpConstant %uint 8
+%_ptr_StorageBuffer_v3float = OpTypePointer StorageBuffer %v3float
+         %84 = OpConstantNull %v3float
+     %uint_9 = OpConstant %uint 9
+%_ptr_StorageBuffer_v3int = OpTypePointer StorageBuffer %v3int
+         %88 = OpConstantNull %v3int
+    %uint_10 = OpConstant %uint 10
+%_ptr_StorageBuffer_v3uint = OpTypePointer StorageBuffer %v3uint
+         %92 = OpConstantNull %v3uint
+    %uint_11 = OpConstant %uint 11
+%_ptr_StorageBuffer_v3half = OpTypePointer StorageBuffer %v3half
+         %96 = OpConstantNull %v3half
+    %uint_12 = OpConstant %uint 12
+%_ptr_StorageBuffer_v4float = OpTypePointer StorageBuffer %v4float
+        %100 = OpConstantNull %v4float
+    %uint_13 = OpConstant %uint 13
+%_ptr_StorageBuffer_v4int = OpTypePointer StorageBuffer %v4int
+        %104 = OpConstantNull %v4int
+    %uint_14 = OpConstant %uint 14
+%_ptr_StorageBuffer_v4uint = OpTypePointer StorageBuffer %v4uint
+        %108 = OpConstantNull %v4uint
+    %uint_15 = OpConstant %uint 15
+%_ptr_StorageBuffer_v4half = OpTypePointer StorageBuffer %v4half
+        %112 = OpConstantNull %v4half
+    %uint_16 = OpConstant %uint 16
+%_ptr_StorageBuffer_mat2v2float = OpTypePointer StorageBuffer %mat2v2float
+        %116 = OpConstantNull %mat2v2float
+    %uint_17 = OpConstant %uint 17
+%_ptr_StorageBuffer_mat2v3float = OpTypePointer StorageBuffer %mat2v3float
+        %120 = OpConstantNull %mat2v3float
+    %uint_18 = OpConstant %uint 18
+%_ptr_StorageBuffer_mat2v4float = OpTypePointer StorageBuffer %mat2v4float
+        %124 = OpConstantNull %mat2v4float
+    %uint_19 = OpConstant %uint 19
+%_ptr_StorageBuffer_mat3v2float = OpTypePointer StorageBuffer %mat3v2float
+        %128 = OpConstantNull %mat3v2float
+    %uint_20 = OpConstant %uint 20
+%_ptr_StorageBuffer_mat3v3float = OpTypePointer StorageBuffer %mat3v3float
+        %132 = OpConstantNull %mat3v3float
+    %uint_21 = OpConstant %uint 21
+%_ptr_StorageBuffer_mat3v4float = OpTypePointer StorageBuffer %mat3v4float
+        %136 = OpConstantNull %mat3v4float
+    %uint_22 = OpConstant %uint 22
+%_ptr_StorageBuffer_mat4v2float = OpTypePointer StorageBuffer %mat4v2float
+        %140 = OpConstantNull %mat4v2float
+    %uint_23 = OpConstant %uint 23
+%_ptr_StorageBuffer_mat4v3float = OpTypePointer StorageBuffer %mat4v3float
+        %144 = OpConstantNull %mat4v3float
+    %uint_24 = OpConstant %uint 24
+%_ptr_StorageBuffer_mat4v4float = OpTypePointer StorageBuffer %mat4v4float
+        %148 = OpConstantNull %mat4v4float
+    %uint_25 = OpConstant %uint 25
+%_ptr_StorageBuffer_mat2v2half = OpTypePointer StorageBuffer %mat2v2half
+        %152 = OpConstantNull %mat2v2half
+    %uint_26 = OpConstant %uint 26
+%_ptr_StorageBuffer_mat2v3half = OpTypePointer StorageBuffer %mat2v3half
+        %156 = OpConstantNull %mat2v3half
+    %uint_27 = OpConstant %uint 27
+%_ptr_StorageBuffer_mat2v4half = OpTypePointer StorageBuffer %mat2v4half
+        %160 = OpConstantNull %mat2v4half
+    %uint_28 = OpConstant %uint 28
+%_ptr_StorageBuffer_mat3v2half = OpTypePointer StorageBuffer %mat3v2half
+        %164 = OpConstantNull %mat3v2half
+    %uint_29 = OpConstant %uint 29
+%_ptr_StorageBuffer_mat3v3half = OpTypePointer StorageBuffer %mat3v3half
+        %168 = OpConstantNull %mat3v3half
+    %uint_30 = OpConstant %uint 30
+%_ptr_StorageBuffer_mat3v4half = OpTypePointer StorageBuffer %mat3v4half
+        %172 = OpConstantNull %mat3v4half
+    %uint_31 = OpConstant %uint 31
+%_ptr_StorageBuffer_mat4v2half = OpTypePointer StorageBuffer %mat4v2half
+        %176 = OpConstantNull %mat4v2half
+    %uint_32 = OpConstant %uint 32
+%_ptr_StorageBuffer_mat4v3half = OpTypePointer StorageBuffer %mat4v3half
+        %180 = OpConstantNull %mat4v3half
+    %uint_33 = OpConstant %uint 33
+%_ptr_StorageBuffer_mat4v4half = OpTypePointer StorageBuffer %mat4v4half
+        %184 = OpConstantNull %mat4v4half
+    %uint_34 = OpConstant %uint 34
+%_ptr_StorageBuffer__arr_v3float_uint_2 = OpTypePointer StorageBuffer %_arr_v3float_uint_2
+        %188 = OpConstantNull %_arr_v3float_uint_2
+    %uint_35 = OpConstant %uint 35
+%_ptr_StorageBuffer__arr_mat4v2half_uint_2 = OpTypePointer StorageBuffer %_arr_mat4v2half_uint_2
+        %192 = OpConstantNull %_arr_mat4v2half_uint_2
+        %193 = OpTypeFunction %void
+ %main_inner = OpFunction %void None %45
+        %idx = OpFunctionParameter %uint
+         %49 = OpLabel
+         %52 = OpAccessChain %_ptr_StorageBuffer_float %sb %uint_0 %idx %uint_0
+               OpStore %52 %53
+         %56 = OpAccessChain %_ptr_StorageBuffer_int %sb %uint_0 %idx %uint_1
+               OpStore %56 %57
+         %59 = OpAccessChain %_ptr_StorageBuffer_uint %sb %uint_0 %idx %uint_2
+               OpStore %59 %60
+         %63 = OpAccessChain %_ptr_StorageBuffer_half %sb %uint_0 %idx %uint_3
+               OpStore %63 %64
+         %67 = OpAccessChain %_ptr_StorageBuffer_v2float %sb %uint_0 %idx %uint_4
+               OpStore %67 %68
+         %71 = OpAccessChain %_ptr_StorageBuffer_v2int %sb %uint_0 %idx %uint_5
+               OpStore %71 %72
+         %75 = OpAccessChain %_ptr_StorageBuffer_v2uint %sb %uint_0 %idx %uint_6
+               OpStore %75 %76
+         %79 = OpAccessChain %_ptr_StorageBuffer_v2half %sb %uint_0 %idx %uint_7
+               OpStore %79 %80
+         %83 = OpAccessChain %_ptr_StorageBuffer_v3float %sb %uint_0 %idx %uint_8
+               OpStore %83 %84
+         %87 = OpAccessChain %_ptr_StorageBuffer_v3int %sb %uint_0 %idx %uint_9
+               OpStore %87 %88
+         %91 = OpAccessChain %_ptr_StorageBuffer_v3uint %sb %uint_0 %idx %uint_10
+               OpStore %91 %92
+         %95 = OpAccessChain %_ptr_StorageBuffer_v3half %sb %uint_0 %idx %uint_11
+               OpStore %95 %96
+         %99 = OpAccessChain %_ptr_StorageBuffer_v4float %sb %uint_0 %idx %uint_12
+               OpStore %99 %100
+        %103 = OpAccessChain %_ptr_StorageBuffer_v4int %sb %uint_0 %idx %uint_13
+               OpStore %103 %104
+        %107 = OpAccessChain %_ptr_StorageBuffer_v4uint %sb %uint_0 %idx %uint_14
+               OpStore %107 %108
+        %111 = OpAccessChain %_ptr_StorageBuffer_v4half %sb %uint_0 %idx %uint_15
+               OpStore %111 %112
+        %115 = OpAccessChain %_ptr_StorageBuffer_mat2v2float %sb %uint_0 %idx %uint_16
+               OpStore %115 %116
+        %119 = OpAccessChain %_ptr_StorageBuffer_mat2v3float %sb %uint_0 %idx %uint_17
+               OpStore %119 %120
+        %123 = OpAccessChain %_ptr_StorageBuffer_mat2v4float %sb %uint_0 %idx %uint_18
+               OpStore %123 %124
+        %127 = OpAccessChain %_ptr_StorageBuffer_mat3v2float %sb %uint_0 %idx %uint_19
+               OpStore %127 %128
+        %131 = OpAccessChain %_ptr_StorageBuffer_mat3v3float %sb %uint_0 %idx %uint_20
+               OpStore %131 %132
+        %135 = OpAccessChain %_ptr_StorageBuffer_mat3v4float %sb %uint_0 %idx %uint_21
+               OpStore %135 %136
+        %139 = OpAccessChain %_ptr_StorageBuffer_mat4v2float %sb %uint_0 %idx %uint_22
+               OpStore %139 %140
+        %143 = OpAccessChain %_ptr_StorageBuffer_mat4v3float %sb %uint_0 %idx %uint_23
+               OpStore %143 %144
+        %147 = OpAccessChain %_ptr_StorageBuffer_mat4v4float %sb %uint_0 %idx %uint_24
+               OpStore %147 %148
+        %151 = OpAccessChain %_ptr_StorageBuffer_mat2v2half %sb %uint_0 %idx %uint_25
+               OpStore %151 %152
+        %155 = OpAccessChain %_ptr_StorageBuffer_mat2v3half %sb %uint_0 %idx %uint_26
+               OpStore %155 %156
+        %159 = OpAccessChain %_ptr_StorageBuffer_mat2v4half %sb %uint_0 %idx %uint_27
+               OpStore %159 %160
+        %163 = OpAccessChain %_ptr_StorageBuffer_mat3v2half %sb %uint_0 %idx %uint_28
+               OpStore %163 %164
+        %167 = OpAccessChain %_ptr_StorageBuffer_mat3v3half %sb %uint_0 %idx %uint_29
+               OpStore %167 %168
+        %171 = OpAccessChain %_ptr_StorageBuffer_mat3v4half %sb %uint_0 %idx %uint_30
+               OpStore %171 %172
+        %175 = OpAccessChain %_ptr_StorageBuffer_mat4v2half %sb %uint_0 %idx %uint_31
+               OpStore %175 %176
+        %179 = OpAccessChain %_ptr_StorageBuffer_mat4v3half %sb %uint_0 %idx %uint_32
+               OpStore %179 %180
+        %183 = OpAccessChain %_ptr_StorageBuffer_mat4v4half %sb %uint_0 %idx %uint_33
+               OpStore %183 %184
+        %187 = OpAccessChain %_ptr_StorageBuffer__arr_v3float_uint_2 %sb %uint_0 %idx %uint_34
+               OpStore %187 %188
+        %191 = OpAccessChain %_ptr_StorageBuffer__arr_mat4v2half_uint_2 %sb %uint_0 %idx %uint_35
+               OpStore %191 %192
+               OpReturn
+               OpFunctionEnd
+       %main = OpFunction %void None %193
+        %195 = OpLabel
+        %197 = OpLoad %uint %idx_1
+        %196 = OpFunctionCall %void %main_inner %197
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/storage/dynamic_index/write_f16.wgsl.expected.wgsl b/test/tint/buffer/storage/dynamic_index/write_f16.wgsl.expected.wgsl
new file mode 100644
index 0000000..c81934c
--- /dev/null
+++ b/test/tint/buffer/storage/dynamic_index/write_f16.wgsl.expected.wgsl

@@ -0,0 +1,86 @@
+enable f16;
+
+struct Inner {
+  scalar_f32 : f32,
+  scalar_i32 : i32,
+  scalar_u32 : u32,
+  scalar_f16 : f16,
+  vec2_f32 : vec2<f32>,
+  vec2_i32 : vec2<i32>,
+  vec2_u32 : vec2<u32>,
+  vec2_f16 : vec2<f16>,
+  vec3_f32 : vec3<f32>,
+  vec3_i32 : vec3<i32>,
+  vec3_u32 : vec3<u32>,
+  vec3_f16 : vec3<f16>,
+  vec4_f32 : vec4<f32>,
+  vec4_i32 : vec4<i32>,
+  vec4_u32 : vec4<u32>,
+  vec4_f16 : vec4<f16>,
+  mat2x2_f32 : mat2x2<f32>,
+  mat2x3_f32 : mat2x3<f32>,
+  mat2x4_f32 : mat2x4<f32>,
+  mat3x2_f32 : mat3x2<f32>,
+  mat3x3_f32 : mat3x3<f32>,
+  mat3x4_f32 : mat3x4<f32>,
+  mat4x2_f32 : mat4x2<f32>,
+  mat4x3_f32 : mat4x3<f32>,
+  mat4x4_f32 : mat4x4<f32>,
+  mat2x2_f16 : mat2x2<f16>,
+  mat2x3_f16 : mat2x3<f16>,
+  mat2x4_f16 : mat2x4<f16>,
+  mat3x2_f16 : mat3x2<f16>,
+  mat3x3_f16 : mat3x3<f16>,
+  mat3x4_f16 : mat3x4<f16>,
+  mat4x2_f16 : mat4x2<f16>,
+  mat4x3_f16 : mat4x3<f16>,
+  mat4x4_f16 : mat4x4<f16>,
+  arr2_vec3_f32 : array<vec3<f32>, 2>,
+  arr2_mat4x2_f16 : array<mat4x2<f16>, 2>,
+}
+
+struct S {
+  arr : array<Inner>,
+}
+
+@binding(0) @group(0) var<storage, read_write> sb : S;
+
+@compute @workgroup_size(1)
+fn main(@builtin(local_invocation_index) idx : u32) {
+  sb.arr[idx].scalar_f32 = f32();
+  sb.arr[idx].scalar_i32 = i32();
+  sb.arr[idx].scalar_u32 = u32();
+  sb.arr[idx].scalar_f16 = f16();
+  sb.arr[idx].vec2_f32 = vec2<f32>();
+  sb.arr[idx].vec2_i32 = vec2<i32>();
+  sb.arr[idx].vec2_u32 = vec2<u32>();
+  sb.arr[idx].vec2_f16 = vec2<f16>();
+  sb.arr[idx].vec3_f32 = vec3<f32>();
+  sb.arr[idx].vec3_i32 = vec3<i32>();
+  sb.arr[idx].vec3_u32 = vec3<u32>();
+  sb.arr[idx].vec3_f16 = vec3<f16>();
+  sb.arr[idx].vec4_f32 = vec4<f32>();
+  sb.arr[idx].vec4_i32 = vec4<i32>();
+  sb.arr[idx].vec4_u32 = vec4<u32>();
+  sb.arr[idx].vec4_f16 = vec4<f16>();
+  sb.arr[idx].mat2x2_f32 = mat2x2<f32>();
+  sb.arr[idx].mat2x3_f32 = mat2x3<f32>();
+  sb.arr[idx].mat2x4_f32 = mat2x4<f32>();
+  sb.arr[idx].mat3x2_f32 = mat3x2<f32>();
+  sb.arr[idx].mat3x3_f32 = mat3x3<f32>();
+  sb.arr[idx].mat3x4_f32 = mat3x4<f32>();
+  sb.arr[idx].mat4x2_f32 = mat4x2<f32>();
+  sb.arr[idx].mat4x3_f32 = mat4x3<f32>();
+  sb.arr[idx].mat4x4_f32 = mat4x4<f32>();
+  sb.arr[idx].mat2x2_f16 = mat2x2<f16>();
+  sb.arr[idx].mat2x3_f16 = mat2x3<f16>();
+  sb.arr[idx].mat2x4_f16 = mat2x4<f16>();
+  sb.arr[idx].mat3x2_f16 = mat3x2<f16>();
+  sb.arr[idx].mat3x3_f16 = mat3x3<f16>();
+  sb.arr[idx].mat3x4_f16 = mat3x4<f16>();
+  sb.arr[idx].mat4x2_f16 = mat4x2<f16>();
+  sb.arr[idx].mat4x3_f16 = mat4x3<f16>();
+  sb.arr[idx].mat4x4_f16 = mat4x4<f16>();
+  sb.arr[idx].arr2_vec3_f32 = array<vec3<f32>, 2>();
+  sb.arr[idx].arr2_mat4x2_f16 = array<mat4x2<f16>, 2>();
+}

diff --git a/test/tint/buffer/storage/static_index/read.wgsl b/test/tint/buffer/storage/static_index/read.wgsl
index 84c5148..80b9d10 100644
--- a/test/tint/buffer/storage/static_index/read.wgsl
+++ b/test/tint/buffer/storage/static_index/read.wgsl

@@ -1,32 +1,61 @@
 struct Inner {
-    x : i32,
+    scalar_i32 : i32,
+    scalar_f32 : f32,
 };
 
 struct S {
-    a : vec3<i32>,
-    b : i32,
-    c : vec3<u32>,
-    d : u32,
-    e : vec3<f32>,
-    f : f32,
-    g : mat2x3<f32>,
-    h : mat3x2<f32>,
-    i : Inner,
-    j : array<Inner, 4>,
+    scalar_f32 : f32,
+    scalar_i32 : i32,
+    scalar_u32 : u32,
+    vec2_f32 : vec2<f32>,
+    vec2_i32 : vec2<i32>,
+    vec2_u32 : vec2<u32>,
+    vec3_f32 : vec3<f32>,
+    vec3_i32 : vec3<i32>,
+    vec3_u32 : vec3<u32>,
+    vec4_f32 : vec4<f32>,
+    vec4_i32 : vec4<i32>,
+    vec4_u32 : vec4<u32>,
+    mat2x2_f32 : mat2x2<f32>,
+    mat2x3_f32 : mat2x3<f32>,
+    mat2x4_f32 : mat2x4<f32>,
+    mat3x2_f32 : mat3x2<f32>,
+    mat3x3_f32 : mat3x3<f32>,
+    mat3x4_f32 : mat3x4<f32>,
+    mat4x2_f32 : mat4x2<f32>,
+    mat4x3_f32 : mat4x3<f32>,
+    mat4x4_f32 : mat4x4<f32>,
+    arr2_vec3_f32 : array<vec3<f32>, 2>,
+    struct_inner : Inner,
+    array_struct_inner : array<Inner, 4>,
 };
 
-@binding(0) @group(0) var<storage, read> s : S;
+@binding(0) @group(0) var<storage, read> sb : S;
 
 @compute @workgroup_size(1)
 fn main() {
-    let a = s.a;
-    let b = s.b;
-    let c = s.c;
-    let d = s.d;
-    let e = s.e;
-    let f = s.f;
-    let g = s.g;
-    let h = s.h;
-    let i = s.i;
-    let j = s.j;
+    let scalar_f32 = sb.scalar_f32;
+    let scalar_i32 = sb.scalar_i32;
+    let scalar_u32 = sb.scalar_u32;
+    let vec2_f32 = sb.vec2_f32;
+    let vec2_i32 = sb.vec2_i32;
+    let vec2_u32 = sb.vec2_u32;
+    let vec3_f32 = sb.vec3_f32;
+    let vec3_i32 = sb.vec3_i32;
+    let vec3_u32 = sb.vec3_u32;
+    let vec4_f32 = sb.vec4_f32;
+    let vec4_i32 = sb.vec4_i32;
+    let vec4_u32 = sb.vec4_u32;
+    let mat2x2_f32 = sb.mat2x2_f32;
+    let mat2x3_f32 = sb.mat2x3_f32;
+    let mat2x4_f32 = sb.mat2x4_f32;
+    let mat3x2_f32 = sb.mat3x2_f32;
+    let mat3x3_f32 = sb.mat3x3_f32;
+    let mat3x4_f32 = sb.mat3x4_f32;
+    let mat4x2_f32 = sb.mat4x2_f32;
+    let mat4x3_f32 = sb.mat4x3_f32;
+    let mat4x4_f32 = sb.mat4x4_f32;
+    let arr2_vec3_f32 = sb.arr2_vec3_f32;
+    let struct_inner = sb.struct_inner;
+    let array_struct_inner = sb.array_struct_inner;
 }

diff --git a/test/tint/buffer/storage/static_index/read.wgsl.expected.dxc.hlsl b/test/tint/buffer/storage/static_index/read.wgsl.expected.dxc.hlsl
index 501435d..49b16b2 100644
--- a/test/tint/buffer/storage/static_index/read.wgsl.expected.dxc.hlsl
+++ b/test/tint/buffer/storage/static_index/read.wgsl.expected.dxc.hlsl

@@ -1,44 +1,98 @@
 struct Inner {
-  int x;
+  int scalar_i32;
+  float scalar_f32;
 };
 
-ByteAddressBuffer s : register(t0, space0);
+ByteAddressBuffer sb : register(t0, space0);
 
-float2x3 tint_symbol_6(ByteAddressBuffer buffer, uint offset) {
+float2x2 tint_symbol_12(ByteAddressBuffer buffer, uint offset) {
+  return float2x2(asfloat(buffer.Load2((offset + 0u))), asfloat(buffer.Load2((offset + 8u))));
+}
+
+float2x3 tint_symbol_13(ByteAddressBuffer buffer, uint offset) {
   return float2x3(asfloat(buffer.Load3((offset + 0u))), asfloat(buffer.Load3((offset + 16u))));
 }
 
-float3x2 tint_symbol_7(ByteAddressBuffer buffer, uint offset) {
+float2x4 tint_symbol_14(ByteAddressBuffer buffer, uint offset) {
+  return float2x4(asfloat(buffer.Load4((offset + 0u))), asfloat(buffer.Load4((offset + 16u))));
+}
+
+float3x2 tint_symbol_15(ByteAddressBuffer buffer, uint offset) {
   return float3x2(asfloat(buffer.Load2((offset + 0u))), asfloat(buffer.Load2((offset + 8u))), asfloat(buffer.Load2((offset + 16u))));
 }
 
-Inner tint_symbol_9(ByteAddressBuffer buffer, uint offset) {
-  const Inner tint_symbol_11 = {asint(buffer.Load((offset + 0u)))};
-  return tint_symbol_11;
+float3x3 tint_symbol_16(ByteAddressBuffer buffer, uint offset) {
+  return float3x3(asfloat(buffer.Load3((offset + 0u))), asfloat(buffer.Load3((offset + 16u))), asfloat(buffer.Load3((offset + 32u))));
 }
 
-typedef Inner tint_symbol_10_ret[4];
-tint_symbol_10_ret tint_symbol_10(ByteAddressBuffer buffer, uint offset) {
-  Inner arr[4] = (Inner[4])0;
+float3x4 tint_symbol_17(ByteAddressBuffer buffer, uint offset) {
+  return float3x4(asfloat(buffer.Load4((offset + 0u))), asfloat(buffer.Load4((offset + 16u))), asfloat(buffer.Load4((offset + 32u))));
+}
+
+float4x2 tint_symbol_18(ByteAddressBuffer buffer, uint offset) {
+  return float4x2(asfloat(buffer.Load2((offset + 0u))), asfloat(buffer.Load2((offset + 8u))), asfloat(buffer.Load2((offset + 16u))), asfloat(buffer.Load2((offset + 24u))));
+}
+
+float4x3 tint_symbol_19(ByteAddressBuffer buffer, uint offset) {
+  return float4x3(asfloat(buffer.Load3((offset + 0u))), asfloat(buffer.Load3((offset + 16u))), asfloat(buffer.Load3((offset + 32u))), asfloat(buffer.Load3((offset + 48u))));
+}
+
+float4x4 tint_symbol_20(ByteAddressBuffer buffer, uint offset) {
+  return float4x4(asfloat(buffer.Load4((offset + 0u))), asfloat(buffer.Load4((offset + 16u))), asfloat(buffer.Load4((offset + 32u))), asfloat(buffer.Load4((offset + 48u))));
+}
+
+typedef float3 tint_symbol_21_ret[2];
+tint_symbol_21_ret tint_symbol_21(ByteAddressBuffer buffer, uint offset) {
+  float3 arr[2] = (float3[2])0;
   {
-    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
-      arr[i_1] = tint_symbol_9(buffer, (offset + (i_1 * 4u)));
+    for(uint i = 0u; (i < 2u); i = (i + 1u)) {
+      arr[i] = asfloat(buffer.Load3((offset + (i * 16u))));
     }
   }
   return arr;
 }
 
+Inner tint_symbol_22(ByteAddressBuffer buffer, uint offset) {
+  const Inner tint_symbol_24 = {asint(buffer.Load((offset + 0u))), asfloat(buffer.Load((offset + 4u)))};
+  return tint_symbol_24;
+}
+
+typedef Inner tint_symbol_23_ret[4];
+tint_symbol_23_ret tint_symbol_23(ByteAddressBuffer buffer, uint offset) {
+  Inner arr_1[4] = (Inner[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr_1[i_1] = tint_symbol_22(buffer, (offset + (i_1 * 8u)));
+    }
+  }
+  return arr_1;
+}
+
 [numthreads(1, 1, 1)]
 void main() {
-  const int3 a = asint(s.Load3(0u));
-  const int b = asint(s.Load(12u));
-  const uint3 c = s.Load3(16u);
-  const uint d = s.Load(28u);
-  const float3 e = asfloat(s.Load3(32u));
-  const float f = asfloat(s.Load(44u));
-  const float2x3 g = tint_symbol_6(s, 48u);
-  const float3x2 h = tint_symbol_7(s, 80u);
-  const Inner i = tint_symbol_9(s, 104u);
-  const Inner j[4] = tint_symbol_10(s, 108u);
+  const float scalar_f32 = asfloat(sb.Load(0u));
+  const int scalar_i32 = asint(sb.Load(4u));
+  const uint scalar_u32 = sb.Load(8u);
+  const float2 vec2_f32 = asfloat(sb.Load2(16u));
+  const int2 vec2_i32 = asint(sb.Load2(24u));
+  const uint2 vec2_u32 = sb.Load2(32u);
+  const float3 vec3_f32 = asfloat(sb.Load3(48u));
+  const int3 vec3_i32 = asint(sb.Load3(64u));
+  const uint3 vec3_u32 = sb.Load3(80u);
+  const float4 vec4_f32 = asfloat(sb.Load4(96u));
+  const int4 vec4_i32 = asint(sb.Load4(112u));
+  const uint4 vec4_u32 = sb.Load4(128u);
+  const float2x2 mat2x2_f32 = tint_symbol_12(sb, 144u);
+  const float2x3 mat2x3_f32 = tint_symbol_13(sb, 160u);
+  const float2x4 mat2x4_f32 = tint_symbol_14(sb, 192u);
+  const float3x2 mat3x2_f32 = tint_symbol_15(sb, 224u);
+  const float3x3 mat3x3_f32 = tint_symbol_16(sb, 256u);
+  const float3x4 mat3x4_f32 = tint_symbol_17(sb, 304u);
+  const float4x2 mat4x2_f32 = tint_symbol_18(sb, 352u);
+  const float4x3 mat4x3_f32 = tint_symbol_19(sb, 384u);
+  const float4x4 mat4x4_f32 = tint_symbol_20(sb, 448u);
+  const float3 arr2_vec3_f32[2] = tint_symbol_21(sb, 512u);
+  const Inner struct_inner = tint_symbol_22(sb, 544u);
+  const Inner array_struct_inner[4] = tint_symbol_23(sb, 552u);
   return;
 }

diff --git a/test/tint/buffer/storage/static_index/read.wgsl.expected.fxc.hlsl b/test/tint/buffer/storage/static_index/read.wgsl.expected.fxc.hlsl
index 501435d..49b16b2 100644
--- a/test/tint/buffer/storage/static_index/read.wgsl.expected.fxc.hlsl
+++ b/test/tint/buffer/storage/static_index/read.wgsl.expected.fxc.hlsl

@@ -1,44 +1,98 @@
 struct Inner {
-  int x;
+  int scalar_i32;
+  float scalar_f32;
 };
 
-ByteAddressBuffer s : register(t0, space0);
+ByteAddressBuffer sb : register(t0, space0);
 
-float2x3 tint_symbol_6(ByteAddressBuffer buffer, uint offset) {
+float2x2 tint_symbol_12(ByteAddressBuffer buffer, uint offset) {
+  return float2x2(asfloat(buffer.Load2((offset + 0u))), asfloat(buffer.Load2((offset + 8u))));
+}
+
+float2x3 tint_symbol_13(ByteAddressBuffer buffer, uint offset) {
   return float2x3(asfloat(buffer.Load3((offset + 0u))), asfloat(buffer.Load3((offset + 16u))));
 }
 
-float3x2 tint_symbol_7(ByteAddressBuffer buffer, uint offset) {
+float2x4 tint_symbol_14(ByteAddressBuffer buffer, uint offset) {
+  return float2x4(asfloat(buffer.Load4((offset + 0u))), asfloat(buffer.Load4((offset + 16u))));
+}
+
+float3x2 tint_symbol_15(ByteAddressBuffer buffer, uint offset) {
   return float3x2(asfloat(buffer.Load2((offset + 0u))), asfloat(buffer.Load2((offset + 8u))), asfloat(buffer.Load2((offset + 16u))));
 }
 
-Inner tint_symbol_9(ByteAddressBuffer buffer, uint offset) {
-  const Inner tint_symbol_11 = {asint(buffer.Load((offset + 0u)))};
-  return tint_symbol_11;
+float3x3 tint_symbol_16(ByteAddressBuffer buffer, uint offset) {
+  return float3x3(asfloat(buffer.Load3((offset + 0u))), asfloat(buffer.Load3((offset + 16u))), asfloat(buffer.Load3((offset + 32u))));
 }
 
-typedef Inner tint_symbol_10_ret[4];
-tint_symbol_10_ret tint_symbol_10(ByteAddressBuffer buffer, uint offset) {
-  Inner arr[4] = (Inner[4])0;
+float3x4 tint_symbol_17(ByteAddressBuffer buffer, uint offset) {
+  return float3x4(asfloat(buffer.Load4((offset + 0u))), asfloat(buffer.Load4((offset + 16u))), asfloat(buffer.Load4((offset + 32u))));
+}
+
+float4x2 tint_symbol_18(ByteAddressBuffer buffer, uint offset) {
+  return float4x2(asfloat(buffer.Load2((offset + 0u))), asfloat(buffer.Load2((offset + 8u))), asfloat(buffer.Load2((offset + 16u))), asfloat(buffer.Load2((offset + 24u))));
+}
+
+float4x3 tint_symbol_19(ByteAddressBuffer buffer, uint offset) {
+  return float4x3(asfloat(buffer.Load3((offset + 0u))), asfloat(buffer.Load3((offset + 16u))), asfloat(buffer.Load3((offset + 32u))), asfloat(buffer.Load3((offset + 48u))));
+}
+
+float4x4 tint_symbol_20(ByteAddressBuffer buffer, uint offset) {
+  return float4x4(asfloat(buffer.Load4((offset + 0u))), asfloat(buffer.Load4((offset + 16u))), asfloat(buffer.Load4((offset + 32u))), asfloat(buffer.Load4((offset + 48u))));
+}
+
+typedef float3 tint_symbol_21_ret[2];
+tint_symbol_21_ret tint_symbol_21(ByteAddressBuffer buffer, uint offset) {
+  float3 arr[2] = (float3[2])0;
   {
-    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
-      arr[i_1] = tint_symbol_9(buffer, (offset + (i_1 * 4u)));
+    for(uint i = 0u; (i < 2u); i = (i + 1u)) {
+      arr[i] = asfloat(buffer.Load3((offset + (i * 16u))));
     }
   }
   return arr;
 }
 
+Inner tint_symbol_22(ByteAddressBuffer buffer, uint offset) {
+  const Inner tint_symbol_24 = {asint(buffer.Load((offset + 0u))), asfloat(buffer.Load((offset + 4u)))};
+  return tint_symbol_24;
+}
+
+typedef Inner tint_symbol_23_ret[4];
+tint_symbol_23_ret tint_symbol_23(ByteAddressBuffer buffer, uint offset) {
+  Inner arr_1[4] = (Inner[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr_1[i_1] = tint_symbol_22(buffer, (offset + (i_1 * 8u)));
+    }
+  }
+  return arr_1;
+}
+
 [numthreads(1, 1, 1)]
 void main() {
-  const int3 a = asint(s.Load3(0u));
-  const int b = asint(s.Load(12u));
-  const uint3 c = s.Load3(16u);
-  const uint d = s.Load(28u);
-  const float3 e = asfloat(s.Load3(32u));
-  const float f = asfloat(s.Load(44u));
-  const float2x3 g = tint_symbol_6(s, 48u);
-  const float3x2 h = tint_symbol_7(s, 80u);
-  const Inner i = tint_symbol_9(s, 104u);
-  const Inner j[4] = tint_symbol_10(s, 108u);
+  const float scalar_f32 = asfloat(sb.Load(0u));
+  const int scalar_i32 = asint(sb.Load(4u));
+  const uint scalar_u32 = sb.Load(8u);
+  const float2 vec2_f32 = asfloat(sb.Load2(16u));
+  const int2 vec2_i32 = asint(sb.Load2(24u));
+  const uint2 vec2_u32 = sb.Load2(32u);
+  const float3 vec3_f32 = asfloat(sb.Load3(48u));
+  const int3 vec3_i32 = asint(sb.Load3(64u));
+  const uint3 vec3_u32 = sb.Load3(80u);
+  const float4 vec4_f32 = asfloat(sb.Load4(96u));
+  const int4 vec4_i32 = asint(sb.Load4(112u));
+  const uint4 vec4_u32 = sb.Load4(128u);
+  const float2x2 mat2x2_f32 = tint_symbol_12(sb, 144u);
+  const float2x3 mat2x3_f32 = tint_symbol_13(sb, 160u);
+  const float2x4 mat2x4_f32 = tint_symbol_14(sb, 192u);
+  const float3x2 mat3x2_f32 = tint_symbol_15(sb, 224u);
+  const float3x3 mat3x3_f32 = tint_symbol_16(sb, 256u);
+  const float3x4 mat3x4_f32 = tint_symbol_17(sb, 304u);
+  const float4x2 mat4x2_f32 = tint_symbol_18(sb, 352u);
+  const float4x3 mat4x3_f32 = tint_symbol_19(sb, 384u);
+  const float4x4 mat4x4_f32 = tint_symbol_20(sb, 448u);
+  const float3 arr2_vec3_f32[2] = tint_symbol_21(sb, 512u);
+  const Inner struct_inner = tint_symbol_22(sb, 544u);
+  const Inner array_struct_inner[4] = tint_symbol_23(sb, 552u);
   return;
 }

diff --git a/test/tint/buffer/storage/static_index/read.wgsl.expected.glsl b/test/tint/buffer/storage/static_index/read.wgsl.expected.glsl
index d2e4edd..59613b5 100644
--- a/test/tint/buffer/storage/static_index/read.wgsl.expected.glsl
+++ b/test/tint/buffer/storage/static_index/read.wgsl.expected.glsl

@@ -1,38 +1,76 @@
 #version 310 es
 
 struct Inner {
-  int x;
+  int scalar_i32;
+  float scalar_f32;
 };
 
 struct S {
-  ivec3 a;
-  int b;
-  uvec3 c;
-  uint d;
-  vec3 e;
-  float f;
-  mat2x3 g;
-  mat3x2 h;
-  Inner i;
-  Inner j[4];
+  float scalar_f32;
+  int scalar_i32;
+  uint scalar_u32;
   uint pad;
+  vec2 vec2_f32;
+  ivec2 vec2_i32;
+  uvec2 vec2_u32;
+  uint pad_1;
+  uint pad_2;
+  vec3 vec3_f32;
+  uint pad_3;
+  ivec3 vec3_i32;
+  uint pad_4;
+  uvec3 vec3_u32;
+  uint pad_5;
+  vec4 vec4_f32;
+  ivec4 vec4_i32;
+  uvec4 vec4_u32;
+  mat2 mat2x2_f32;
+  mat2x3 mat2x3_f32;
+  mat2x4 mat2x4_f32;
+  mat3x2 mat3x2_f32;
+  uint pad_6;
+  uint pad_7;
+  mat3 mat3x3_f32;
+  mat3x4 mat3x4_f32;
+  mat4x2 mat4x2_f32;
+  mat4x3 mat4x3_f32;
+  mat4 mat4x4_f32;
+  vec3 arr2_vec3_f32[2];
+  Inner struct_inner;
+  Inner array_struct_inner[4];
+  uint pad_8;
+  uint pad_9;
 };
 
-layout(binding = 0, std430) buffer s_block_ssbo {
+layout(binding = 0, std430) buffer sb_block_ssbo {
   S inner;
-} s;
+} sb;
 
 void tint_symbol() {
-  ivec3 a = s.inner.a;
-  int b = s.inner.b;
-  uvec3 c = s.inner.c;
-  uint d = s.inner.d;
-  vec3 e = s.inner.e;
-  float f = s.inner.f;
-  mat2x3 g = s.inner.g;
-  mat3x2 h = s.inner.h;
-  Inner i = s.inner.i;
-  Inner j[4] = s.inner.j;
+  float scalar_f32 = sb.inner.scalar_f32;
+  int scalar_i32 = sb.inner.scalar_i32;
+  uint scalar_u32 = sb.inner.scalar_u32;
+  vec2 vec2_f32 = sb.inner.vec2_f32;
+  ivec2 vec2_i32 = sb.inner.vec2_i32;
+  uvec2 vec2_u32 = sb.inner.vec2_u32;
+  vec3 vec3_f32 = sb.inner.vec3_f32;
+  ivec3 vec3_i32 = sb.inner.vec3_i32;
+  uvec3 vec3_u32 = sb.inner.vec3_u32;
+  vec4 vec4_f32 = sb.inner.vec4_f32;
+  ivec4 vec4_i32 = sb.inner.vec4_i32;
+  uvec4 vec4_u32 = sb.inner.vec4_u32;
+  mat2 mat2x2_f32 = sb.inner.mat2x2_f32;
+  mat2x3 mat2x3_f32 = sb.inner.mat2x3_f32;
+  mat2x4 mat2x4_f32 = sb.inner.mat2x4_f32;
+  mat3x2 mat3x2_f32 = sb.inner.mat3x2_f32;
+  mat3 mat3x3_f32 = sb.inner.mat3x3_f32;
+  mat3x4 mat3x4_f32 = sb.inner.mat3x4_f32;
+  mat4x2 mat4x2_f32 = sb.inner.mat4x2_f32;
+  mat4x3 mat4x3_f32 = sb.inner.mat4x3_f32;
+  mat4 mat4x4_f32 = sb.inner.mat4x4_f32;
+  vec3 arr2_vec3_f32[2] = sb.inner.arr2_vec3_f32;
+  Inner struct_inner = sb.inner.struct_inner;
+  Inner array_struct_inner[4] = sb.inner.array_struct_inner;
 }
 
 layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;

diff --git a/test/tint/buffer/storage/static_index/read.wgsl.expected.msl b/test/tint/buffer/storage/static_index/read.wgsl.expected.msl
index db51912..b0707fe 100644
--- a/test/tint/buffer/storage/static_index/read.wgsl.expected.msl
+++ b/test/tint/buffer/storage/static_index/read.wgsl.expected.msl

@@ -15,34 +15,69 @@
 };
 
 struct Inner {
-  /* 0x0000 */ int x;
+  /* 0x0000 */ int scalar_i32;
+  /* 0x0004 */ float scalar_f32;
 };
 
 struct S {
-  /* 0x0000 */ packed_int3 a;
-  /* 0x000c */ int b;
-  /* 0x0010 */ packed_uint3 c;
-  /* 0x001c */ uint d;
-  /* 0x0020 */ packed_float3 e;
-  /* 0x002c */ float f;
-  /* 0x0030 */ float2x3 g;
-  /* 0x0050 */ float3x2 h;
-  /* 0x0068 */ Inner i;
-  /* 0x006c */ tint_array<Inner, 4> j;
-  /* 0x007c */ tint_array<int8_t, 4> tint_pad;
+  /* 0x0000 */ float scalar_f32;
+  /* 0x0004 */ int scalar_i32;
+  /* 0x0008 */ uint scalar_u32;
+  /* 0x000c */ tint_array<int8_t, 4> tint_pad;
+  /* 0x0010 */ float2 vec2_f32;
+  /* 0x0018 */ int2 vec2_i32;
+  /* 0x0020 */ uint2 vec2_u32;
+  /* 0x0028 */ tint_array<int8_t, 8> tint_pad_1;
+  /* 0x0030 */ packed_float3 vec3_f32;
+  /* 0x003c */ tint_array<int8_t, 4> tint_pad_2;
+  /* 0x0040 */ packed_int3 vec3_i32;
+  /* 0x004c */ tint_array<int8_t, 4> tint_pad_3;
+  /* 0x0050 */ packed_uint3 vec3_u32;
+  /* 0x005c */ tint_array<int8_t, 4> tint_pad_4;
+  /* 0x0060 */ float4 vec4_f32;
+  /* 0x0070 */ int4 vec4_i32;
+  /* 0x0080 */ uint4 vec4_u32;
+  /* 0x0090 */ float2x2 mat2x2_f32;
+  /* 0x00a0 */ float2x3 mat2x3_f32;
+  /* 0x00c0 */ float2x4 mat2x4_f32;
+  /* 0x00e0 */ float3x2 mat3x2_f32;
+  /* 0x00f8 */ tint_array<int8_t, 8> tint_pad_5;
+  /* 0x0100 */ float3x3 mat3x3_f32;
+  /* 0x0130 */ float3x4 mat3x4_f32;
+  /* 0x0160 */ float4x2 mat4x2_f32;
+  /* 0x0180 */ float4x3 mat4x3_f32;
+  /* 0x01c0 */ float4x4 mat4x4_f32;
+  /* 0x0200 */ tint_array<float3, 2> arr2_vec3_f32;
+  /* 0x0220 */ Inner struct_inner;
+  /* 0x0228 */ tint_array<Inner, 4> array_struct_inner;
+  /* 0x0248 */ tint_array<int8_t, 8> tint_pad_6;
 };
 
 kernel void tint_symbol(const device S* tint_symbol_1 [[buffer(0)]]) {
-  int3 const a = int3((*(tint_symbol_1)).a);
-  int const b = (*(tint_symbol_1)).b;
-  uint3 const c = uint3((*(tint_symbol_1)).c);
-  uint const d = (*(tint_symbol_1)).d;
-  float3 const e = float3((*(tint_symbol_1)).e);
-  float const f = (*(tint_symbol_1)).f;
-  float2x3 const g = (*(tint_symbol_1)).g;
-  float3x2 const h = (*(tint_symbol_1)).h;
-  Inner const i = (*(tint_symbol_1)).i;
-  tint_array<Inner, 4> const j = (*(tint_symbol_1)).j;
+  float const scalar_f32 = (*(tint_symbol_1)).scalar_f32;
+  int const scalar_i32 = (*(tint_symbol_1)).scalar_i32;
+  uint const scalar_u32 = (*(tint_symbol_1)).scalar_u32;
+  float2 const vec2_f32 = (*(tint_symbol_1)).vec2_f32;
+  int2 const vec2_i32 = (*(tint_symbol_1)).vec2_i32;
+  uint2 const vec2_u32 = (*(tint_symbol_1)).vec2_u32;
+  float3 const vec3_f32 = float3((*(tint_symbol_1)).vec3_f32);
+  int3 const vec3_i32 = int3((*(tint_symbol_1)).vec3_i32);
+  uint3 const vec3_u32 = uint3((*(tint_symbol_1)).vec3_u32);
+  float4 const vec4_f32 = (*(tint_symbol_1)).vec4_f32;
+  int4 const vec4_i32 = (*(tint_symbol_1)).vec4_i32;
+  uint4 const vec4_u32 = (*(tint_symbol_1)).vec4_u32;
+  float2x2 const mat2x2_f32 = (*(tint_symbol_1)).mat2x2_f32;
+  float2x3 const mat2x3_f32 = (*(tint_symbol_1)).mat2x3_f32;
+  float2x4 const mat2x4_f32 = (*(tint_symbol_1)).mat2x4_f32;
+  float3x2 const mat3x2_f32 = (*(tint_symbol_1)).mat3x2_f32;
+  float3x3 const mat3x3_f32 = (*(tint_symbol_1)).mat3x3_f32;
+  float3x4 const mat3x4_f32 = (*(tint_symbol_1)).mat3x4_f32;
+  float4x2 const mat4x2_f32 = (*(tint_symbol_1)).mat4x2_f32;
+  float4x3 const mat4x3_f32 = (*(tint_symbol_1)).mat4x3_f32;
+  float4x4 const mat4x4_f32 = (*(tint_symbol_1)).mat4x4_f32;
+  tint_array<float3, 2> const arr2_vec3_f32 = (*(tint_symbol_1)).arr2_vec3_f32;
+  Inner const struct_inner = (*(tint_symbol_1)).struct_inner;
+  tint_array<Inner, 4> const array_struct_inner = (*(tint_symbol_1)).array_struct_inner;
   return;
 }
 

diff --git a/test/tint/buffer/storage/static_index/read.wgsl.expected.spvasm b/test/tint/buffer/storage/static_index/read.wgsl.expected.spvasm
index e301a46..c137494 100644
--- a/test/tint/buffer/storage/static_index/read.wgsl.expected.spvasm
+++ b/test/tint/buffer/storage/static_index/read.wgsl.expected.spvasm

@@ -1,108 +1,222 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 0
-; Bound: 60
+; Bound: 129
 ; Schema: 0
                OpCapability Shader
                OpMemoryModel Logical GLSL450
                OpEntryPoint GLCompute %main "main"
                OpExecutionMode %main LocalSize 1 1 1
-               OpName %s_block "s_block"
-               OpMemberName %s_block 0 "inner"
+               OpName %sb_block "sb_block"
+               OpMemberName %sb_block 0 "inner"
                OpName %S "S"
-               OpMemberName %S 0 "a"
-               OpMemberName %S 1 "b"
-               OpMemberName %S 2 "c"
-               OpMemberName %S 3 "d"
-               OpMemberName %S 4 "e"
-               OpMemberName %S 5 "f"
-               OpMemberName %S 6 "g"
-               OpMemberName %S 7 "h"
-               OpMemberName %S 8 "i"
+               OpMemberName %S 0 "scalar_f32"
+               OpMemberName %S 1 "scalar_i32"
+               OpMemberName %S 2 "scalar_u32"
+               OpMemberName %S 3 "vec2_f32"
+               OpMemberName %S 4 "vec2_i32"
+               OpMemberName %S 5 "vec2_u32"
+               OpMemberName %S 6 "vec3_f32"
+               OpMemberName %S 7 "vec3_i32"
+               OpMemberName %S 8 "vec3_u32"
+               OpMemberName %S 9 "vec4_f32"
+               OpMemberName %S 10 "vec4_i32"
+               OpMemberName %S 11 "vec4_u32"
+               OpMemberName %S 12 "mat2x2_f32"
+               OpMemberName %S 13 "mat2x3_f32"
+               OpMemberName %S 14 "mat2x4_f32"
+               OpMemberName %S 15 "mat3x2_f32"
+               OpMemberName %S 16 "mat3x3_f32"
+               OpMemberName %S 17 "mat3x4_f32"
+               OpMemberName %S 18 "mat4x2_f32"
+               OpMemberName %S 19 "mat4x3_f32"
+               OpMemberName %S 20 "mat4x4_f32"
+               OpMemberName %S 21 "arr2_vec3_f32"
+               OpMemberName %S 22 "struct_inner"
                OpName %Inner "Inner"
-               OpMemberName %Inner 0 "x"
-               OpMemberName %S 9 "j"
-               OpName %s "s"
+               OpMemberName %Inner 0 "scalar_i32"
+               OpMemberName %Inner 1 "scalar_f32"
+               OpMemberName %S 23 "array_struct_inner"
+               OpName %sb "sb"
                OpName %main "main"
-               OpDecorate %s_block Block
-               OpMemberDecorate %s_block 0 Offset 0
+               OpDecorate %sb_block Block
+               OpMemberDecorate %sb_block 0 Offset 0
                OpMemberDecorate %S 0 Offset 0
-               OpMemberDecorate %S 1 Offset 12
-               OpMemberDecorate %S 2 Offset 16
-               OpMemberDecorate %S 3 Offset 28
-               OpMemberDecorate %S 4 Offset 32
-               OpMemberDecorate %S 5 Offset 44
+               OpMemberDecorate %S 1 Offset 4
+               OpMemberDecorate %S 2 Offset 8
+               OpMemberDecorate %S 3 Offset 16
+               OpMemberDecorate %S 4 Offset 24
+               OpMemberDecorate %S 5 Offset 32
                OpMemberDecorate %S 6 Offset 48
-               OpMemberDecorate %S 6 ColMajor
-               OpMemberDecorate %S 6 MatrixStride 16
-               OpMemberDecorate %S 7 Offset 80
-               OpMemberDecorate %S 7 ColMajor
-               OpMemberDecorate %S 7 MatrixStride 8
-               OpMemberDecorate %S 8 Offset 104
+               OpMemberDecorate %S 7 Offset 64
+               OpMemberDecorate %S 8 Offset 80
+               OpMemberDecorate %S 9 Offset 96
+               OpMemberDecorate %S 10 Offset 112
+               OpMemberDecorate %S 11 Offset 128
+               OpMemberDecorate %S 12 Offset 144
+               OpMemberDecorate %S 12 ColMajor
+               OpMemberDecorate %S 12 MatrixStride 8
+               OpMemberDecorate %S 13 Offset 160
+               OpMemberDecorate %S 13 ColMajor
+               OpMemberDecorate %S 13 MatrixStride 16
+               OpMemberDecorate %S 14 Offset 192
+               OpMemberDecorate %S 14 ColMajor
+               OpMemberDecorate %S 14 MatrixStride 16
+               OpMemberDecorate %S 15 Offset 224
+               OpMemberDecorate %S 15 ColMajor
+               OpMemberDecorate %S 15 MatrixStride 8
+               OpMemberDecorate %S 16 Offset 256
+               OpMemberDecorate %S 16 ColMajor
+               OpMemberDecorate %S 16 MatrixStride 16
+               OpMemberDecorate %S 17 Offset 304
+               OpMemberDecorate %S 17 ColMajor
+               OpMemberDecorate %S 17 MatrixStride 16
+               OpMemberDecorate %S 18 Offset 352
+               OpMemberDecorate %S 18 ColMajor
+               OpMemberDecorate %S 18 MatrixStride 8
+               OpMemberDecorate %S 19 Offset 384
+               OpMemberDecorate %S 19 ColMajor
+               OpMemberDecorate %S 19 MatrixStride 16
+               OpMemberDecorate %S 20 Offset 448
+               OpMemberDecorate %S 20 ColMajor
+               OpMemberDecorate %S 20 MatrixStride 16
+               OpMemberDecorate %S 21 Offset 512
+               OpDecorate %_arr_v3float_uint_2 ArrayStride 16
+               OpMemberDecorate %S 22 Offset 544
                OpMemberDecorate %Inner 0 Offset 0
-               OpMemberDecorate %S 9 Offset 108
-               OpDecorate %_arr_Inner_uint_4 ArrayStride 4
-               OpDecorate %s NonWritable
-               OpDecorate %s Binding 0
-               OpDecorate %s DescriptorSet 0
-        %int = OpTypeInt 32 1
-      %v3int = OpTypeVector %int 3
-       %uint = OpTypeInt 32 0
-     %v3uint = OpTypeVector %uint 3
+               OpMemberDecorate %Inner 1 Offset 4
+               OpMemberDecorate %S 23 Offset 552
+               OpDecorate %_arr_Inner_uint_4 ArrayStride 8
+               OpDecorate %sb NonWritable
+               OpDecorate %sb Binding 0
+               OpDecorate %sb DescriptorSet 0
       %float = OpTypeFloat 32
-    %v3float = OpTypeVector %float 3
-%mat2v3float = OpTypeMatrix %v3float 2
+        %int = OpTypeInt 32 1
+       %uint = OpTypeInt 32 0
     %v2float = OpTypeVector %float 2
+      %v2int = OpTypeVector %int 2
+     %v2uint = OpTypeVector %uint 2
+    %v3float = OpTypeVector %float 3
+      %v3int = OpTypeVector %int 3
+     %v3uint = OpTypeVector %uint 3
+    %v4float = OpTypeVector %float 4
+      %v4int = OpTypeVector %int 4
+     %v4uint = OpTypeVector %uint 4
+%mat2v2float = OpTypeMatrix %v2float 2
+%mat2v3float = OpTypeMatrix %v3float 2
+%mat2v4float = OpTypeMatrix %v4float 2
 %mat3v2float = OpTypeMatrix %v2float 3
-      %Inner = OpTypeStruct %int
+%mat3v3float = OpTypeMatrix %v3float 3
+%mat3v4float = OpTypeMatrix %v4float 3
+%mat4v2float = OpTypeMatrix %v2float 4
+%mat4v3float = OpTypeMatrix %v3float 4
+%mat4v4float = OpTypeMatrix %v4float 4
+     %uint_2 = OpConstant %uint 2
+%_arr_v3float_uint_2 = OpTypeArray %v3float %uint_2
+      %Inner = OpTypeStruct %int %float
      %uint_4 = OpConstant %uint 4
 %_arr_Inner_uint_4 = OpTypeArray %Inner %uint_4
-          %S = OpTypeStruct %v3int %int %v3uint %uint %v3float %float %mat2v3float %mat3v2float %Inner %_arr_Inner_uint_4
-    %s_block = OpTypeStruct %S
-%_ptr_StorageBuffer_s_block = OpTypePointer StorageBuffer %s_block
-          %s = OpVariable %_ptr_StorageBuffer_s_block StorageBuffer
+          %S = OpTypeStruct %float %int %uint %v2float %v2int %v2uint %v3float %v3int %v3uint %v4float %v4int %v4uint %mat2v2float %mat2v3float %mat2v4float %mat3v2float %mat3v3float %mat3v4float %mat4v2float %mat4v3float %mat4v4float %_arr_v3float_uint_2 %Inner %_arr_Inner_uint_4
+   %sb_block = OpTypeStruct %S
+%_ptr_StorageBuffer_sb_block = OpTypePointer StorageBuffer %sb_block
+         %sb = OpVariable %_ptr_StorageBuffer_sb_block StorageBuffer
        %void = OpTypeVoid
-         %17 = OpTypeFunction %void
+         %31 = OpTypeFunction %void
      %uint_0 = OpConstant %uint 0
-%_ptr_StorageBuffer_v3int = OpTypePointer StorageBuffer %v3int
+%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
      %uint_1 = OpConstant %uint 1
 %_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int
-     %uint_2 = OpConstant %uint 2
-%_ptr_StorageBuffer_v3uint = OpTypePointer StorageBuffer %v3uint
-     %uint_3 = OpConstant %uint 3
 %_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
-%_ptr_StorageBuffer_v3float = OpTypePointer StorageBuffer %v3float
+     %uint_3 = OpConstant %uint 3
+%_ptr_StorageBuffer_v2float = OpTypePointer StorageBuffer %v2float
+%_ptr_StorageBuffer_v2int = OpTypePointer StorageBuffer %v2int
      %uint_5 = OpConstant %uint 5
-%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
+%_ptr_StorageBuffer_v2uint = OpTypePointer StorageBuffer %v2uint
      %uint_6 = OpConstant %uint 6
-%_ptr_StorageBuffer_mat2v3float = OpTypePointer StorageBuffer %mat2v3float
+%_ptr_StorageBuffer_v3float = OpTypePointer StorageBuffer %v3float
      %uint_7 = OpConstant %uint 7
-%_ptr_StorageBuffer_mat3v2float = OpTypePointer StorageBuffer %mat3v2float
+%_ptr_StorageBuffer_v3int = OpTypePointer StorageBuffer %v3int
      %uint_8 = OpConstant %uint 8
-%_ptr_StorageBuffer_Inner = OpTypePointer StorageBuffer %Inner
+%_ptr_StorageBuffer_v3uint = OpTypePointer StorageBuffer %v3uint
      %uint_9 = OpConstant %uint 9
+%_ptr_StorageBuffer_v4float = OpTypePointer StorageBuffer %v4float
+    %uint_10 = OpConstant %uint 10
+%_ptr_StorageBuffer_v4int = OpTypePointer StorageBuffer %v4int
+    %uint_11 = OpConstant %uint 11
+%_ptr_StorageBuffer_v4uint = OpTypePointer StorageBuffer %v4uint
+    %uint_12 = OpConstant %uint 12
+%_ptr_StorageBuffer_mat2v2float = OpTypePointer StorageBuffer %mat2v2float
+    %uint_13 = OpConstant %uint 13
+%_ptr_StorageBuffer_mat2v3float = OpTypePointer StorageBuffer %mat2v3float
+    %uint_14 = OpConstant %uint 14
+%_ptr_StorageBuffer_mat2v4float = OpTypePointer StorageBuffer %mat2v4float
+    %uint_15 = OpConstant %uint 15
+%_ptr_StorageBuffer_mat3v2float = OpTypePointer StorageBuffer %mat3v2float
+    %uint_16 = OpConstant %uint 16
+%_ptr_StorageBuffer_mat3v3float = OpTypePointer StorageBuffer %mat3v3float
+    %uint_17 = OpConstant %uint 17
+%_ptr_StorageBuffer_mat3v4float = OpTypePointer StorageBuffer %mat3v4float
+    %uint_18 = OpConstant %uint 18
+%_ptr_StorageBuffer_mat4v2float = OpTypePointer StorageBuffer %mat4v2float
+    %uint_19 = OpConstant %uint 19
+%_ptr_StorageBuffer_mat4v3float = OpTypePointer StorageBuffer %mat4v3float
+    %uint_20 = OpConstant %uint 20
+%_ptr_StorageBuffer_mat4v4float = OpTypePointer StorageBuffer %mat4v4float
+    %uint_21 = OpConstant %uint 21
+%_ptr_StorageBuffer__arr_v3float_uint_2 = OpTypePointer StorageBuffer %_arr_v3float_uint_2
+    %uint_22 = OpConstant %uint 22
+%_ptr_StorageBuffer_Inner = OpTypePointer StorageBuffer %Inner
+    %uint_23 = OpConstant %uint 23
 %_ptr_StorageBuffer__arr_Inner_uint_4 = OpTypePointer StorageBuffer %_arr_Inner_uint_4
-       %main = OpFunction %void None %17
-         %20 = OpLabel
-         %23 = OpAccessChain %_ptr_StorageBuffer_v3int %s %uint_0 %uint_0
-         %24 = OpLoad %v3int %23
-         %27 = OpAccessChain %_ptr_StorageBuffer_int %s %uint_0 %uint_1
-         %28 = OpLoad %int %27
-         %31 = OpAccessChain %_ptr_StorageBuffer_v3uint %s %uint_0 %uint_2
-         %32 = OpLoad %v3uint %31
-         %35 = OpAccessChain %_ptr_StorageBuffer_uint %s %uint_0 %uint_3
-         %36 = OpLoad %uint %35
-         %38 = OpAccessChain %_ptr_StorageBuffer_v3float %s %uint_0 %uint_4
-         %39 = OpLoad %v3float %38
-         %42 = OpAccessChain %_ptr_StorageBuffer_float %s %uint_0 %uint_5
-         %43 = OpLoad %float %42
-         %46 = OpAccessChain %_ptr_StorageBuffer_mat2v3float %s %uint_0 %uint_6
-         %47 = OpLoad %mat2v3float %46
-         %50 = OpAccessChain %_ptr_StorageBuffer_mat3v2float %s %uint_0 %uint_7
-         %51 = OpLoad %mat3v2float %50
-         %54 = OpAccessChain %_ptr_StorageBuffer_Inner %s %uint_0 %uint_8
-         %55 = OpLoad %Inner %54
-         %58 = OpAccessChain %_ptr_StorageBuffer__arr_Inner_uint_4 %s %uint_0 %uint_9
-         %59 = OpLoad %_arr_Inner_uint_4 %58
+       %main = OpFunction %void None %31
+         %34 = OpLabel
+         %37 = OpAccessChain %_ptr_StorageBuffer_float %sb %uint_0 %uint_0
+         %38 = OpLoad %float %37
+         %41 = OpAccessChain %_ptr_StorageBuffer_int %sb %uint_0 %uint_1
+         %42 = OpLoad %int %41
+         %44 = OpAccessChain %_ptr_StorageBuffer_uint %sb %uint_0 %uint_2
+         %45 = OpLoad %uint %44
+         %48 = OpAccessChain %_ptr_StorageBuffer_v2float %sb %uint_0 %uint_3
+         %49 = OpLoad %v2float %48
+         %51 = OpAccessChain %_ptr_StorageBuffer_v2int %sb %uint_0 %uint_4
+         %52 = OpLoad %v2int %51
+         %55 = OpAccessChain %_ptr_StorageBuffer_v2uint %sb %uint_0 %uint_5
+         %56 = OpLoad %v2uint %55
+         %59 = OpAccessChain %_ptr_StorageBuffer_v3float %sb %uint_0 %uint_6
+         %60 = OpLoad %v3float %59
+         %63 = OpAccessChain %_ptr_StorageBuffer_v3int %sb %uint_0 %uint_7
+         %64 = OpLoad %v3int %63
+         %67 = OpAccessChain %_ptr_StorageBuffer_v3uint %sb %uint_0 %uint_8
+         %68 = OpLoad %v3uint %67
+         %71 = OpAccessChain %_ptr_StorageBuffer_v4float %sb %uint_0 %uint_9
+         %72 = OpLoad %v4float %71
+         %75 = OpAccessChain %_ptr_StorageBuffer_v4int %sb %uint_0 %uint_10
+         %76 = OpLoad %v4int %75
+         %79 = OpAccessChain %_ptr_StorageBuffer_v4uint %sb %uint_0 %uint_11
+         %80 = OpLoad %v4uint %79
+         %83 = OpAccessChain %_ptr_StorageBuffer_mat2v2float %sb %uint_0 %uint_12
+         %84 = OpLoad %mat2v2float %83
+         %87 = OpAccessChain %_ptr_StorageBuffer_mat2v3float %sb %uint_0 %uint_13
+         %88 = OpLoad %mat2v3float %87
+         %91 = OpAccessChain %_ptr_StorageBuffer_mat2v4float %sb %uint_0 %uint_14
+         %92 = OpLoad %mat2v4float %91
+         %95 = OpAccessChain %_ptr_StorageBuffer_mat3v2float %sb %uint_0 %uint_15
+         %96 = OpLoad %mat3v2float %95
+         %99 = OpAccessChain %_ptr_StorageBuffer_mat3v3float %sb %uint_0 %uint_16
+        %100 = OpLoad %mat3v3float %99
+        %103 = OpAccessChain %_ptr_StorageBuffer_mat3v4float %sb %uint_0 %uint_17
+        %104 = OpLoad %mat3v4float %103
+        %107 = OpAccessChain %_ptr_StorageBuffer_mat4v2float %sb %uint_0 %uint_18
+        %108 = OpLoad %mat4v2float %107
+        %111 = OpAccessChain %_ptr_StorageBuffer_mat4v3float %sb %uint_0 %uint_19
+        %112 = OpLoad %mat4v3float %111
+        %115 = OpAccessChain %_ptr_StorageBuffer_mat4v4float %sb %uint_0 %uint_20
+        %116 = OpLoad %mat4v4float %115
+        %119 = OpAccessChain %_ptr_StorageBuffer__arr_v3float_uint_2 %sb %uint_0 %uint_21
+        %120 = OpLoad %_arr_v3float_uint_2 %119
+        %123 = OpAccessChain %_ptr_StorageBuffer_Inner %sb %uint_0 %uint_22
+        %124 = OpLoad %Inner %123
+        %127 = OpAccessChain %_ptr_StorageBuffer__arr_Inner_uint_4 %sb %uint_0 %uint_23
+        %128 = OpLoad %_arr_Inner_uint_4 %127
                OpReturn
                OpFunctionEnd

diff --git a/test/tint/buffer/storage/static_index/read.wgsl.expected.wgsl b/test/tint/buffer/storage/static_index/read.wgsl.expected.wgsl
index 89ab3c2..a978937 100644
--- a/test/tint/buffer/storage/static_index/read.wgsl.expected.wgsl
+++ b/test/tint/buffer/storage/static_index/read.wgsl.expected.wgsl

@@ -1,32 +1,61 @@
 struct Inner {
-  x : i32,
+  scalar_i32 : i32,
+  scalar_f32 : f32,
 }
 
 struct S {
-  a : vec3<i32>,
-  b : i32,
-  c : vec3<u32>,
-  d : u32,
-  e : vec3<f32>,
-  f : f32,
-  g : mat2x3<f32>,
-  h : mat3x2<f32>,
-  i : Inner,
-  j : array<Inner, 4>,
+  scalar_f32 : f32,
+  scalar_i32 : i32,
+  scalar_u32 : u32,
+  vec2_f32 : vec2<f32>,
+  vec2_i32 : vec2<i32>,
+  vec2_u32 : vec2<u32>,
+  vec3_f32 : vec3<f32>,
+  vec3_i32 : vec3<i32>,
+  vec3_u32 : vec3<u32>,
+  vec4_f32 : vec4<f32>,
+  vec4_i32 : vec4<i32>,
+  vec4_u32 : vec4<u32>,
+  mat2x2_f32 : mat2x2<f32>,
+  mat2x3_f32 : mat2x3<f32>,
+  mat2x4_f32 : mat2x4<f32>,
+  mat3x2_f32 : mat3x2<f32>,
+  mat3x3_f32 : mat3x3<f32>,
+  mat3x4_f32 : mat3x4<f32>,
+  mat4x2_f32 : mat4x2<f32>,
+  mat4x3_f32 : mat4x3<f32>,
+  mat4x4_f32 : mat4x4<f32>,
+  arr2_vec3_f32 : array<vec3<f32>, 2>,
+  struct_inner : Inner,
+  array_struct_inner : array<Inner, 4>,
 }
 
-@binding(0) @group(0) var<storage, read> s : S;
+@binding(0) @group(0) var<storage, read> sb : S;
 
 @compute @workgroup_size(1)
 fn main() {
-  let a = s.a;
-  let b = s.b;
-  let c = s.c;
-  let d = s.d;
-  let e = s.e;
-  let f = s.f;
-  let g = s.g;
-  let h = s.h;
-  let i = s.i;
-  let j = s.j;
+  let scalar_f32 = sb.scalar_f32;
+  let scalar_i32 = sb.scalar_i32;
+  let scalar_u32 = sb.scalar_u32;
+  let vec2_f32 = sb.vec2_f32;
+  let vec2_i32 = sb.vec2_i32;
+  let vec2_u32 = sb.vec2_u32;
+  let vec3_f32 = sb.vec3_f32;
+  let vec3_i32 = sb.vec3_i32;
+  let vec3_u32 = sb.vec3_u32;
+  let vec4_f32 = sb.vec4_f32;
+  let vec4_i32 = sb.vec4_i32;
+  let vec4_u32 = sb.vec4_u32;
+  let mat2x2_f32 = sb.mat2x2_f32;
+  let mat2x3_f32 = sb.mat2x3_f32;
+  let mat2x4_f32 = sb.mat2x4_f32;
+  let mat3x2_f32 = sb.mat3x2_f32;
+  let mat3x3_f32 = sb.mat3x3_f32;
+  let mat3x4_f32 = sb.mat3x4_f32;
+  let mat4x2_f32 = sb.mat4x2_f32;
+  let mat4x3_f32 = sb.mat4x3_f32;
+  let mat4x4_f32 = sb.mat4x4_f32;
+  let arr2_vec3_f32 = sb.arr2_vec3_f32;
+  let struct_inner = sb.struct_inner;
+  let array_struct_inner = sb.array_struct_inner;
 }

diff --git a/test/tint/buffer/storage/static_index/read_f16.wgsl b/test/tint/buffer/storage/static_index/read_f16.wgsl
new file mode 100644
index 0000000..5e8d2d1
--- /dev/null
+++ b/test/tint/buffer/storage/static_index/read_f16.wgsl

@@ -0,0 +1,92 @@
+enable f16;
+
+struct Inner {
+    scalar_i32 : i32,
+    scalar_f32 : f32,
+    scalar_f16 : f16,
+};
+
+struct S {
+    scalar_f32 : f32,
+    scalar_i32 : i32,
+    scalar_u32 : u32,
+    scalar_f16 : f16,
+    vec2_f32 : vec2<f32>,
+    vec2_i32 : vec2<i32>,
+    vec2_u32 : vec2<u32>,
+    vec2_f16 : vec2<f16>,
+    vec3_f32 : vec3<f32>,
+    vec3_i32 : vec3<i32>,
+    vec3_u32 : vec3<u32>,
+    vec3_f16 : vec3<f16>,
+    vec4_f32 : vec4<f32>,
+    vec4_i32 : vec4<i32>,
+    vec4_u32 : vec4<u32>,
+    vec4_f16 : vec4<f16>,
+    mat2x2_f32 : mat2x2<f32>,
+    mat2x3_f32 : mat2x3<f32>,
+    mat2x4_f32 : mat2x4<f32>,
+    mat3x2_f32 : mat3x2<f32>,
+    mat3x3_f32 : mat3x3<f32>,
+    mat3x4_f32 : mat3x4<f32>,
+    mat4x2_f32 : mat4x2<f32>,
+    mat4x3_f32 : mat4x3<f32>,
+    mat4x4_f32 : mat4x4<f32>,
+    mat2x2_f16 : mat2x2<f16>,
+    mat2x3_f16 : mat2x3<f16>,
+    mat2x4_f16 : mat2x4<f16>,
+    mat3x2_f16 : mat3x2<f16>,
+    mat3x3_f16 : mat3x3<f16>,
+    mat3x4_f16 : mat3x4<f16>,
+    mat4x2_f16 : mat4x2<f16>,
+    mat4x3_f16 : mat4x3<f16>,
+    mat4x4_f16 : mat4x4<f16>,
+    arr2_vec3_f32 : array<vec3<f32>, 2>,
+    arr2_mat4x2_f16 : array<mat4x2<f16>, 2>,
+    struct_inner : Inner,
+    array_struct_inner : array<Inner, 4>,
+};
+
+@binding(0) @group(0) var<storage, read> sb : S;
+
+@compute @workgroup_size(1)
+fn main() {
+    let scalar_f32 = sb.scalar_f32;
+    let scalar_i32 = sb.scalar_i32;
+    let scalar_u32 = sb.scalar_u32;
+    let scalar_f16 = sb.scalar_f16;
+    let vec2_f32 = sb.vec2_f32;
+    let vec2_i32 = sb.vec2_i32;
+    let vec2_u32 = sb.vec2_u32;
+    let vec2_f16 = sb.vec2_f16;
+    let vec3_f32 = sb.vec3_f32;
+    let vec3_i32 = sb.vec3_i32;
+    let vec3_u32 = sb.vec3_u32;
+    let vec3_f16 = sb.vec3_f16;
+    let vec4_f32 = sb.vec4_f32;
+    let vec4_i32 = sb.vec4_i32;
+    let vec4_u32 = sb.vec4_u32;
+    let vec4_f16 = sb.vec4_f16;
+    let mat2x2_f32 = sb.mat2x2_f32;
+    let mat2x3_f32 = sb.mat2x3_f32;
+    let mat2x4_f32 = sb.mat2x4_f32;
+    let mat3x2_f32 = sb.mat3x2_f32;
+    let mat3x3_f32 = sb.mat3x3_f32;
+    let mat3x4_f32 = sb.mat3x4_f32;
+    let mat4x2_f32 = sb.mat4x2_f32;
+    let mat4x3_f32 = sb.mat4x3_f32;
+    let mat4x4_f32 = sb.mat4x4_f32;
+    let mat2x2_f16 = sb.mat2x2_f16;
+    let mat2x3_f16 = sb.mat2x3_f16;
+    let mat2x4_f16 = sb.mat2x4_f16;
+    let mat3x2_f16 = sb.mat3x2_f16;
+    let mat3x3_f16 = sb.mat3x3_f16;
+    let mat3x4_f16 = sb.mat3x4_f16;
+    let mat4x2_f16 = sb.mat4x2_f16;
+    let mat4x3_f16 = sb.mat4x3_f16;
+    let mat4x4_f16 = sb.mat4x4_f16;
+    let arr2_vec3_f32 = sb.arr2_vec3_f32;
+    let arr2_mat4x2_f16 = sb.arr2_mat4x2_f16;
+    let struct_inner = sb.struct_inner;
+    let array_struct_inner = sb.array_struct_inner;
+}

diff --git a/test/tint/buffer/storage/static_index/read_f16.wgsl.expected.dxc.hlsl b/test/tint/buffer/storage/static_index/read_f16.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..e0be8a3
--- /dev/null
+++ b/test/tint/buffer/storage/static_index/read_f16.wgsl.expected.dxc.hlsl

@@ -0,0 +1,160 @@
+struct Inner {
+  int scalar_i32;
+  float scalar_f32;
+  float16_t scalar_f16;
+};
+
+ByteAddressBuffer sb : register(t0, space0);
+
+float2x2 tint_symbol_16(ByteAddressBuffer buffer, uint offset) {
+  return float2x2(asfloat(buffer.Load2((offset + 0u))), asfloat(buffer.Load2((offset + 8u))));
+}
+
+float2x3 tint_symbol_17(ByteAddressBuffer buffer, uint offset) {
+  return float2x3(asfloat(buffer.Load3((offset + 0u))), asfloat(buffer.Load3((offset + 16u))));
+}
+
+float2x4 tint_symbol_18(ByteAddressBuffer buffer, uint offset) {
+  return float2x4(asfloat(buffer.Load4((offset + 0u))), asfloat(buffer.Load4((offset + 16u))));
+}
+
+float3x2 tint_symbol_19(ByteAddressBuffer buffer, uint offset) {
+  return float3x2(asfloat(buffer.Load2((offset + 0u))), asfloat(buffer.Load2((offset + 8u))), asfloat(buffer.Load2((offset + 16u))));
+}
+
+float3x3 tint_symbol_20(ByteAddressBuffer buffer, uint offset) {
+  return float3x3(asfloat(buffer.Load3((offset + 0u))), asfloat(buffer.Load3((offset + 16u))), asfloat(buffer.Load3((offset + 32u))));
+}
+
+float3x4 tint_symbol_21(ByteAddressBuffer buffer, uint offset) {
+  return float3x4(asfloat(buffer.Load4((offset + 0u))), asfloat(buffer.Load4((offset + 16u))), asfloat(buffer.Load4((offset + 32u))));
+}
+
+float4x2 tint_symbol_22(ByteAddressBuffer buffer, uint offset) {
+  return float4x2(asfloat(buffer.Load2((offset + 0u))), asfloat(buffer.Load2((offset + 8u))), asfloat(buffer.Load2((offset + 16u))), asfloat(buffer.Load2((offset + 24u))));
+}
+
+float4x3 tint_symbol_23(ByteAddressBuffer buffer, uint offset) {
+  return float4x3(asfloat(buffer.Load3((offset + 0u))), asfloat(buffer.Load3((offset + 16u))), asfloat(buffer.Load3((offset + 32u))), asfloat(buffer.Load3((offset + 48u))));
+}
+
+float4x4 tint_symbol_24(ByteAddressBuffer buffer, uint offset) {
+  return float4x4(asfloat(buffer.Load4((offset + 0u))), asfloat(buffer.Load4((offset + 16u))), asfloat(buffer.Load4((offset + 32u))), asfloat(buffer.Load4((offset + 48u))));
+}
+
+matrix<float16_t, 2, 2> tint_symbol_25(ByteAddressBuffer buffer, uint offset) {
+  return matrix<float16_t, 2, 2>(buffer.Load<vector<float16_t, 2> >((offset + 0u)), buffer.Load<vector<float16_t, 2> >((offset + 4u)));
+}
+
+matrix<float16_t, 2, 3> tint_symbol_26(ByteAddressBuffer buffer, uint offset) {
+  return matrix<float16_t, 2, 3>(buffer.Load<vector<float16_t, 3> >((offset + 0u)), buffer.Load<vector<float16_t, 3> >((offset + 8u)));
+}
+
+matrix<float16_t, 2, 4> tint_symbol_27(ByteAddressBuffer buffer, uint offset) {
+  return matrix<float16_t, 2, 4>(buffer.Load<vector<float16_t, 4> >((offset + 0u)), buffer.Load<vector<float16_t, 4> >((offset + 8u)));
+}
+
+matrix<float16_t, 3, 2> tint_symbol_28(ByteAddressBuffer buffer, uint offset) {
+  return matrix<float16_t, 3, 2>(buffer.Load<vector<float16_t, 2> >((offset + 0u)), buffer.Load<vector<float16_t, 2> >((offset + 4u)), buffer.Load<vector<float16_t, 2> >((offset + 8u)));
+}
+
+matrix<float16_t, 3, 3> tint_symbol_29(ByteAddressBuffer buffer, uint offset) {
+  return matrix<float16_t, 3, 3>(buffer.Load<vector<float16_t, 3> >((offset + 0u)), buffer.Load<vector<float16_t, 3> >((offset + 8u)), buffer.Load<vector<float16_t, 3> >((offset + 16u)));
+}
+
+matrix<float16_t, 3, 4> tint_symbol_30(ByteAddressBuffer buffer, uint offset) {
+  return matrix<float16_t, 3, 4>(buffer.Load<vector<float16_t, 4> >((offset + 0u)), buffer.Load<vector<float16_t, 4> >((offset + 8u)), buffer.Load<vector<float16_t, 4> >((offset + 16u)));
+}
+
+matrix<float16_t, 4, 2> tint_symbol_31(ByteAddressBuffer buffer, uint offset) {
+  return matrix<float16_t, 4, 2>(buffer.Load<vector<float16_t, 2> >((offset + 0u)), buffer.Load<vector<float16_t, 2> >((offset + 4u)), buffer.Load<vector<float16_t, 2> >((offset + 8u)), buffer.Load<vector<float16_t, 2> >((offset + 12u)));
+}
+
+matrix<float16_t, 4, 3> tint_symbol_32(ByteAddressBuffer buffer, uint offset) {
+  return matrix<float16_t, 4, 3>(buffer.Load<vector<float16_t, 3> >((offset + 0u)), buffer.Load<vector<float16_t, 3> >((offset + 8u)), buffer.Load<vector<float16_t, 3> >((offset + 16u)), buffer.Load<vector<float16_t, 3> >((offset + 24u)));
+}
+
+matrix<float16_t, 4, 4> tint_symbol_33(ByteAddressBuffer buffer, uint offset) {
+  return matrix<float16_t, 4, 4>(buffer.Load<vector<float16_t, 4> >((offset + 0u)), buffer.Load<vector<float16_t, 4> >((offset + 8u)), buffer.Load<vector<float16_t, 4> >((offset + 16u)), buffer.Load<vector<float16_t, 4> >((offset + 24u)));
+}
+
+typedef float3 tint_symbol_34_ret[2];
+tint_symbol_34_ret tint_symbol_34(ByteAddressBuffer buffer, uint offset) {
+  float3 arr[2] = (float3[2])0;
+  {
+    for(uint i = 0u; (i < 2u); i = (i + 1u)) {
+      arr[i] = asfloat(buffer.Load3((offset + (i * 16u))));
+    }
+  }
+  return arr;
+}
+
+typedef matrix<float16_t, 4, 2> tint_symbol_35_ret[2];
+tint_symbol_35_ret tint_symbol_35(ByteAddressBuffer buffer, uint offset) {
+  matrix<float16_t, 4, 2> arr_1[2] = (matrix<float16_t, 4, 2>[2])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 2u); i_1 = (i_1 + 1u)) {
+      arr_1[i_1] = tint_symbol_31(buffer, (offset + (i_1 * 16u)));
+    }
+  }
+  return arr_1;
+}
+
+Inner tint_symbol_36(ByteAddressBuffer buffer, uint offset) {
+  const Inner tint_symbol_38 = {asint(buffer.Load((offset + 0u))), asfloat(buffer.Load((offset + 4u))), buffer.Load<float16_t>((offset + 8u))};
+  return tint_symbol_38;
+}
+
+typedef Inner tint_symbol_37_ret[4];
+tint_symbol_37_ret tint_symbol_37(ByteAddressBuffer buffer, uint offset) {
+  Inner arr_2[4] = (Inner[4])0;
+  {
+    for(uint i_2 = 0u; (i_2 < 4u); i_2 = (i_2 + 1u)) {
+      arr_2[i_2] = tint_symbol_36(buffer, (offset + (i_2 * 12u)));
+    }
+  }
+  return arr_2;
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  const float scalar_f32 = asfloat(sb.Load(0u));
+  const int scalar_i32 = asint(sb.Load(4u));
+  const uint scalar_u32 = sb.Load(8u);
+  const float16_t scalar_f16 = sb.Load<float16_t>(12u);
+  const float2 vec2_f32 = asfloat(sb.Load2(16u));
+  const int2 vec2_i32 = asint(sb.Load2(24u));
+  const uint2 vec2_u32 = sb.Load2(32u);
+  const vector<float16_t, 2> vec2_f16 = sb.Load<vector<float16_t, 2> >(40u);
+  const float3 vec3_f32 = asfloat(sb.Load3(48u));
+  const int3 vec3_i32 = asint(sb.Load3(64u));
+  const uint3 vec3_u32 = sb.Load3(80u);
+  const vector<float16_t, 3> vec3_f16 = sb.Load<vector<float16_t, 3> >(96u);
+  const float4 vec4_f32 = asfloat(sb.Load4(112u));
+  const int4 vec4_i32 = asint(sb.Load4(128u));
+  const uint4 vec4_u32 = sb.Load4(144u);
+  const vector<float16_t, 4> vec4_f16 = sb.Load<vector<float16_t, 4> >(160u);
+  const float2x2 mat2x2_f32 = tint_symbol_16(sb, 168u);
+  const float2x3 mat2x3_f32 = tint_symbol_17(sb, 192u);
+  const float2x4 mat2x4_f32 = tint_symbol_18(sb, 224u);
+  const float3x2 mat3x2_f32 = tint_symbol_19(sb, 256u);
+  const float3x3 mat3x3_f32 = tint_symbol_20(sb, 288u);
+  const float3x4 mat3x4_f32 = tint_symbol_21(sb, 336u);
+  const float4x2 mat4x2_f32 = tint_symbol_22(sb, 384u);
+  const float4x3 mat4x3_f32 = tint_symbol_23(sb, 416u);
+  const float4x4 mat4x4_f32 = tint_symbol_24(sb, 480u);
+  const matrix<float16_t, 2, 2> mat2x2_f16 = tint_symbol_25(sb, 544u);
+  const matrix<float16_t, 2, 3> mat2x3_f16 = tint_symbol_26(sb, 552u);
+  const matrix<float16_t, 2, 4> mat2x4_f16 = tint_symbol_27(sb, 568u);
+  const matrix<float16_t, 3, 2> mat3x2_f16 = tint_symbol_28(sb, 584u);
+  const matrix<float16_t, 3, 3> mat3x3_f16 = tint_symbol_29(sb, 600u);
+  const matrix<float16_t, 3, 4> mat3x4_f16 = tint_symbol_30(sb, 624u);
+  const matrix<float16_t, 4, 2> mat4x2_f16 = tint_symbol_31(sb, 648u);
+  const matrix<float16_t, 4, 3> mat4x3_f16 = tint_symbol_32(sb, 664u);
+  const matrix<float16_t, 4, 4> mat4x4_f16 = tint_symbol_33(sb, 696u);
+  const float3 arr2_vec3_f32[2] = tint_symbol_34(sb, 736u);
+  const matrix<float16_t, 4, 2> arr2_mat4x2_f16[2] = tint_symbol_35(sb, 768u);
+  const Inner struct_inner = tint_symbol_36(sb, 800u);
+  const Inner array_struct_inner[4] = tint_symbol_37(sb, 812u);
+  return;
+}

diff --git a/test/tint/buffer/storage/static_index/read_f16.wgsl.expected.fxc.hlsl b/test/tint/buffer/storage/static_index/read_f16.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..ae92995
--- /dev/null
+++ b/test/tint/buffer/storage/static_index/read_f16.wgsl.expected.fxc.hlsl

@@ -0,0 +1,165 @@
+SKIP: FAILED
+
+struct Inner {
+  int scalar_i32;
+  float scalar_f32;
+  float16_t scalar_f16;
+};
+
+ByteAddressBuffer sb : register(t0, space0);
+
+float2x2 tint_symbol_16(ByteAddressBuffer buffer, uint offset) {
+  return float2x2(asfloat(buffer.Load2((offset + 0u))), asfloat(buffer.Load2((offset + 8u))));
+}
+
+float2x3 tint_symbol_17(ByteAddressBuffer buffer, uint offset) {
+  return float2x3(asfloat(buffer.Load3((offset + 0u))), asfloat(buffer.Load3((offset + 16u))));
+}
+
+float2x4 tint_symbol_18(ByteAddressBuffer buffer, uint offset) {
+  return float2x4(asfloat(buffer.Load4((offset + 0u))), asfloat(buffer.Load4((offset + 16u))));
+}
+
+float3x2 tint_symbol_19(ByteAddressBuffer buffer, uint offset) {
+  return float3x2(asfloat(buffer.Load2((offset + 0u))), asfloat(buffer.Load2((offset + 8u))), asfloat(buffer.Load2((offset + 16u))));
+}
+
+float3x3 tint_symbol_20(ByteAddressBuffer buffer, uint offset) {
+  return float3x3(asfloat(buffer.Load3((offset + 0u))), asfloat(buffer.Load3((offset + 16u))), asfloat(buffer.Load3((offset + 32u))));
+}
+
+float3x4 tint_symbol_21(ByteAddressBuffer buffer, uint offset) {
+  return float3x4(asfloat(buffer.Load4((offset + 0u))), asfloat(buffer.Load4((offset + 16u))), asfloat(buffer.Load4((offset + 32u))));
+}
+
+float4x2 tint_symbol_22(ByteAddressBuffer buffer, uint offset) {
+  return float4x2(asfloat(buffer.Load2((offset + 0u))), asfloat(buffer.Load2((offset + 8u))), asfloat(buffer.Load2((offset + 16u))), asfloat(buffer.Load2((offset + 24u))));
+}
+
+float4x3 tint_symbol_23(ByteAddressBuffer buffer, uint offset) {
+  return float4x3(asfloat(buffer.Load3((offset + 0u))), asfloat(buffer.Load3((offset + 16u))), asfloat(buffer.Load3((offset + 32u))), asfloat(buffer.Load3((offset + 48u))));
+}
+
+float4x4 tint_symbol_24(ByteAddressBuffer buffer, uint offset) {
+  return float4x4(asfloat(buffer.Load4((offset + 0u))), asfloat(buffer.Load4((offset + 16u))), asfloat(buffer.Load4((offset + 32u))), asfloat(buffer.Load4((offset + 48u))));
+}
+
+matrix<float16_t, 2, 2> tint_symbol_25(ByteAddressBuffer buffer, uint offset) {
+  return matrix<float16_t, 2, 2>(buffer.Load<vector<float16_t, 2> >((offset + 0u)), buffer.Load<vector<float16_t, 2> >((offset + 4u)));
+}
+
+matrix<float16_t, 2, 3> tint_symbol_26(ByteAddressBuffer buffer, uint offset) {
+  return matrix<float16_t, 2, 3>(buffer.Load<vector<float16_t, 3> >((offset + 0u)), buffer.Load<vector<float16_t, 3> >((offset + 8u)));
+}
+
+matrix<float16_t, 2, 4> tint_symbol_27(ByteAddressBuffer buffer, uint offset) {
+  return matrix<float16_t, 2, 4>(buffer.Load<vector<float16_t, 4> >((offset + 0u)), buffer.Load<vector<float16_t, 4> >((offset + 8u)));
+}
+
+matrix<float16_t, 3, 2> tint_symbol_28(ByteAddressBuffer buffer, uint offset) {
+  return matrix<float16_t, 3, 2>(buffer.Load<vector<float16_t, 2> >((offset + 0u)), buffer.Load<vector<float16_t, 2> >((offset + 4u)), buffer.Load<vector<float16_t, 2> >((offset + 8u)));
+}
+
+matrix<float16_t, 3, 3> tint_symbol_29(ByteAddressBuffer buffer, uint offset) {
+  return matrix<float16_t, 3, 3>(buffer.Load<vector<float16_t, 3> >((offset + 0u)), buffer.Load<vector<float16_t, 3> >((offset + 8u)), buffer.Load<vector<float16_t, 3> >((offset + 16u)));
+}
+
+matrix<float16_t, 3, 4> tint_symbol_30(ByteAddressBuffer buffer, uint offset) {
+  return matrix<float16_t, 3, 4>(buffer.Load<vector<float16_t, 4> >((offset + 0u)), buffer.Load<vector<float16_t, 4> >((offset + 8u)), buffer.Load<vector<float16_t, 4> >((offset + 16u)));
+}
+
+matrix<float16_t, 4, 2> tint_symbol_31(ByteAddressBuffer buffer, uint offset) {
+  return matrix<float16_t, 4, 2>(buffer.Load<vector<float16_t, 2> >((offset + 0u)), buffer.Load<vector<float16_t, 2> >((offset + 4u)), buffer.Load<vector<float16_t, 2> >((offset + 8u)), buffer.Load<vector<float16_t, 2> >((offset + 12u)));
+}
+
+matrix<float16_t, 4, 3> tint_symbol_32(ByteAddressBuffer buffer, uint offset) {
+  return matrix<float16_t, 4, 3>(buffer.Load<vector<float16_t, 3> >((offset + 0u)), buffer.Load<vector<float16_t, 3> >((offset + 8u)), buffer.Load<vector<float16_t, 3> >((offset + 16u)), buffer.Load<vector<float16_t, 3> >((offset + 24u)));
+}
+
+matrix<float16_t, 4, 4> tint_symbol_33(ByteAddressBuffer buffer, uint offset) {
+  return matrix<float16_t, 4, 4>(buffer.Load<vector<float16_t, 4> >((offset + 0u)), buffer.Load<vector<float16_t, 4> >((offset + 8u)), buffer.Load<vector<float16_t, 4> >((offset + 16u)), buffer.Load<vector<float16_t, 4> >((offset + 24u)));
+}
+
+typedef float3 tint_symbol_34_ret[2];
+tint_symbol_34_ret tint_symbol_34(ByteAddressBuffer buffer, uint offset) {
+  float3 arr[2] = (float3[2])0;
+  {
+    for(uint i = 0u; (i < 2u); i = (i + 1u)) {
+      arr[i] = asfloat(buffer.Load3((offset + (i * 16u))));
+    }
+  }
+  return arr;
+}
+
+typedef matrix<float16_t, 4, 2> tint_symbol_35_ret[2];
+tint_symbol_35_ret tint_symbol_35(ByteAddressBuffer buffer, uint offset) {
+  matrix<float16_t, 4, 2> arr_1[2] = (matrix<float16_t, 4, 2>[2])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 2u); i_1 = (i_1 + 1u)) {
+      arr_1[i_1] = tint_symbol_31(buffer, (offset + (i_1 * 16u)));
+    }
+  }
+  return arr_1;
+}
+
+Inner tint_symbol_36(ByteAddressBuffer buffer, uint offset) {
+  const Inner tint_symbol_38 = {asint(buffer.Load((offset + 0u))), asfloat(buffer.Load((offset + 4u))), buffer.Load<float16_t>((offset + 8u))};
+  return tint_symbol_38;
+}
+
+typedef Inner tint_symbol_37_ret[4];
+tint_symbol_37_ret tint_symbol_37(ByteAddressBuffer buffer, uint offset) {
+  Inner arr_2[4] = (Inner[4])0;
+  {
+    for(uint i_2 = 0u; (i_2 < 4u); i_2 = (i_2 + 1u)) {
+      arr_2[i_2] = tint_symbol_36(buffer, (offset + (i_2 * 12u)));
+    }
+  }
+  return arr_2;
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  const float scalar_f32 = asfloat(sb.Load(0u));
+  const int scalar_i32 = asint(sb.Load(4u));
+  const uint scalar_u32 = sb.Load(8u);
+  const float16_t scalar_f16 = sb.Load<float16_t>(12u);
+  const float2 vec2_f32 = asfloat(sb.Load2(16u));
+  const int2 vec2_i32 = asint(sb.Load2(24u));
+  const uint2 vec2_u32 = sb.Load2(32u);
+  const vector<float16_t, 2> vec2_f16 = sb.Load<vector<float16_t, 2> >(40u);
+  const float3 vec3_f32 = asfloat(sb.Load3(48u));
+  const int3 vec3_i32 = asint(sb.Load3(64u));
+  const uint3 vec3_u32 = sb.Load3(80u);
+  const vector<float16_t, 3> vec3_f16 = sb.Load<vector<float16_t, 3> >(96u);
+  const float4 vec4_f32 = asfloat(sb.Load4(112u));
+  const int4 vec4_i32 = asint(sb.Load4(128u));
+  const uint4 vec4_u32 = sb.Load4(144u);
+  const vector<float16_t, 4> vec4_f16 = sb.Load<vector<float16_t, 4> >(160u);
+  const float2x2 mat2x2_f32 = tint_symbol_16(sb, 168u);
+  const float2x3 mat2x3_f32 = tint_symbol_17(sb, 192u);
+  const float2x4 mat2x4_f32 = tint_symbol_18(sb, 224u);
+  const float3x2 mat3x2_f32 = tint_symbol_19(sb, 256u);
+  const float3x3 mat3x3_f32 = tint_symbol_20(sb, 288u);
+  const float3x4 mat3x4_f32 = tint_symbol_21(sb, 336u);
+  const float4x2 mat4x2_f32 = tint_symbol_22(sb, 384u);
+  const float4x3 mat4x3_f32 = tint_symbol_23(sb, 416u);
+  const float4x4 mat4x4_f32 = tint_symbol_24(sb, 480u);
+  const matrix<float16_t, 2, 2> mat2x2_f16 = tint_symbol_25(sb, 544u);
+  const matrix<float16_t, 2, 3> mat2x3_f16 = tint_symbol_26(sb, 552u);
+  const matrix<float16_t, 2, 4> mat2x4_f16 = tint_symbol_27(sb, 568u);
+  const matrix<float16_t, 3, 2> mat3x2_f16 = tint_symbol_28(sb, 584u);
+  const matrix<float16_t, 3, 3> mat3x3_f16 = tint_symbol_29(sb, 600u);
+  const matrix<float16_t, 3, 4> mat3x4_f16 = tint_symbol_30(sb, 624u);
+  const matrix<float16_t, 4, 2> mat4x2_f16 = tint_symbol_31(sb, 648u);
+  const matrix<float16_t, 4, 3> mat4x3_f16 = tint_symbol_32(sb, 664u);
+  const matrix<float16_t, 4, 4> mat4x4_f16 = tint_symbol_33(sb, 696u);
+  const float3 arr2_vec3_f32[2] = tint_symbol_34(sb, 736u);
+  const matrix<float16_t, 4, 2> arr2_mat4x2_f16[2] = tint_symbol_35(sb, 768u);
+  const Inner struct_inner = tint_symbol_36(sb, 800u);
+  const Inner array_struct_inner[4] = tint_symbol_37(sb, 812u);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000002348228CC10(4,3-11): error X3000: unrecognized identifier 'float16_t'
+

diff --git a/test/tint/buffer/storage/static_index/read_f16.wgsl.expected.glsl b/test/tint/buffer/storage/static_index/read_f16.wgsl.expected.glsl
new file mode 100644
index 0000000..49d5d53
--- /dev/null
+++ b/test/tint/buffer/storage/static_index/read_f16.wgsl.expected.glsl

@@ -0,0 +1,114 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct Inner {
+  int scalar_i32;
+  float scalar_f32;
+  float16_t scalar_f16;
+};
+
+struct S {
+  float scalar_f32;
+  int scalar_i32;
+  uint scalar_u32;
+  float16_t scalar_f16;
+  vec2 vec2_f32;
+  ivec2 vec2_i32;
+  uvec2 vec2_u32;
+  f16vec2 vec2_f16;
+  uint pad;
+  vec3 vec3_f32;
+  uint pad_1;
+  ivec3 vec3_i32;
+  uint pad_2;
+  uvec3 vec3_u32;
+  uint pad_3;
+  f16vec3 vec3_f16;
+  uint pad_4;
+  uint pad_5;
+  vec4 vec4_f32;
+  ivec4 vec4_i32;
+  uvec4 vec4_u32;
+  f16vec4 vec4_f16;
+  mat2 mat2x2_f32;
+  uint pad_6;
+  uint pad_7;
+  mat2x3 mat2x3_f32;
+  mat2x4 mat2x4_f32;
+  mat3x2 mat3x2_f32;
+  uint pad_8;
+  uint pad_9;
+  mat3 mat3x3_f32;
+  mat3x4 mat3x4_f32;
+  mat4x2 mat4x2_f32;
+  mat4x3 mat4x3_f32;
+  mat4 mat4x4_f32;
+  f16mat2 mat2x2_f16;
+  f16mat2x3 mat2x3_f16;
+  f16mat2x4 mat2x4_f16;
+  f16mat3x2 mat3x2_f16;
+  uint pad_10;
+  f16mat3 mat3x3_f16;
+  f16mat3x4 mat3x4_f16;
+  f16mat4x2 mat4x2_f16;
+  f16mat4x3 mat4x3_f16;
+  f16mat4 mat4x4_f16;
+  uint pad_11;
+  uint pad_12;
+  vec3 arr2_vec3_f32[2];
+  f16mat4x2 arr2_mat4x2_f16[2];
+  Inner struct_inner;
+  Inner array_struct_inner[4];
+  uint pad_13;
+};
+
+layout(binding = 0, std430) buffer sb_block_ssbo {
+  S inner;
+} sb;
+
+void tint_symbol() {
+  float scalar_f32 = sb.inner.scalar_f32;
+  int scalar_i32 = sb.inner.scalar_i32;
+  uint scalar_u32 = sb.inner.scalar_u32;
+  float16_t scalar_f16 = sb.inner.scalar_f16;
+  vec2 vec2_f32 = sb.inner.vec2_f32;
+  ivec2 vec2_i32 = sb.inner.vec2_i32;
+  uvec2 vec2_u32 = sb.inner.vec2_u32;
+  f16vec2 vec2_f16 = sb.inner.vec2_f16;
+  vec3 vec3_f32 = sb.inner.vec3_f32;
+  ivec3 vec3_i32 = sb.inner.vec3_i32;
+  uvec3 vec3_u32 = sb.inner.vec3_u32;
+  f16vec3 vec3_f16 = sb.inner.vec3_f16;
+  vec4 vec4_f32 = sb.inner.vec4_f32;
+  ivec4 vec4_i32 = sb.inner.vec4_i32;
+  uvec4 vec4_u32 = sb.inner.vec4_u32;
+  f16vec4 vec4_f16 = sb.inner.vec4_f16;
+  mat2 mat2x2_f32 = sb.inner.mat2x2_f32;
+  mat2x3 mat2x3_f32 = sb.inner.mat2x3_f32;
+  mat2x4 mat2x4_f32 = sb.inner.mat2x4_f32;
+  mat3x2 mat3x2_f32 = sb.inner.mat3x2_f32;
+  mat3 mat3x3_f32 = sb.inner.mat3x3_f32;
+  mat3x4 mat3x4_f32 = sb.inner.mat3x4_f32;
+  mat4x2 mat4x2_f32 = sb.inner.mat4x2_f32;
+  mat4x3 mat4x3_f32 = sb.inner.mat4x3_f32;
+  mat4 mat4x4_f32 = sb.inner.mat4x4_f32;
+  f16mat2 mat2x2_f16 = sb.inner.mat2x2_f16;
+  f16mat2x3 mat2x3_f16 = sb.inner.mat2x3_f16;
+  f16mat2x4 mat2x4_f16 = sb.inner.mat2x4_f16;
+  f16mat3x2 mat3x2_f16 = sb.inner.mat3x2_f16;
+  f16mat3 mat3x3_f16 = sb.inner.mat3x3_f16;
+  f16mat3x4 mat3x4_f16 = sb.inner.mat3x4_f16;
+  f16mat4x2 mat4x2_f16 = sb.inner.mat4x2_f16;
+  f16mat4x3 mat4x3_f16 = sb.inner.mat4x3_f16;
+  f16mat4 mat4x4_f16 = sb.inner.mat4x4_f16;
+  vec3 arr2_vec3_f32[2] = sb.inner.arr2_vec3_f32;
+  f16mat4x2 arr2_mat4x2_f16[2] = sb.inner.arr2_mat4x2_f16;
+  Inner struct_inner = sb.inner.struct_inner;
+  Inner array_struct_inner[4] = sb.inner.array_struct_inner;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  tint_symbol();
+  return;
+}

diff --git a/test/tint/buffer/storage/static_index/read_f16.wgsl.expected.msl b/test/tint/buffer/storage/static_index/read_f16.wgsl.expected.msl
new file mode 100644
index 0000000..461e002
--- /dev/null
+++ b/test/tint/buffer/storage/static_index/read_f16.wgsl.expected.msl

@@ -0,0 +1,117 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct Inner {
+  /* 0x0000 */ int scalar_i32;
+  /* 0x0004 */ float scalar_f32;
+  /* 0x0008 */ half scalar_f16;
+  /* 0x000a */ tint_array<int8_t, 2> tint_pad;
+};
+
+struct S {
+  /* 0x0000 */ float scalar_f32;
+  /* 0x0004 */ int scalar_i32;
+  /* 0x0008 */ uint scalar_u32;
+  /* 0x000c */ half scalar_f16;
+  /* 0x000e */ tint_array<int8_t, 2> tint_pad_1;
+  /* 0x0010 */ float2 vec2_f32;
+  /* 0x0018 */ int2 vec2_i32;
+  /* 0x0020 */ uint2 vec2_u32;
+  /* 0x0028 */ half2 vec2_f16;
+  /* 0x002c */ tint_array<int8_t, 4> tint_pad_2;
+  /* 0x0030 */ packed_float3 vec3_f32;
+  /* 0x003c */ tint_array<int8_t, 4> tint_pad_3;
+  /* 0x0040 */ packed_int3 vec3_i32;
+  /* 0x004c */ tint_array<int8_t, 4> tint_pad_4;
+  /* 0x0050 */ packed_uint3 vec3_u32;
+  /* 0x005c */ tint_array<int8_t, 4> tint_pad_5;
+  /* 0x0060 */ packed_half3 vec3_f16;
+  /* 0x0066 */ tint_array<int8_t, 10> tint_pad_6;
+  /* 0x0070 */ float4 vec4_f32;
+  /* 0x0080 */ int4 vec4_i32;
+  /* 0x0090 */ uint4 vec4_u32;
+  /* 0x00a0 */ half4 vec4_f16;
+  /* 0x00a8 */ float2x2 mat2x2_f32;
+  /* 0x00b8 */ tint_array<int8_t, 8> tint_pad_7;
+  /* 0x00c0 */ float2x3 mat2x3_f32;
+  /* 0x00e0 */ float2x4 mat2x4_f32;
+  /* 0x0100 */ float3x2 mat3x2_f32;
+  /* 0x0118 */ tint_array<int8_t, 8> tint_pad_8;
+  /* 0x0120 */ float3x3 mat3x3_f32;
+  /* 0x0150 */ float3x4 mat3x4_f32;
+  /* 0x0180 */ float4x2 mat4x2_f32;
+  /* 0x01a0 */ float4x3 mat4x3_f32;
+  /* 0x01e0 */ float4x4 mat4x4_f32;
+  /* 0x0220 */ half2x2 mat2x2_f16;
+  /* 0x0228 */ half2x3 mat2x3_f16;
+  /* 0x0238 */ half2x4 mat2x4_f16;
+  /* 0x0248 */ half3x2 mat3x2_f16;
+  /* 0x0254 */ tint_array<int8_t, 4> tint_pad_9;
+  /* 0x0258 */ half3x3 mat3x3_f16;
+  /* 0x0270 */ half3x4 mat3x4_f16;
+  /* 0x0288 */ half4x2 mat4x2_f16;
+  /* 0x0298 */ half4x3 mat4x3_f16;
+  /* 0x02b8 */ half4x4 mat4x4_f16;
+  /* 0x02d8 */ tint_array<int8_t, 8> tint_pad_10;
+  /* 0x02e0 */ tint_array<float3, 2> arr2_vec3_f32;
+  /* 0x0300 */ tint_array<half4x2, 2> arr2_mat4x2_f16;
+  /* 0x0320 */ Inner struct_inner;
+  /* 0x032c */ tint_array<Inner, 4> array_struct_inner;
+  /* 0x035c */ tint_array<int8_t, 4> tint_pad_11;
+};
+
+kernel void tint_symbol(const device S* tint_symbol_1 [[buffer(0)]]) {
+  float const scalar_f32 = (*(tint_symbol_1)).scalar_f32;
+  int const scalar_i32 = (*(tint_symbol_1)).scalar_i32;
+  uint const scalar_u32 = (*(tint_symbol_1)).scalar_u32;
+  half const scalar_f16 = (*(tint_symbol_1)).scalar_f16;
+  float2 const vec2_f32 = (*(tint_symbol_1)).vec2_f32;
+  int2 const vec2_i32 = (*(tint_symbol_1)).vec2_i32;
+  uint2 const vec2_u32 = (*(tint_symbol_1)).vec2_u32;
+  half2 const vec2_f16 = (*(tint_symbol_1)).vec2_f16;
+  float3 const vec3_f32 = float3((*(tint_symbol_1)).vec3_f32);
+  int3 const vec3_i32 = int3((*(tint_symbol_1)).vec3_i32);
+  uint3 const vec3_u32 = uint3((*(tint_symbol_1)).vec3_u32);
+  half3 const vec3_f16 = half3((*(tint_symbol_1)).vec3_f16);
+  float4 const vec4_f32 = (*(tint_symbol_1)).vec4_f32;
+  int4 const vec4_i32 = (*(tint_symbol_1)).vec4_i32;
+  uint4 const vec4_u32 = (*(tint_symbol_1)).vec4_u32;
+  half4 const vec4_f16 = (*(tint_symbol_1)).vec4_f16;
+  float2x2 const mat2x2_f32 = (*(tint_symbol_1)).mat2x2_f32;
+  float2x3 const mat2x3_f32 = (*(tint_symbol_1)).mat2x3_f32;
+  float2x4 const mat2x4_f32 = (*(tint_symbol_1)).mat2x4_f32;
+  float3x2 const mat3x2_f32 = (*(tint_symbol_1)).mat3x2_f32;
+  float3x3 const mat3x3_f32 = (*(tint_symbol_1)).mat3x3_f32;
+  float3x4 const mat3x4_f32 = (*(tint_symbol_1)).mat3x4_f32;
+  float4x2 const mat4x2_f32 = (*(tint_symbol_1)).mat4x2_f32;
+  float4x3 const mat4x3_f32 = (*(tint_symbol_1)).mat4x3_f32;
+  float4x4 const mat4x4_f32 = (*(tint_symbol_1)).mat4x4_f32;
+  half2x2 const mat2x2_f16 = (*(tint_symbol_1)).mat2x2_f16;
+  half2x3 const mat2x3_f16 = (*(tint_symbol_1)).mat2x3_f16;
+  half2x4 const mat2x4_f16 = (*(tint_symbol_1)).mat2x4_f16;
+  half3x2 const mat3x2_f16 = (*(tint_symbol_1)).mat3x2_f16;
+  half3x3 const mat3x3_f16 = (*(tint_symbol_1)).mat3x3_f16;
+  half3x4 const mat3x4_f16 = (*(tint_symbol_1)).mat3x4_f16;
+  half4x2 const mat4x2_f16 = (*(tint_symbol_1)).mat4x2_f16;
+  half4x3 const mat4x3_f16 = (*(tint_symbol_1)).mat4x3_f16;
+  half4x4 const mat4x4_f16 = (*(tint_symbol_1)).mat4x4_f16;
+  tint_array<float3, 2> const arr2_vec3_f32 = (*(tint_symbol_1)).arr2_vec3_f32;
+  tint_array<half4x2, 2> const arr2_mat4x2_f16 = (*(tint_symbol_1)).arr2_mat4x2_f16;
+  Inner const struct_inner = (*(tint_symbol_1)).struct_inner;
+  tint_array<Inner, 4> const array_struct_inner = (*(tint_symbol_1)).array_struct_inner;
+  return;
+}
+

diff --git a/test/tint/buffer/storage/static_index/read_f16.wgsl.expected.spvasm b/test/tint/buffer/storage/static_index/read_f16.wgsl.expected.spvasm
new file mode 100644
index 0000000..fcaa0d9
--- /dev/null
+++ b/test/tint/buffer/storage/static_index/read_f16.wgsl.expected.spvasm

@@ -0,0 +1,347 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 199
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpName %sb_block "sb_block"
+               OpMemberName %sb_block 0 "inner"
+               OpName %S "S"
+               OpMemberName %S 0 "scalar_f32"
+               OpMemberName %S 1 "scalar_i32"
+               OpMemberName %S 2 "scalar_u32"
+               OpMemberName %S 3 "scalar_f16"
+               OpMemberName %S 4 "vec2_f32"
+               OpMemberName %S 5 "vec2_i32"
+               OpMemberName %S 6 "vec2_u32"
+               OpMemberName %S 7 "vec2_f16"
+               OpMemberName %S 8 "vec3_f32"
+               OpMemberName %S 9 "vec3_i32"
+               OpMemberName %S 10 "vec3_u32"
+               OpMemberName %S 11 "vec3_f16"
+               OpMemberName %S 12 "vec4_f32"
+               OpMemberName %S 13 "vec4_i32"
+               OpMemberName %S 14 "vec4_u32"
+               OpMemberName %S 15 "vec4_f16"
+               OpMemberName %S 16 "mat2x2_f32"
+               OpMemberName %S 17 "mat2x3_f32"
+               OpMemberName %S 18 "mat2x4_f32"
+               OpMemberName %S 19 "mat3x2_f32"
+               OpMemberName %S 20 "mat3x3_f32"
+               OpMemberName %S 21 "mat3x4_f32"
+               OpMemberName %S 22 "mat4x2_f32"
+               OpMemberName %S 23 "mat4x3_f32"
+               OpMemberName %S 24 "mat4x4_f32"
+               OpMemberName %S 25 "mat2x2_f16"
+               OpMemberName %S 26 "mat2x3_f16"
+               OpMemberName %S 27 "mat2x4_f16"
+               OpMemberName %S 28 "mat3x2_f16"
+               OpMemberName %S 29 "mat3x3_f16"
+               OpMemberName %S 30 "mat3x4_f16"
+               OpMemberName %S 31 "mat4x2_f16"
+               OpMemberName %S 32 "mat4x3_f16"
+               OpMemberName %S 33 "mat4x4_f16"
+               OpMemberName %S 34 "arr2_vec3_f32"
+               OpMemberName %S 35 "arr2_mat4x2_f16"
+               OpMemberName %S 36 "struct_inner"
+               OpName %Inner "Inner"
+               OpMemberName %Inner 0 "scalar_i32"
+               OpMemberName %Inner 1 "scalar_f32"
+               OpMemberName %Inner 2 "scalar_f16"
+               OpMemberName %S 37 "array_struct_inner"
+               OpName %sb "sb"
+               OpName %main "main"
+               OpDecorate %sb_block Block
+               OpMemberDecorate %sb_block 0 Offset 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 4
+               OpMemberDecorate %S 2 Offset 8
+               OpMemberDecorate %S 3 Offset 12
+               OpMemberDecorate %S 4 Offset 16
+               OpMemberDecorate %S 5 Offset 24
+               OpMemberDecorate %S 6 Offset 32
+               OpMemberDecorate %S 7 Offset 40
+               OpMemberDecorate %S 8 Offset 48
+               OpMemberDecorate %S 9 Offset 64
+               OpMemberDecorate %S 10 Offset 80
+               OpMemberDecorate %S 11 Offset 96
+               OpMemberDecorate %S 12 Offset 112
+               OpMemberDecorate %S 13 Offset 128
+               OpMemberDecorate %S 14 Offset 144
+               OpMemberDecorate %S 15 Offset 160
+               OpMemberDecorate %S 16 Offset 168
+               OpMemberDecorate %S 16 ColMajor
+               OpMemberDecorate %S 16 MatrixStride 8
+               OpMemberDecorate %S 17 Offset 192
+               OpMemberDecorate %S 17 ColMajor
+               OpMemberDecorate %S 17 MatrixStride 16
+               OpMemberDecorate %S 18 Offset 224
+               OpMemberDecorate %S 18 ColMajor
+               OpMemberDecorate %S 18 MatrixStride 16
+               OpMemberDecorate %S 19 Offset 256
+               OpMemberDecorate %S 19 ColMajor
+               OpMemberDecorate %S 19 MatrixStride 8
+               OpMemberDecorate %S 20 Offset 288
+               OpMemberDecorate %S 20 ColMajor
+               OpMemberDecorate %S 20 MatrixStride 16
+               OpMemberDecorate %S 21 Offset 336
+               OpMemberDecorate %S 21 ColMajor
+               OpMemberDecorate %S 21 MatrixStride 16
+               OpMemberDecorate %S 22 Offset 384
+               OpMemberDecorate %S 22 ColMajor
+               OpMemberDecorate %S 22 MatrixStride 8
+               OpMemberDecorate %S 23 Offset 416
+               OpMemberDecorate %S 23 ColMajor
+               OpMemberDecorate %S 23 MatrixStride 16
+               OpMemberDecorate %S 24 Offset 480
+               OpMemberDecorate %S 24 ColMajor
+               OpMemberDecorate %S 24 MatrixStride 16
+               OpMemberDecorate %S 25 Offset 544
+               OpMemberDecorate %S 25 ColMajor
+               OpMemberDecorate %S 25 MatrixStride 4
+               OpMemberDecorate %S 26 Offset 552
+               OpMemberDecorate %S 26 ColMajor
+               OpMemberDecorate %S 26 MatrixStride 8
+               OpMemberDecorate %S 27 Offset 568
+               OpMemberDecorate %S 27 ColMajor
+               OpMemberDecorate %S 27 MatrixStride 8
+               OpMemberDecorate %S 28 Offset 584
+               OpMemberDecorate %S 28 ColMajor
+               OpMemberDecorate %S 28 MatrixStride 4
+               OpMemberDecorate %S 29 Offset 600
+               OpMemberDecorate %S 29 ColMajor
+               OpMemberDecorate %S 29 MatrixStride 8
+               OpMemberDecorate %S 30 Offset 624
+               OpMemberDecorate %S 30 ColMajor
+               OpMemberDecorate %S 30 MatrixStride 8
+               OpMemberDecorate %S 31 Offset 648
+               OpMemberDecorate %S 31 ColMajor
+               OpMemberDecorate %S 31 MatrixStride 4
+               OpMemberDecorate %S 32 Offset 664
+               OpMemberDecorate %S 32 ColMajor
+               OpMemberDecorate %S 32 MatrixStride 8
+               OpMemberDecorate %S 33 Offset 696
+               OpMemberDecorate %S 33 ColMajor
+               OpMemberDecorate %S 33 MatrixStride 8
+               OpMemberDecorate %S 34 Offset 736
+               OpDecorate %_arr_v3float_uint_2 ArrayStride 16
+               OpMemberDecorate %S 35 Offset 768
+               OpMemberDecorate %S 35 ColMajor
+               OpMemberDecorate %S 35 MatrixStride 4
+               OpDecorate %_arr_mat4v2half_uint_2 ArrayStride 16
+               OpMemberDecorate %S 36 Offset 800
+               OpMemberDecorate %Inner 0 Offset 0
+               OpMemberDecorate %Inner 1 Offset 4
+               OpMemberDecorate %Inner 2 Offset 8
+               OpMemberDecorate %S 37 Offset 812
+               OpDecorate %_arr_Inner_uint_4 ArrayStride 12
+               OpDecorate %sb NonWritable
+               OpDecorate %sb Binding 0
+               OpDecorate %sb DescriptorSet 0
+      %float = OpTypeFloat 32
+        %int = OpTypeInt 32 1
+       %uint = OpTypeInt 32 0
+       %half = OpTypeFloat 16
+    %v2float = OpTypeVector %float 2
+      %v2int = OpTypeVector %int 2
+     %v2uint = OpTypeVector %uint 2
+     %v2half = OpTypeVector %half 2
+    %v3float = OpTypeVector %float 3
+      %v3int = OpTypeVector %int 3
+     %v3uint = OpTypeVector %uint 3
+     %v3half = OpTypeVector %half 3
+    %v4float = OpTypeVector %float 4
+      %v4int = OpTypeVector %int 4
+     %v4uint = OpTypeVector %uint 4
+     %v4half = OpTypeVector %half 4
+%mat2v2float = OpTypeMatrix %v2float 2
+%mat2v3float = OpTypeMatrix %v3float 2
+%mat2v4float = OpTypeMatrix %v4float 2
+%mat3v2float = OpTypeMatrix %v2float 3
+%mat3v3float = OpTypeMatrix %v3float 3
+%mat3v4float = OpTypeMatrix %v4float 3
+%mat4v2float = OpTypeMatrix %v2float 4
+%mat4v3float = OpTypeMatrix %v3float 4
+%mat4v4float = OpTypeMatrix %v4float 4
+ %mat2v2half = OpTypeMatrix %v2half 2
+ %mat2v3half = OpTypeMatrix %v3half 2
+ %mat2v4half = OpTypeMatrix %v4half 2
+ %mat3v2half = OpTypeMatrix %v2half 3
+ %mat3v3half = OpTypeMatrix %v3half 3
+ %mat3v4half = OpTypeMatrix %v4half 3
+ %mat4v2half = OpTypeMatrix %v2half 4
+ %mat4v3half = OpTypeMatrix %v3half 4
+ %mat4v4half = OpTypeMatrix %v4half 4
+     %uint_2 = OpConstant %uint 2
+%_arr_v3float_uint_2 = OpTypeArray %v3float %uint_2
+%_arr_mat4v2half_uint_2 = OpTypeArray %mat4v2half %uint_2
+      %Inner = OpTypeStruct %int %float %half
+     %uint_4 = OpConstant %uint 4
+%_arr_Inner_uint_4 = OpTypeArray %Inner %uint_4
+          %S = OpTypeStruct %float %int %uint %half %v2float %v2int %v2uint %v2half %v3float %v3int %v3uint %v3half %v4float %v4int %v4uint %v4half %mat2v2float %mat2v3float %mat2v4float %mat3v2float %mat3v3float %mat3v4float %mat4v2float %mat4v3float %mat4v4float %mat2v2half %mat2v3half %mat2v4half %mat3v2half %mat3v3half %mat3v4half %mat4v2half %mat4v3half %mat4v4half %_arr_v3float_uint_2 %_arr_mat4v2half_uint_2 %Inner %_arr_Inner_uint_4
+   %sb_block = OpTypeStruct %S
+%_ptr_StorageBuffer_sb_block = OpTypePointer StorageBuffer %sb_block
+         %sb = OpVariable %_ptr_StorageBuffer_sb_block StorageBuffer
+       %void = OpTypeVoid
+         %45 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
+     %uint_1 = OpConstant %uint 1
+%_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int
+%_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
+     %uint_3 = OpConstant %uint 3
+%_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
+%_ptr_StorageBuffer_v2float = OpTypePointer StorageBuffer %v2float
+     %uint_5 = OpConstant %uint 5
+%_ptr_StorageBuffer_v2int = OpTypePointer StorageBuffer %v2int
+     %uint_6 = OpConstant %uint 6
+%_ptr_StorageBuffer_v2uint = OpTypePointer StorageBuffer %v2uint
+     %uint_7 = OpConstant %uint 7
+%_ptr_StorageBuffer_v2half = OpTypePointer StorageBuffer %v2half
+     %uint_8 = OpConstant %uint 8
+%_ptr_StorageBuffer_v3float = OpTypePointer StorageBuffer %v3float
+     %uint_9 = OpConstant %uint 9
+%_ptr_StorageBuffer_v3int = OpTypePointer StorageBuffer %v3int
+    %uint_10 = OpConstant %uint 10
+%_ptr_StorageBuffer_v3uint = OpTypePointer StorageBuffer %v3uint
+    %uint_11 = OpConstant %uint 11
+%_ptr_StorageBuffer_v3half = OpTypePointer StorageBuffer %v3half
+    %uint_12 = OpConstant %uint 12
+%_ptr_StorageBuffer_v4float = OpTypePointer StorageBuffer %v4float
+    %uint_13 = OpConstant %uint 13
+%_ptr_StorageBuffer_v4int = OpTypePointer StorageBuffer %v4int
+    %uint_14 = OpConstant %uint 14
+%_ptr_StorageBuffer_v4uint = OpTypePointer StorageBuffer %v4uint
+    %uint_15 = OpConstant %uint 15
+%_ptr_StorageBuffer_v4half = OpTypePointer StorageBuffer %v4half
+    %uint_16 = OpConstant %uint 16
+%_ptr_StorageBuffer_mat2v2float = OpTypePointer StorageBuffer %mat2v2float
+    %uint_17 = OpConstant %uint 17
+%_ptr_StorageBuffer_mat2v3float = OpTypePointer StorageBuffer %mat2v3float
+    %uint_18 = OpConstant %uint 18
+%_ptr_StorageBuffer_mat2v4float = OpTypePointer StorageBuffer %mat2v4float
+    %uint_19 = OpConstant %uint 19
+%_ptr_StorageBuffer_mat3v2float = OpTypePointer StorageBuffer %mat3v2float
+    %uint_20 = OpConstant %uint 20
+%_ptr_StorageBuffer_mat3v3float = OpTypePointer StorageBuffer %mat3v3float
+    %uint_21 = OpConstant %uint 21
+%_ptr_StorageBuffer_mat3v4float = OpTypePointer StorageBuffer %mat3v4float
+    %uint_22 = OpConstant %uint 22
+%_ptr_StorageBuffer_mat4v2float = OpTypePointer StorageBuffer %mat4v2float
+    %uint_23 = OpConstant %uint 23
+%_ptr_StorageBuffer_mat4v3float = OpTypePointer StorageBuffer %mat4v3float
+    %uint_24 = OpConstant %uint 24
+%_ptr_StorageBuffer_mat4v4float = OpTypePointer StorageBuffer %mat4v4float
+    %uint_25 = OpConstant %uint 25
+%_ptr_StorageBuffer_mat2v2half = OpTypePointer StorageBuffer %mat2v2half
+    %uint_26 = OpConstant %uint 26
+%_ptr_StorageBuffer_mat2v3half = OpTypePointer StorageBuffer %mat2v3half
+    %uint_27 = OpConstant %uint 27
+%_ptr_StorageBuffer_mat2v4half = OpTypePointer StorageBuffer %mat2v4half
+    %uint_28 = OpConstant %uint 28
+%_ptr_StorageBuffer_mat3v2half = OpTypePointer StorageBuffer %mat3v2half
+    %uint_29 = OpConstant %uint 29
+%_ptr_StorageBuffer_mat3v3half = OpTypePointer StorageBuffer %mat3v3half
+    %uint_30 = OpConstant %uint 30
+%_ptr_StorageBuffer_mat3v4half = OpTypePointer StorageBuffer %mat3v4half
+    %uint_31 = OpConstant %uint 31
+%_ptr_StorageBuffer_mat4v2half = OpTypePointer StorageBuffer %mat4v2half
+    %uint_32 = OpConstant %uint 32
+%_ptr_StorageBuffer_mat4v3half = OpTypePointer StorageBuffer %mat4v3half
+    %uint_33 = OpConstant %uint 33
+%_ptr_StorageBuffer_mat4v4half = OpTypePointer StorageBuffer %mat4v4half
+    %uint_34 = OpConstant %uint 34
+%_ptr_StorageBuffer__arr_v3float_uint_2 = OpTypePointer StorageBuffer %_arr_v3float_uint_2
+    %uint_35 = OpConstant %uint 35
+%_ptr_StorageBuffer__arr_mat4v2half_uint_2 = OpTypePointer StorageBuffer %_arr_mat4v2half_uint_2
+    %uint_36 = OpConstant %uint 36
+%_ptr_StorageBuffer_Inner = OpTypePointer StorageBuffer %Inner
+    %uint_37 = OpConstant %uint 37
+%_ptr_StorageBuffer__arr_Inner_uint_4 = OpTypePointer StorageBuffer %_arr_Inner_uint_4
+       %main = OpFunction %void None %45
+         %48 = OpLabel
+         %51 = OpAccessChain %_ptr_StorageBuffer_float %sb %uint_0 %uint_0
+         %52 = OpLoad %float %51
+         %55 = OpAccessChain %_ptr_StorageBuffer_int %sb %uint_0 %uint_1
+         %56 = OpLoad %int %55
+         %58 = OpAccessChain %_ptr_StorageBuffer_uint %sb %uint_0 %uint_2
+         %59 = OpLoad %uint %58
+         %62 = OpAccessChain %_ptr_StorageBuffer_half %sb %uint_0 %uint_3
+         %63 = OpLoad %half %62
+         %65 = OpAccessChain %_ptr_StorageBuffer_v2float %sb %uint_0 %uint_4
+         %66 = OpLoad %v2float %65
+         %69 = OpAccessChain %_ptr_StorageBuffer_v2int %sb %uint_0 %uint_5
+         %70 = OpLoad %v2int %69
+         %73 = OpAccessChain %_ptr_StorageBuffer_v2uint %sb %uint_0 %uint_6
+         %74 = OpLoad %v2uint %73
+         %77 = OpAccessChain %_ptr_StorageBuffer_v2half %sb %uint_0 %uint_7
+         %78 = OpLoad %v2half %77
+         %81 = OpAccessChain %_ptr_StorageBuffer_v3float %sb %uint_0 %uint_8
+         %82 = OpLoad %v3float %81
+         %85 = OpAccessChain %_ptr_StorageBuffer_v3int %sb %uint_0 %uint_9
+         %86 = OpLoad %v3int %85
+         %89 = OpAccessChain %_ptr_StorageBuffer_v3uint %sb %uint_0 %uint_10
+         %90 = OpLoad %v3uint %89
+         %93 = OpAccessChain %_ptr_StorageBuffer_v3half %sb %uint_0 %uint_11
+         %94 = OpLoad %v3half %93
+         %97 = OpAccessChain %_ptr_StorageBuffer_v4float %sb %uint_0 %uint_12
+         %98 = OpLoad %v4float %97
+        %101 = OpAccessChain %_ptr_StorageBuffer_v4int %sb %uint_0 %uint_13
+        %102 = OpLoad %v4int %101
+        %105 = OpAccessChain %_ptr_StorageBuffer_v4uint %sb %uint_0 %uint_14
+        %106 = OpLoad %v4uint %105
+        %109 = OpAccessChain %_ptr_StorageBuffer_v4half %sb %uint_0 %uint_15
+        %110 = OpLoad %v4half %109
+        %113 = OpAccessChain %_ptr_StorageBuffer_mat2v2float %sb %uint_0 %uint_16
+        %114 = OpLoad %mat2v2float %113
+        %117 = OpAccessChain %_ptr_StorageBuffer_mat2v3float %sb %uint_0 %uint_17
+        %118 = OpLoad %mat2v3float %117
+        %121 = OpAccessChain %_ptr_StorageBuffer_mat2v4float %sb %uint_0 %uint_18
+        %122 = OpLoad %mat2v4float %121
+        %125 = OpAccessChain %_ptr_StorageBuffer_mat3v2float %sb %uint_0 %uint_19
+        %126 = OpLoad %mat3v2float %125
+        %129 = OpAccessChain %_ptr_StorageBuffer_mat3v3float %sb %uint_0 %uint_20
+        %130 = OpLoad %mat3v3float %129
+        %133 = OpAccessChain %_ptr_StorageBuffer_mat3v4float %sb %uint_0 %uint_21
+        %134 = OpLoad %mat3v4float %133
+        %137 = OpAccessChain %_ptr_StorageBuffer_mat4v2float %sb %uint_0 %uint_22
+        %138 = OpLoad %mat4v2float %137
+        %141 = OpAccessChain %_ptr_StorageBuffer_mat4v3float %sb %uint_0 %uint_23
+        %142 = OpLoad %mat4v3float %141
+        %145 = OpAccessChain %_ptr_StorageBuffer_mat4v4float %sb %uint_0 %uint_24
+        %146 = OpLoad %mat4v4float %145
+        %149 = OpAccessChain %_ptr_StorageBuffer_mat2v2half %sb %uint_0 %uint_25
+        %150 = OpLoad %mat2v2half %149
+        %153 = OpAccessChain %_ptr_StorageBuffer_mat2v3half %sb %uint_0 %uint_26
+        %154 = OpLoad %mat2v3half %153
+        %157 = OpAccessChain %_ptr_StorageBuffer_mat2v4half %sb %uint_0 %uint_27
+        %158 = OpLoad %mat2v4half %157
+        %161 = OpAccessChain %_ptr_StorageBuffer_mat3v2half %sb %uint_0 %uint_28
+        %162 = OpLoad %mat3v2half %161
+        %165 = OpAccessChain %_ptr_StorageBuffer_mat3v3half %sb %uint_0 %uint_29
+        %166 = OpLoad %mat3v3half %165
+        %169 = OpAccessChain %_ptr_StorageBuffer_mat3v4half %sb %uint_0 %uint_30
+        %170 = OpLoad %mat3v4half %169
+        %173 = OpAccessChain %_ptr_StorageBuffer_mat4v2half %sb %uint_0 %uint_31
+        %174 = OpLoad %mat4v2half %173
+        %177 = OpAccessChain %_ptr_StorageBuffer_mat4v3half %sb %uint_0 %uint_32
+        %178 = OpLoad %mat4v3half %177
+        %181 = OpAccessChain %_ptr_StorageBuffer_mat4v4half %sb %uint_0 %uint_33
+        %182 = OpLoad %mat4v4half %181
+        %185 = OpAccessChain %_ptr_StorageBuffer__arr_v3float_uint_2 %sb %uint_0 %uint_34
+        %186 = OpLoad %_arr_v3float_uint_2 %185
+        %189 = OpAccessChain %_ptr_StorageBuffer__arr_mat4v2half_uint_2 %sb %uint_0 %uint_35
+        %190 = OpLoad %_arr_mat4v2half_uint_2 %189
+        %193 = OpAccessChain %_ptr_StorageBuffer_Inner %sb %uint_0 %uint_36
+        %194 = OpLoad %Inner %193
+        %197 = OpAccessChain %_ptr_StorageBuffer__arr_Inner_uint_4 %sb %uint_0 %uint_37
+        %198 = OpLoad %_arr_Inner_uint_4 %197
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/storage/static_index/read_f16.wgsl.expected.wgsl b/test/tint/buffer/storage/static_index/read_f16.wgsl.expected.wgsl
new file mode 100644
index 0000000..cb7c9e1
--- /dev/null
+++ b/test/tint/buffer/storage/static_index/read_f16.wgsl.expected.wgsl

@@ -0,0 +1,92 @@
+enable f16;
+
+struct Inner {
+  scalar_i32 : i32,
+  scalar_f32 : f32,
+  scalar_f16 : f16,
+}
+
+struct S {
+  scalar_f32 : f32,
+  scalar_i32 : i32,
+  scalar_u32 : u32,
+  scalar_f16 : f16,
+  vec2_f32 : vec2<f32>,
+  vec2_i32 : vec2<i32>,
+  vec2_u32 : vec2<u32>,
+  vec2_f16 : vec2<f16>,
+  vec3_f32 : vec3<f32>,
+  vec3_i32 : vec3<i32>,
+  vec3_u32 : vec3<u32>,
+  vec3_f16 : vec3<f16>,
+  vec4_f32 : vec4<f32>,
+  vec4_i32 : vec4<i32>,
+  vec4_u32 : vec4<u32>,
+  vec4_f16 : vec4<f16>,
+  mat2x2_f32 : mat2x2<f32>,
+  mat2x3_f32 : mat2x3<f32>,
+  mat2x4_f32 : mat2x4<f32>,
+  mat3x2_f32 : mat3x2<f32>,
+  mat3x3_f32 : mat3x3<f32>,
+  mat3x4_f32 : mat3x4<f32>,
+  mat4x2_f32 : mat4x2<f32>,
+  mat4x3_f32 : mat4x3<f32>,
+  mat4x4_f32 : mat4x4<f32>,
+  mat2x2_f16 : mat2x2<f16>,
+  mat2x3_f16 : mat2x3<f16>,
+  mat2x4_f16 : mat2x4<f16>,
+  mat3x2_f16 : mat3x2<f16>,
+  mat3x3_f16 : mat3x3<f16>,
+  mat3x4_f16 : mat3x4<f16>,
+  mat4x2_f16 : mat4x2<f16>,
+  mat4x3_f16 : mat4x3<f16>,
+  mat4x4_f16 : mat4x4<f16>,
+  arr2_vec3_f32 : array<vec3<f32>, 2>,
+  arr2_mat4x2_f16 : array<mat4x2<f16>, 2>,
+  struct_inner : Inner,
+  array_struct_inner : array<Inner, 4>,
+}
+
+@binding(0) @group(0) var<storage, read> sb : S;
+
+@compute @workgroup_size(1)
+fn main() {
+  let scalar_f32 = sb.scalar_f32;
+  let scalar_i32 = sb.scalar_i32;
+  let scalar_u32 = sb.scalar_u32;
+  let scalar_f16 = sb.scalar_f16;
+  let vec2_f32 = sb.vec2_f32;
+  let vec2_i32 = sb.vec2_i32;
+  let vec2_u32 = sb.vec2_u32;
+  let vec2_f16 = sb.vec2_f16;
+  let vec3_f32 = sb.vec3_f32;
+  let vec3_i32 = sb.vec3_i32;
+  let vec3_u32 = sb.vec3_u32;
+  let vec3_f16 = sb.vec3_f16;
+  let vec4_f32 = sb.vec4_f32;
+  let vec4_i32 = sb.vec4_i32;
+  let vec4_u32 = sb.vec4_u32;
+  let vec4_f16 = sb.vec4_f16;
+  let mat2x2_f32 = sb.mat2x2_f32;
+  let mat2x3_f32 = sb.mat2x3_f32;
+  let mat2x4_f32 = sb.mat2x4_f32;
+  let mat3x2_f32 = sb.mat3x2_f32;
+  let mat3x3_f32 = sb.mat3x3_f32;
+  let mat3x4_f32 = sb.mat3x4_f32;
+  let mat4x2_f32 = sb.mat4x2_f32;
+  let mat4x3_f32 = sb.mat4x3_f32;
+  let mat4x4_f32 = sb.mat4x4_f32;
+  let mat2x2_f16 = sb.mat2x2_f16;
+  let mat2x3_f16 = sb.mat2x3_f16;
+  let mat2x4_f16 = sb.mat2x4_f16;
+  let mat3x2_f16 = sb.mat3x2_f16;
+  let mat3x3_f16 = sb.mat3x3_f16;
+  let mat3x4_f16 = sb.mat3x4_f16;
+  let mat4x2_f16 = sb.mat4x2_f16;
+  let mat4x3_f16 = sb.mat4x3_f16;
+  let mat4x4_f16 = sb.mat4x4_f16;
+  let arr2_vec3_f32 = sb.arr2_vec3_f32;
+  let arr2_mat4x2_f16 = sb.arr2_mat4x2_f16;
+  let struct_inner = sb.struct_inner;
+  let array_struct_inner = sb.array_struct_inner;
+}

diff --git a/test/tint/buffer/storage/static_index/write.wgsl b/test/tint/buffer/storage/static_index/write.wgsl
index 5290db8..abec74e 100644
--- a/test/tint/buffer/storage/static_index/write.wgsl
+++ b/test/tint/buffer/storage/static_index/write.wgsl

@@ -1,32 +1,61 @@
 struct Inner {
-    x : i32,
+    scalar_i32 : i32,
+    scalar_f32 : f32,
 };
 
 struct S {
-    a : vec3<i32>,
-    b : i32,
-    c : vec3<u32>,
-    d : u32,
-    e : vec3<f32>,
-    f : f32,
-    g : mat2x3<f32>,
-    h : mat3x2<f32>,
-    i : Inner,
-    j : array<Inner, 4>,
+    scalar_f32 : f32,
+    scalar_i32 : i32,
+    scalar_u32 : u32,
+    vec2_f32 : vec2<f32>,
+    vec2_i32 : vec2<i32>,
+    vec2_u32 : vec2<u32>,
+    vec3_f32 : vec3<f32>,
+    vec3_i32 : vec3<i32>,
+    vec3_u32 : vec3<u32>,
+    vec4_f32 : vec4<f32>,
+    vec4_i32 : vec4<i32>,
+    vec4_u32 : vec4<u32>,
+    mat2x2_f32 : mat2x2<f32>,
+    mat2x3_f32 : mat2x3<f32>,
+    mat2x4_f32 : mat2x4<f32>,
+    mat3x2_f32 : mat3x2<f32>,
+    mat3x3_f32 : mat3x3<f32>,
+    mat3x4_f32 : mat3x4<f32>,
+    mat4x2_f32 : mat4x2<f32>,
+    mat4x3_f32 : mat4x3<f32>,
+    mat4x4_f32 : mat4x4<f32>,
+    arr2_vec3_f32 : array<vec3<f32>, 2>,
+    struct_inner : Inner,
+    array_struct_inner : array<Inner, 4>,
 };
 
-@binding(0) @group(0) var<storage, read_write> s : S;
+@binding(0) @group(0) var<storage, read_write> sb : S;
 
 @compute @workgroup_size(1)
 fn main() {
-    s.a = vec3<i32>();
-    s.b = i32();
-    s.c = vec3<u32>();
-    s.d = u32();
-    s.e = vec3<f32>();
-    s.f = f32();
-    s.g = mat2x3<f32>();
-    s.h = mat3x2<f32>();
-    s.i = Inner();
-    s.j = array<Inner, 4>();
+    sb.scalar_f32 = f32();
+    sb.scalar_i32 = i32();
+    sb.scalar_u32 = u32();
+    sb.vec2_f32 = vec2<f32>();
+    sb.vec2_i32 = vec2<i32>();
+    sb.vec2_u32 = vec2<u32>();
+    sb.vec3_f32 = vec3<f32>();
+    sb.vec3_i32 = vec3<i32>();
+    sb.vec3_u32 = vec3<u32>();
+    sb.vec4_f32 = vec4<f32>();
+    sb.vec4_i32 = vec4<i32>();
+    sb.vec4_u32 = vec4<u32>();
+    sb.mat2x2_f32 = mat2x2<f32>();
+    sb.mat2x3_f32 = mat2x3<f32>();
+    sb.mat2x4_f32 = mat2x4<f32>();
+    sb.mat3x2_f32 = mat3x2<f32>();
+    sb.mat3x3_f32 = mat3x3<f32>();
+    sb.mat3x4_f32 = mat3x4<f32>();
+    sb.mat4x2_f32 = mat4x2<f32>();
+    sb.mat4x3_f32 = mat4x3<f32>();
+    sb.mat4x4_f32 = mat4x4<f32>();
+    sb.arr2_vec3_f32 = array<vec3<f32>, 2>();
+    sb.struct_inner = Inner();
+    sb.array_struct_inner = array<Inner, 4>();
 }

diff --git a/test/tint/buffer/storage/static_index/write.wgsl.expected.dxc.hlsl b/test/tint/buffer/storage/static_index/write.wgsl.expected.dxc.hlsl
index 0717f6d..1fcb362 100644
--- a/test/tint/buffer/storage/static_index/write.wgsl.expected.dxc.hlsl
+++ b/test/tint/buffer/storage/static_index/write.wgsl.expected.dxc.hlsl

@@ -1,46 +1,115 @@
 struct Inner {
-  int x;
+  int scalar_i32;
+  float scalar_f32;
 };
 
-RWByteAddressBuffer s : register(u0, space0);
+RWByteAddressBuffer sb : register(u0, space0);
 
-void tint_symbol_6(RWByteAddressBuffer buffer, uint offset, float2x3 value) {
+void tint_symbol_12(RWByteAddressBuffer buffer, uint offset, float2x2 value) {
+  buffer.Store2((offset + 0u), asuint(value[0u]));
+  buffer.Store2((offset + 8u), asuint(value[1u]));
+}
+
+void tint_symbol_13(RWByteAddressBuffer buffer, uint offset, float2x3 value) {
   buffer.Store3((offset + 0u), asuint(value[0u]));
   buffer.Store3((offset + 16u), asuint(value[1u]));
 }
 
-void tint_symbol_7(RWByteAddressBuffer buffer, uint offset, float3x2 value) {
+void tint_symbol_14(RWByteAddressBuffer buffer, uint offset, float2x4 value) {
+  buffer.Store4((offset + 0u), asuint(value[0u]));
+  buffer.Store4((offset + 16u), asuint(value[1u]));
+}
+
+void tint_symbol_15(RWByteAddressBuffer buffer, uint offset, float3x2 value) {
   buffer.Store2((offset + 0u), asuint(value[0u]));
   buffer.Store2((offset + 8u), asuint(value[1u]));
   buffer.Store2((offset + 16u), asuint(value[2u]));
 }
 
-void tint_symbol_9(RWByteAddressBuffer buffer, uint offset, Inner value) {
-  buffer.Store((offset + 0u), asuint(value.x));
+void tint_symbol_16(RWByteAddressBuffer buffer, uint offset, float3x3 value) {
+  buffer.Store3((offset + 0u), asuint(value[0u]));
+  buffer.Store3((offset + 16u), asuint(value[1u]));
+  buffer.Store3((offset + 32u), asuint(value[2u]));
 }
 
-void tint_symbol_10(RWByteAddressBuffer buffer, uint offset, Inner value[4]) {
-  Inner array[4] = value;
+void tint_symbol_17(RWByteAddressBuffer buffer, uint offset, float3x4 value) {
+  buffer.Store4((offset + 0u), asuint(value[0u]));
+  buffer.Store4((offset + 16u), asuint(value[1u]));
+  buffer.Store4((offset + 32u), asuint(value[2u]));
+}
+
+void tint_symbol_18(RWByteAddressBuffer buffer, uint offset, float4x2 value) {
+  buffer.Store2((offset + 0u), asuint(value[0u]));
+  buffer.Store2((offset + 8u), asuint(value[1u]));
+  buffer.Store2((offset + 16u), asuint(value[2u]));
+  buffer.Store2((offset + 24u), asuint(value[3u]));
+}
+
+void tint_symbol_19(RWByteAddressBuffer buffer, uint offset, float4x3 value) {
+  buffer.Store3((offset + 0u), asuint(value[0u]));
+  buffer.Store3((offset + 16u), asuint(value[1u]));
+  buffer.Store3((offset + 32u), asuint(value[2u]));
+  buffer.Store3((offset + 48u), asuint(value[3u]));
+}
+
+void tint_symbol_20(RWByteAddressBuffer buffer, uint offset, float4x4 value) {
+  buffer.Store4((offset + 0u), asuint(value[0u]));
+  buffer.Store4((offset + 16u), asuint(value[1u]));
+  buffer.Store4((offset + 32u), asuint(value[2u]));
+  buffer.Store4((offset + 48u), asuint(value[3u]));
+}
+
+void tint_symbol_21(RWByteAddressBuffer buffer, uint offset, float3 value[2]) {
+  float3 array[2] = value;
+  {
+    for(uint i = 0u; (i < 2u); i = (i + 1u)) {
+      buffer.Store3((offset + (i * 16u)), asuint(array[i]));
+    }
+  }
+}
+
+void tint_symbol_22(RWByteAddressBuffer buffer, uint offset, Inner value) {
+  buffer.Store((offset + 0u), asuint(value.scalar_i32));
+  buffer.Store((offset + 4u), asuint(value.scalar_f32));
+}
+
+void tint_symbol_23(RWByteAddressBuffer buffer, uint offset, Inner value[4]) {
+  Inner array_1[4] = value;
   {
     for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
-      tint_symbol_9(buffer, (offset + (i_1 * 4u)), array[i_1]);
+      tint_symbol_22(buffer, (offset + (i_1 * 8u)), array_1[i_1]);
     }
   }
 }
 
 [numthreads(1, 1, 1)]
 void main() {
-  s.Store3(0u, asuint((0).xxx));
-  s.Store(12u, asuint(0));
-  s.Store3(16u, asuint((0u).xxx));
-  s.Store(28u, asuint(0u));
-  s.Store3(32u, asuint((0.0f).xxx));
-  s.Store(44u, asuint(0.0f));
-  tint_symbol_6(s, 48u, float2x3((0.0f).xxx, (0.0f).xxx));
-  tint_symbol_7(s, 80u, float3x2((0.0f).xx, (0.0f).xx, (0.0f).xx));
-  const Inner tint_symbol_11 = (Inner)0;
-  tint_symbol_9(s, 104u, tint_symbol_11);
-  const Inner tint_symbol_12[4] = (Inner[4])0;
-  tint_symbol_10(s, 108u, tint_symbol_12);
+  sb.Store(0u, asuint(0.0f));
+  sb.Store(4u, asuint(0));
+  sb.Store(8u, asuint(0u));
+  sb.Store2(16u, asuint((0.0f).xx));
+  sb.Store2(24u, asuint((0).xx));
+  sb.Store2(32u, asuint((0u).xx));
+  sb.Store3(48u, asuint((0.0f).xxx));
+  sb.Store3(64u, asuint((0).xxx));
+  sb.Store3(80u, asuint((0u).xxx));
+  sb.Store4(96u, asuint((0.0f).xxxx));
+  sb.Store4(112u, asuint((0).xxxx));
+  sb.Store4(128u, asuint((0u).xxxx));
+  tint_symbol_12(sb, 144u, float2x2((0.0f).xx, (0.0f).xx));
+  tint_symbol_13(sb, 160u, float2x3((0.0f).xxx, (0.0f).xxx));
+  tint_symbol_14(sb, 192u, float2x4((0.0f).xxxx, (0.0f).xxxx));
+  tint_symbol_15(sb, 224u, float3x2((0.0f).xx, (0.0f).xx, (0.0f).xx));
+  tint_symbol_16(sb, 256u, float3x3((0.0f).xxx, (0.0f).xxx, (0.0f).xxx));
+  tint_symbol_17(sb, 304u, float3x4((0.0f).xxxx, (0.0f).xxxx, (0.0f).xxxx));
+  tint_symbol_18(sb, 352u, float4x2((0.0f).xx, (0.0f).xx, (0.0f).xx, (0.0f).xx));
+  tint_symbol_19(sb, 384u, float4x3((0.0f).xxx, (0.0f).xxx, (0.0f).xxx, (0.0f).xxx));
+  tint_symbol_20(sb, 448u, float4x4((0.0f).xxxx, (0.0f).xxxx, (0.0f).xxxx, (0.0f).xxxx));
+  const float3 tint_symbol_24[2] = (float3[2])0;
+  tint_symbol_21(sb, 512u, tint_symbol_24);
+  const Inner tint_symbol_25 = (Inner)0;
+  tint_symbol_22(sb, 544u, tint_symbol_25);
+  const Inner tint_symbol_26[4] = (Inner[4])0;
+  tint_symbol_23(sb, 552u, tint_symbol_26);
   return;
 }

diff --git a/test/tint/buffer/storage/static_index/write.wgsl.expected.fxc.hlsl b/test/tint/buffer/storage/static_index/write.wgsl.expected.fxc.hlsl
index 0717f6d..1fcb362 100644
--- a/test/tint/buffer/storage/static_index/write.wgsl.expected.fxc.hlsl
+++ b/test/tint/buffer/storage/static_index/write.wgsl.expected.fxc.hlsl

@@ -1,46 +1,115 @@
 struct Inner {
-  int x;
+  int scalar_i32;
+  float scalar_f32;
 };
 
-RWByteAddressBuffer s : register(u0, space0);
+RWByteAddressBuffer sb : register(u0, space0);
 
-void tint_symbol_6(RWByteAddressBuffer buffer, uint offset, float2x3 value) {
+void tint_symbol_12(RWByteAddressBuffer buffer, uint offset, float2x2 value) {
+  buffer.Store2((offset + 0u), asuint(value[0u]));
+  buffer.Store2((offset + 8u), asuint(value[1u]));
+}
+
+void tint_symbol_13(RWByteAddressBuffer buffer, uint offset, float2x3 value) {
   buffer.Store3((offset + 0u), asuint(value[0u]));
   buffer.Store3((offset + 16u), asuint(value[1u]));
 }
 
-void tint_symbol_7(RWByteAddressBuffer buffer, uint offset, float3x2 value) {
+void tint_symbol_14(RWByteAddressBuffer buffer, uint offset, float2x4 value) {
+  buffer.Store4((offset + 0u), asuint(value[0u]));
+  buffer.Store4((offset + 16u), asuint(value[1u]));
+}
+
+void tint_symbol_15(RWByteAddressBuffer buffer, uint offset, float3x2 value) {
   buffer.Store2((offset + 0u), asuint(value[0u]));
   buffer.Store2((offset + 8u), asuint(value[1u]));
   buffer.Store2((offset + 16u), asuint(value[2u]));
 }
 
-void tint_symbol_9(RWByteAddressBuffer buffer, uint offset, Inner value) {
-  buffer.Store((offset + 0u), asuint(value.x));
+void tint_symbol_16(RWByteAddressBuffer buffer, uint offset, float3x3 value) {
+  buffer.Store3((offset + 0u), asuint(value[0u]));
+  buffer.Store3((offset + 16u), asuint(value[1u]));
+  buffer.Store3((offset + 32u), asuint(value[2u]));
 }
 
-void tint_symbol_10(RWByteAddressBuffer buffer, uint offset, Inner value[4]) {
-  Inner array[4] = value;
+void tint_symbol_17(RWByteAddressBuffer buffer, uint offset, float3x4 value) {
+  buffer.Store4((offset + 0u), asuint(value[0u]));
+  buffer.Store4((offset + 16u), asuint(value[1u]));
+  buffer.Store4((offset + 32u), asuint(value[2u]));
+}
+
+void tint_symbol_18(RWByteAddressBuffer buffer, uint offset, float4x2 value) {
+  buffer.Store2((offset + 0u), asuint(value[0u]));
+  buffer.Store2((offset + 8u), asuint(value[1u]));
+  buffer.Store2((offset + 16u), asuint(value[2u]));
+  buffer.Store2((offset + 24u), asuint(value[3u]));
+}
+
+void tint_symbol_19(RWByteAddressBuffer buffer, uint offset, float4x3 value) {
+  buffer.Store3((offset + 0u), asuint(value[0u]));
+  buffer.Store3((offset + 16u), asuint(value[1u]));
+  buffer.Store3((offset + 32u), asuint(value[2u]));
+  buffer.Store3((offset + 48u), asuint(value[3u]));
+}
+
+void tint_symbol_20(RWByteAddressBuffer buffer, uint offset, float4x4 value) {
+  buffer.Store4((offset + 0u), asuint(value[0u]));
+  buffer.Store4((offset + 16u), asuint(value[1u]));
+  buffer.Store4((offset + 32u), asuint(value[2u]));
+  buffer.Store4((offset + 48u), asuint(value[3u]));
+}
+
+void tint_symbol_21(RWByteAddressBuffer buffer, uint offset, float3 value[2]) {
+  float3 array[2] = value;
+  {
+    for(uint i = 0u; (i < 2u); i = (i + 1u)) {
+      buffer.Store3((offset + (i * 16u)), asuint(array[i]));
+    }
+  }
+}
+
+void tint_symbol_22(RWByteAddressBuffer buffer, uint offset, Inner value) {
+  buffer.Store((offset + 0u), asuint(value.scalar_i32));
+  buffer.Store((offset + 4u), asuint(value.scalar_f32));
+}
+
+void tint_symbol_23(RWByteAddressBuffer buffer, uint offset, Inner value[4]) {
+  Inner array_1[4] = value;
   {
     for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
-      tint_symbol_9(buffer, (offset + (i_1 * 4u)), array[i_1]);
+      tint_symbol_22(buffer, (offset + (i_1 * 8u)), array_1[i_1]);
     }
   }
 }
 
 [numthreads(1, 1, 1)]
 void main() {
-  s.Store3(0u, asuint((0).xxx));
-  s.Store(12u, asuint(0));
-  s.Store3(16u, asuint((0u).xxx));
-  s.Store(28u, asuint(0u));
-  s.Store3(32u, asuint((0.0f).xxx));
-  s.Store(44u, asuint(0.0f));
-  tint_symbol_6(s, 48u, float2x3((0.0f).xxx, (0.0f).xxx));
-  tint_symbol_7(s, 80u, float3x2((0.0f).xx, (0.0f).xx, (0.0f).xx));
-  const Inner tint_symbol_11 = (Inner)0;
-  tint_symbol_9(s, 104u, tint_symbol_11);
-  const Inner tint_symbol_12[4] = (Inner[4])0;
-  tint_symbol_10(s, 108u, tint_symbol_12);
+  sb.Store(0u, asuint(0.0f));
+  sb.Store(4u, asuint(0));
+  sb.Store(8u, asuint(0u));
+  sb.Store2(16u, asuint((0.0f).xx));
+  sb.Store2(24u, asuint((0).xx));
+  sb.Store2(32u, asuint((0u).xx));
+  sb.Store3(48u, asuint((0.0f).xxx));
+  sb.Store3(64u, asuint((0).xxx));
+  sb.Store3(80u, asuint((0u).xxx));
+  sb.Store4(96u, asuint((0.0f).xxxx));
+  sb.Store4(112u, asuint((0).xxxx));
+  sb.Store4(128u, asuint((0u).xxxx));
+  tint_symbol_12(sb, 144u, float2x2((0.0f).xx, (0.0f).xx));
+  tint_symbol_13(sb, 160u, float2x3((0.0f).xxx, (0.0f).xxx));
+  tint_symbol_14(sb, 192u, float2x4((0.0f).xxxx, (0.0f).xxxx));
+  tint_symbol_15(sb, 224u, float3x2((0.0f).xx, (0.0f).xx, (0.0f).xx));
+  tint_symbol_16(sb, 256u, float3x3((0.0f).xxx, (0.0f).xxx, (0.0f).xxx));
+  tint_symbol_17(sb, 304u, float3x4((0.0f).xxxx, (0.0f).xxxx, (0.0f).xxxx));
+  tint_symbol_18(sb, 352u, float4x2((0.0f).xx, (0.0f).xx, (0.0f).xx, (0.0f).xx));
+  tint_symbol_19(sb, 384u, float4x3((0.0f).xxx, (0.0f).xxx, (0.0f).xxx, (0.0f).xxx));
+  tint_symbol_20(sb, 448u, float4x4((0.0f).xxxx, (0.0f).xxxx, (0.0f).xxxx, (0.0f).xxxx));
+  const float3 tint_symbol_24[2] = (float3[2])0;
+  tint_symbol_21(sb, 512u, tint_symbol_24);
+  const Inner tint_symbol_25 = (Inner)0;
+  tint_symbol_22(sb, 544u, tint_symbol_25);
+  const Inner tint_symbol_26[4] = (Inner[4])0;
+  tint_symbol_23(sb, 552u, tint_symbol_26);
   return;
 }

diff --git a/test/tint/buffer/storage/static_index/write.wgsl.expected.glsl b/test/tint/buffer/storage/static_index/write.wgsl.expected.glsl
index d6b3a38..636c5c3 100644
--- a/test/tint/buffer/storage/static_index/write.wgsl.expected.glsl
+++ b/test/tint/buffer/storage/static_index/write.wgsl.expected.glsl

@@ -1,40 +1,79 @@
 #version 310 es
 
 struct Inner {
-  int x;
+  int scalar_i32;
+  float scalar_f32;
 };
 
 struct S {
-  ivec3 a;
-  int b;
-  uvec3 c;
-  uint d;
-  vec3 e;
-  float f;
-  mat2x3 g;
-  mat3x2 h;
-  Inner i;
-  Inner j[4];
+  float scalar_f32;
+  int scalar_i32;
+  uint scalar_u32;
   uint pad;
+  vec2 vec2_f32;
+  ivec2 vec2_i32;
+  uvec2 vec2_u32;
+  uint pad_1;
+  uint pad_2;
+  vec3 vec3_f32;
+  uint pad_3;
+  ivec3 vec3_i32;
+  uint pad_4;
+  uvec3 vec3_u32;
+  uint pad_5;
+  vec4 vec4_f32;
+  ivec4 vec4_i32;
+  uvec4 vec4_u32;
+  mat2 mat2x2_f32;
+  mat2x3 mat2x3_f32;
+  mat2x4 mat2x4_f32;
+  mat3x2 mat3x2_f32;
+  uint pad_6;
+  uint pad_7;
+  mat3 mat3x3_f32;
+  mat3x4 mat3x4_f32;
+  mat4x2 mat4x2_f32;
+  mat4x3 mat4x3_f32;
+  mat4 mat4x4_f32;
+  vec3 arr2_vec3_f32[2];
+  Inner struct_inner;
+  Inner array_struct_inner[4];
+  uint pad_8;
+  uint pad_9;
 };
 
-layout(binding = 0, std430) buffer s_block_ssbo {
+layout(binding = 0, std430) buffer sb_block_ssbo {
   S inner;
-} s;
+} sb;
 
 void tint_symbol() {
-  s.inner.a = ivec3(0);
-  s.inner.b = 0;
-  s.inner.c = uvec3(0u);
-  s.inner.d = 0u;
-  s.inner.e = vec3(0.0f);
-  s.inner.f = 0.0f;
-  s.inner.g = mat2x3(vec3(0.0f), vec3(0.0f));
-  s.inner.h = mat3x2(vec2(0.0f), vec2(0.0f), vec2(0.0f));
-  Inner tint_symbol_1 = Inner(0);
-  s.inner.i = tint_symbol_1;
-  Inner tint_symbol_2[4] = Inner[4](Inner(0), Inner(0), Inner(0), Inner(0));
-  s.inner.j = tint_symbol_2;
+  sb.inner.scalar_f32 = 0.0f;
+  sb.inner.scalar_i32 = 0;
+  sb.inner.scalar_u32 = 0u;
+  sb.inner.vec2_f32 = vec2(0.0f);
+  sb.inner.vec2_i32 = ivec2(0);
+  sb.inner.vec2_u32 = uvec2(0u);
+  sb.inner.vec3_f32 = vec3(0.0f);
+  sb.inner.vec3_i32 = ivec3(0);
+  sb.inner.vec3_u32 = uvec3(0u);
+  sb.inner.vec4_f32 = vec4(0.0f);
+  sb.inner.vec4_i32 = ivec4(0);
+  sb.inner.vec4_u32 = uvec4(0u);
+  sb.inner.mat2x2_f32 = mat2(vec2(0.0f), vec2(0.0f));
+  sb.inner.mat2x3_f32 = mat2x3(vec3(0.0f), vec3(0.0f));
+  sb.inner.mat2x4_f32 = mat2x4(vec4(0.0f), vec4(0.0f));
+  sb.inner.mat3x2_f32 = mat3x2(vec2(0.0f), vec2(0.0f), vec2(0.0f));
+  sb.inner.mat3x3_f32 = mat3(vec3(0.0f), vec3(0.0f), vec3(0.0f));
+  sb.inner.mat3x4_f32 = mat3x4(vec4(0.0f), vec4(0.0f), vec4(0.0f));
+  sb.inner.mat4x2_f32 = mat4x2(vec2(0.0f), vec2(0.0f), vec2(0.0f), vec2(0.0f));
+  sb.inner.mat4x3_f32 = mat4x3(vec3(0.0f), vec3(0.0f), vec3(0.0f), vec3(0.0f));
+  sb.inner.mat4x4_f32 = mat4(vec4(0.0f), vec4(0.0f), vec4(0.0f), vec4(0.0f));
+  vec3 tint_symbol_1[2] = vec3[2](vec3(0.0f), vec3(0.0f));
+  sb.inner.arr2_vec3_f32 = tint_symbol_1;
+  Inner tint_symbol_2 = Inner(0, 0.0f);
+  sb.inner.struct_inner = tint_symbol_2;
+  Inner tint_symbol_3[4] = Inner[4](Inner(0, 0.0f), Inner(0, 0.0f), Inner(0, 0.0f), Inner(0, 0.0f));
+  sb.inner.array_struct_inner = tint_symbol_3;
 }
 
 layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;

diff --git a/test/tint/buffer/storage/static_index/write.wgsl.expected.msl b/test/tint/buffer/storage/static_index/write.wgsl.expected.msl
index 1b21710..838c6a8 100644
--- a/test/tint/buffer/storage/static_index/write.wgsl.expected.msl
+++ b/test/tint/buffer/storage/static_index/write.wgsl.expected.msl

@@ -15,36 +15,72 @@
 };
 
 struct Inner {
-  /* 0x0000 */ int x;
+  /* 0x0000 */ int scalar_i32;
+  /* 0x0004 */ float scalar_f32;
 };
 
 struct S {
-  /* 0x0000 */ packed_int3 a;
-  /* 0x000c */ int b;
-  /* 0x0010 */ packed_uint3 c;
-  /* 0x001c */ uint d;
-  /* 0x0020 */ packed_float3 e;
-  /* 0x002c */ float f;
-  /* 0x0030 */ float2x3 g;
-  /* 0x0050 */ float3x2 h;
-  /* 0x0068 */ Inner i;
-  /* 0x006c */ tint_array<Inner, 4> j;
-  /* 0x007c */ tint_array<int8_t, 4> tint_pad;
+  /* 0x0000 */ float scalar_f32;
+  /* 0x0004 */ int scalar_i32;
+  /* 0x0008 */ uint scalar_u32;
+  /* 0x000c */ tint_array<int8_t, 4> tint_pad;
+  /* 0x0010 */ float2 vec2_f32;
+  /* 0x0018 */ int2 vec2_i32;
+  /* 0x0020 */ uint2 vec2_u32;
+  /* 0x0028 */ tint_array<int8_t, 8> tint_pad_1;
+  /* 0x0030 */ packed_float3 vec3_f32;
+  /* 0x003c */ tint_array<int8_t, 4> tint_pad_2;
+  /* 0x0040 */ packed_int3 vec3_i32;
+  /* 0x004c */ tint_array<int8_t, 4> tint_pad_3;
+  /* 0x0050 */ packed_uint3 vec3_u32;
+  /* 0x005c */ tint_array<int8_t, 4> tint_pad_4;
+  /* 0x0060 */ float4 vec4_f32;
+  /* 0x0070 */ int4 vec4_i32;
+  /* 0x0080 */ uint4 vec4_u32;
+  /* 0x0090 */ float2x2 mat2x2_f32;
+  /* 0x00a0 */ float2x3 mat2x3_f32;
+  /* 0x00c0 */ float2x4 mat2x4_f32;
+  /* 0x00e0 */ float3x2 mat3x2_f32;
+  /* 0x00f8 */ tint_array<int8_t, 8> tint_pad_5;
+  /* 0x0100 */ float3x3 mat3x3_f32;
+  /* 0x0130 */ float3x4 mat3x4_f32;
+  /* 0x0160 */ float4x2 mat4x2_f32;
+  /* 0x0180 */ float4x3 mat4x3_f32;
+  /* 0x01c0 */ float4x4 mat4x4_f32;
+  /* 0x0200 */ tint_array<float3, 2> arr2_vec3_f32;
+  /* 0x0220 */ Inner struct_inner;
+  /* 0x0228 */ tint_array<Inner, 4> array_struct_inner;
+  /* 0x0248 */ tint_array<int8_t, 8> tint_pad_6;
 };
 
-kernel void tint_symbol(device S* tint_symbol_3 [[buffer(0)]]) {
-  (*(tint_symbol_3)).a = int3(0);
-  (*(tint_symbol_3)).b = 0;
-  (*(tint_symbol_3)).c = uint3(0u);
-  (*(tint_symbol_3)).d = 0u;
-  (*(tint_symbol_3)).e = float3(0.0f);
-  (*(tint_symbol_3)).f = 0.0f;
-  (*(tint_symbol_3)).g = float2x3(float3(0.0f), float3(0.0f));
-  (*(tint_symbol_3)).h = float3x2(float2(0.0f), float2(0.0f), float2(0.0f));
-  Inner const tint_symbol_1 = Inner{};
-  (*(tint_symbol_3)).i = tint_symbol_1;
-  tint_array<Inner, 4> const tint_symbol_2 = tint_array<Inner, 4>{};
-  (*(tint_symbol_3)).j = tint_symbol_2;
+kernel void tint_symbol(device S* tint_symbol_4 [[buffer(0)]]) {
+  (*(tint_symbol_4)).scalar_f32 = 0.0f;
+  (*(tint_symbol_4)).scalar_i32 = 0;
+  (*(tint_symbol_4)).scalar_u32 = 0u;
+  (*(tint_symbol_4)).vec2_f32 = float2(0.0f);
+  (*(tint_symbol_4)).vec2_i32 = int2(0);
+  (*(tint_symbol_4)).vec2_u32 = uint2(0u);
+  (*(tint_symbol_4)).vec3_f32 = float3(0.0f);
+  (*(tint_symbol_4)).vec3_i32 = int3(0);
+  (*(tint_symbol_4)).vec3_u32 = uint3(0u);
+  (*(tint_symbol_4)).vec4_f32 = float4(0.0f);
+  (*(tint_symbol_4)).vec4_i32 = int4(0);
+  (*(tint_symbol_4)).vec4_u32 = uint4(0u);
+  (*(tint_symbol_4)).mat2x2_f32 = float2x2(float2(0.0f), float2(0.0f));
+  (*(tint_symbol_4)).mat2x3_f32 = float2x3(float3(0.0f), float3(0.0f));
+  (*(tint_symbol_4)).mat2x4_f32 = float2x4(float4(0.0f), float4(0.0f));
+  (*(tint_symbol_4)).mat3x2_f32 = float3x2(float2(0.0f), float2(0.0f), float2(0.0f));
+  (*(tint_symbol_4)).mat3x3_f32 = float3x3(float3(0.0f), float3(0.0f), float3(0.0f));
+  (*(tint_symbol_4)).mat3x4_f32 = float3x4(float4(0.0f), float4(0.0f), float4(0.0f));
+  (*(tint_symbol_4)).mat4x2_f32 = float4x2(float2(0.0f), float2(0.0f), float2(0.0f), float2(0.0f));
+  (*(tint_symbol_4)).mat4x3_f32 = float4x3(float3(0.0f), float3(0.0f), float3(0.0f), float3(0.0f));
+  (*(tint_symbol_4)).mat4x4_f32 = float4x4(float4(0.0f), float4(0.0f), float4(0.0f), float4(0.0f));
+  tint_array<float3, 2> const tint_symbol_1 = tint_array<float3, 2>{};
+  (*(tint_symbol_4)).arr2_vec3_f32 = tint_symbol_1;
+  Inner const tint_symbol_2 = Inner{};
+  (*(tint_symbol_4)).struct_inner = tint_symbol_2;
+  tint_array<Inner, 4> const tint_symbol_3 = tint_array<Inner, 4>{};
+  (*(tint_symbol_4)).array_struct_inner = tint_symbol_3;
   return;
 }
 

diff --git a/test/tint/buffer/storage/static_index/write.wgsl.expected.spvasm b/test/tint/buffer/storage/static_index/write.wgsl.expected.spvasm
index aae7630..2faa9c9 100644
--- a/test/tint/buffer/storage/static_index/write.wgsl.expected.spvasm
+++ b/test/tint/buffer/storage/static_index/write.wgsl.expected.spvasm

@@ -1,117 +1,245 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 0
-; Bound: 60
+; Bound: 129
 ; Schema: 0
                OpCapability Shader
                OpMemoryModel Logical GLSL450
                OpEntryPoint GLCompute %main "main"
                OpExecutionMode %main LocalSize 1 1 1
-               OpName %s_block "s_block"
-               OpMemberName %s_block 0 "inner"
+               OpName %sb_block "sb_block"
+               OpMemberName %sb_block 0 "inner"
                OpName %S "S"
-               OpMemberName %S 0 "a"
-               OpMemberName %S 1 "b"
-               OpMemberName %S 2 "c"
-               OpMemberName %S 3 "d"
-               OpMemberName %S 4 "e"
-               OpMemberName %S 5 "f"
-               OpMemberName %S 6 "g"
-               OpMemberName %S 7 "h"
-               OpMemberName %S 8 "i"
+               OpMemberName %S 0 "scalar_f32"
+               OpMemberName %S 1 "scalar_i32"
+               OpMemberName %S 2 "scalar_u32"
+               OpMemberName %S 3 "vec2_f32"
+               OpMemberName %S 4 "vec2_i32"
+               OpMemberName %S 5 "vec2_u32"
+               OpMemberName %S 6 "vec3_f32"
+               OpMemberName %S 7 "vec3_i32"
+               OpMemberName %S 8 "vec3_u32"
+               OpMemberName %S 9 "vec4_f32"
+               OpMemberName %S 10 "vec4_i32"
+               OpMemberName %S 11 "vec4_u32"
+               OpMemberName %S 12 "mat2x2_f32"
+               OpMemberName %S 13 "mat2x3_f32"
+               OpMemberName %S 14 "mat2x4_f32"
+               OpMemberName %S 15 "mat3x2_f32"
+               OpMemberName %S 16 "mat3x3_f32"
+               OpMemberName %S 17 "mat3x4_f32"
+               OpMemberName %S 18 "mat4x2_f32"
+               OpMemberName %S 19 "mat4x3_f32"
+               OpMemberName %S 20 "mat4x4_f32"
+               OpMemberName %S 21 "arr2_vec3_f32"
+               OpMemberName %S 22 "struct_inner"
                OpName %Inner "Inner"
-               OpMemberName %Inner 0 "x"
-               OpMemberName %S 9 "j"
-               OpName %s "s"
+               OpMemberName %Inner 0 "scalar_i32"
+               OpMemberName %Inner 1 "scalar_f32"
+               OpMemberName %S 23 "array_struct_inner"
+               OpName %sb "sb"
                OpName %main "main"
-               OpDecorate %s_block Block
-               OpMemberDecorate %s_block 0 Offset 0
+               OpDecorate %sb_block Block
+               OpMemberDecorate %sb_block 0 Offset 0
                OpMemberDecorate %S 0 Offset 0
-               OpMemberDecorate %S 1 Offset 12
-               OpMemberDecorate %S 2 Offset 16
-               OpMemberDecorate %S 3 Offset 28
-               OpMemberDecorate %S 4 Offset 32
-               OpMemberDecorate %S 5 Offset 44
+               OpMemberDecorate %S 1 Offset 4
+               OpMemberDecorate %S 2 Offset 8
+               OpMemberDecorate %S 3 Offset 16
+               OpMemberDecorate %S 4 Offset 24
+               OpMemberDecorate %S 5 Offset 32
                OpMemberDecorate %S 6 Offset 48
-               OpMemberDecorate %S 6 ColMajor
-               OpMemberDecorate %S 6 MatrixStride 16
-               OpMemberDecorate %S 7 Offset 80
-               OpMemberDecorate %S 7 ColMajor
-               OpMemberDecorate %S 7 MatrixStride 8
-               OpMemberDecorate %S 8 Offset 104
+               OpMemberDecorate %S 7 Offset 64
+               OpMemberDecorate %S 8 Offset 80
+               OpMemberDecorate %S 9 Offset 96
+               OpMemberDecorate %S 10 Offset 112
+               OpMemberDecorate %S 11 Offset 128
+               OpMemberDecorate %S 12 Offset 144
+               OpMemberDecorate %S 12 ColMajor
+               OpMemberDecorate %S 12 MatrixStride 8
+               OpMemberDecorate %S 13 Offset 160
+               OpMemberDecorate %S 13 ColMajor
+               OpMemberDecorate %S 13 MatrixStride 16
+               OpMemberDecorate %S 14 Offset 192
+               OpMemberDecorate %S 14 ColMajor
+               OpMemberDecorate %S 14 MatrixStride 16
+               OpMemberDecorate %S 15 Offset 224
+               OpMemberDecorate %S 15 ColMajor
+               OpMemberDecorate %S 15 MatrixStride 8
+               OpMemberDecorate %S 16 Offset 256
+               OpMemberDecorate %S 16 ColMajor
+               OpMemberDecorate %S 16 MatrixStride 16
+               OpMemberDecorate %S 17 Offset 304
+               OpMemberDecorate %S 17 ColMajor
+               OpMemberDecorate %S 17 MatrixStride 16
+               OpMemberDecorate %S 18 Offset 352
+               OpMemberDecorate %S 18 ColMajor
+               OpMemberDecorate %S 18 MatrixStride 8
+               OpMemberDecorate %S 19 Offset 384
+               OpMemberDecorate %S 19 ColMajor
+               OpMemberDecorate %S 19 MatrixStride 16
+               OpMemberDecorate %S 20 Offset 448
+               OpMemberDecorate %S 20 ColMajor
+               OpMemberDecorate %S 20 MatrixStride 16
+               OpMemberDecorate %S 21 Offset 512
+               OpDecorate %_arr_v3float_uint_2 ArrayStride 16
+               OpMemberDecorate %S 22 Offset 544
                OpMemberDecorate %Inner 0 Offset 0
-               OpMemberDecorate %S 9 Offset 108
-               OpDecorate %_arr_Inner_uint_4 ArrayStride 4
-               OpDecorate %s Binding 0
-               OpDecorate %s DescriptorSet 0
-        %int = OpTypeInt 32 1
-      %v3int = OpTypeVector %int 3
-       %uint = OpTypeInt 32 0
-     %v3uint = OpTypeVector %uint 3
+               OpMemberDecorate %Inner 1 Offset 4
+               OpMemberDecorate %S 23 Offset 552
+               OpDecorate %_arr_Inner_uint_4 ArrayStride 8
+               OpDecorate %sb Binding 0
+               OpDecorate %sb DescriptorSet 0
       %float = OpTypeFloat 32
-    %v3float = OpTypeVector %float 3
-%mat2v3float = OpTypeMatrix %v3float 2
+        %int = OpTypeInt 32 1
+       %uint = OpTypeInt 32 0
     %v2float = OpTypeVector %float 2
+      %v2int = OpTypeVector %int 2
+     %v2uint = OpTypeVector %uint 2
+    %v3float = OpTypeVector %float 3
+      %v3int = OpTypeVector %int 3
+     %v3uint = OpTypeVector %uint 3
+    %v4float = OpTypeVector %float 4
+      %v4int = OpTypeVector %int 4
+     %v4uint = OpTypeVector %uint 4
+%mat2v2float = OpTypeMatrix %v2float 2
+%mat2v3float = OpTypeMatrix %v3float 2
+%mat2v4float = OpTypeMatrix %v4float 2
 %mat3v2float = OpTypeMatrix %v2float 3
-      %Inner = OpTypeStruct %int
+%mat3v3float = OpTypeMatrix %v3float 3
+%mat3v4float = OpTypeMatrix %v4float 3
+%mat4v2float = OpTypeMatrix %v2float 4
+%mat4v3float = OpTypeMatrix %v3float 4
+%mat4v4float = OpTypeMatrix %v4float 4
+     %uint_2 = OpConstant %uint 2
+%_arr_v3float_uint_2 = OpTypeArray %v3float %uint_2
+      %Inner = OpTypeStruct %int %float
      %uint_4 = OpConstant %uint 4
 %_arr_Inner_uint_4 = OpTypeArray %Inner %uint_4
-          %S = OpTypeStruct %v3int %int %v3uint %uint %v3float %float %mat2v3float %mat3v2float %Inner %_arr_Inner_uint_4
-    %s_block = OpTypeStruct %S
-%_ptr_StorageBuffer_s_block = OpTypePointer StorageBuffer %s_block
-          %s = OpVariable %_ptr_StorageBuffer_s_block StorageBuffer
+          %S = OpTypeStruct %float %int %uint %v2float %v2int %v2uint %v3float %v3int %v3uint %v4float %v4int %v4uint %mat2v2float %mat2v3float %mat2v4float %mat3v2float %mat3v3float %mat3v4float %mat4v2float %mat4v3float %mat4v4float %_arr_v3float_uint_2 %Inner %_arr_Inner_uint_4
+   %sb_block = OpTypeStruct %S
+%_ptr_StorageBuffer_sb_block = OpTypePointer StorageBuffer %sb_block
+         %sb = OpVariable %_ptr_StorageBuffer_sb_block StorageBuffer
        %void = OpTypeVoid
-         %17 = OpTypeFunction %void
+         %31 = OpTypeFunction %void
      %uint_0 = OpConstant %uint 0
-%_ptr_StorageBuffer_v3int = OpTypePointer StorageBuffer %v3int
-         %24 = OpConstantNull %v3int
+%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
+         %38 = OpConstantNull %float
      %uint_1 = OpConstant %uint 1
 %_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int
-         %28 = OpConstantNull %int
-     %uint_2 = OpConstant %uint 2
-%_ptr_StorageBuffer_v3uint = OpTypePointer StorageBuffer %v3uint
-         %32 = OpConstantNull %v3uint
-     %uint_3 = OpConstant %uint 3
+         %42 = OpConstantNull %int
 %_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
-         %36 = OpConstantNull %uint
-%_ptr_StorageBuffer_v3float = OpTypePointer StorageBuffer %v3float
-         %39 = OpConstantNull %v3float
+         %45 = OpConstantNull %uint
+     %uint_3 = OpConstant %uint 3
+%_ptr_StorageBuffer_v2float = OpTypePointer StorageBuffer %v2float
+         %49 = OpConstantNull %v2float
+%_ptr_StorageBuffer_v2int = OpTypePointer StorageBuffer %v2int
+         %52 = OpConstantNull %v2int
      %uint_5 = OpConstant %uint 5
-%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
-         %43 = OpConstantNull %float
+%_ptr_StorageBuffer_v2uint = OpTypePointer StorageBuffer %v2uint
+         %56 = OpConstantNull %v2uint
      %uint_6 = OpConstant %uint 6
-%_ptr_StorageBuffer_mat2v3float = OpTypePointer StorageBuffer %mat2v3float
-         %47 = OpConstantNull %mat2v3float
+%_ptr_StorageBuffer_v3float = OpTypePointer StorageBuffer %v3float
+         %60 = OpConstantNull %v3float
      %uint_7 = OpConstant %uint 7
-%_ptr_StorageBuffer_mat3v2float = OpTypePointer StorageBuffer %mat3v2float
-         %51 = OpConstantNull %mat3v2float
+%_ptr_StorageBuffer_v3int = OpTypePointer StorageBuffer %v3int
+         %64 = OpConstantNull %v3int
      %uint_8 = OpConstant %uint 8
-%_ptr_StorageBuffer_Inner = OpTypePointer StorageBuffer %Inner
-         %55 = OpConstantNull %Inner
+%_ptr_StorageBuffer_v3uint = OpTypePointer StorageBuffer %v3uint
+         %68 = OpConstantNull %v3uint
      %uint_9 = OpConstant %uint 9
+%_ptr_StorageBuffer_v4float = OpTypePointer StorageBuffer %v4float
+         %72 = OpConstantNull %v4float
+    %uint_10 = OpConstant %uint 10
+%_ptr_StorageBuffer_v4int = OpTypePointer StorageBuffer %v4int
+         %76 = OpConstantNull %v4int
+    %uint_11 = OpConstant %uint 11
+%_ptr_StorageBuffer_v4uint = OpTypePointer StorageBuffer %v4uint
+         %80 = OpConstantNull %v4uint
+    %uint_12 = OpConstant %uint 12
+%_ptr_StorageBuffer_mat2v2float = OpTypePointer StorageBuffer %mat2v2float
+         %84 = OpConstantNull %mat2v2float
+    %uint_13 = OpConstant %uint 13
+%_ptr_StorageBuffer_mat2v3float = OpTypePointer StorageBuffer %mat2v3float
+         %88 = OpConstantNull %mat2v3float
+    %uint_14 = OpConstant %uint 14
+%_ptr_StorageBuffer_mat2v4float = OpTypePointer StorageBuffer %mat2v4float
+         %92 = OpConstantNull %mat2v4float
+    %uint_15 = OpConstant %uint 15
+%_ptr_StorageBuffer_mat3v2float = OpTypePointer StorageBuffer %mat3v2float
+         %96 = OpConstantNull %mat3v2float
+    %uint_16 = OpConstant %uint 16
+%_ptr_StorageBuffer_mat3v3float = OpTypePointer StorageBuffer %mat3v3float
+        %100 = OpConstantNull %mat3v3float
+    %uint_17 = OpConstant %uint 17
+%_ptr_StorageBuffer_mat3v4float = OpTypePointer StorageBuffer %mat3v4float
+        %104 = OpConstantNull %mat3v4float
+    %uint_18 = OpConstant %uint 18
+%_ptr_StorageBuffer_mat4v2float = OpTypePointer StorageBuffer %mat4v2float
+        %108 = OpConstantNull %mat4v2float
+    %uint_19 = OpConstant %uint 19
+%_ptr_StorageBuffer_mat4v3float = OpTypePointer StorageBuffer %mat4v3float
+        %112 = OpConstantNull %mat4v3float
+    %uint_20 = OpConstant %uint 20
+%_ptr_StorageBuffer_mat4v4float = OpTypePointer StorageBuffer %mat4v4float
+        %116 = OpConstantNull %mat4v4float
+    %uint_21 = OpConstant %uint 21
+%_ptr_StorageBuffer__arr_v3float_uint_2 = OpTypePointer StorageBuffer %_arr_v3float_uint_2
+        %120 = OpConstantNull %_arr_v3float_uint_2
+    %uint_22 = OpConstant %uint 22
+%_ptr_StorageBuffer_Inner = OpTypePointer StorageBuffer %Inner
+        %124 = OpConstantNull %Inner
+    %uint_23 = OpConstant %uint 23
 %_ptr_StorageBuffer__arr_Inner_uint_4 = OpTypePointer StorageBuffer %_arr_Inner_uint_4
-         %59 = OpConstantNull %_arr_Inner_uint_4
-       %main = OpFunction %void None %17
-         %20 = OpLabel
-         %23 = OpAccessChain %_ptr_StorageBuffer_v3int %s %uint_0 %uint_0
-               OpStore %23 %24
-         %27 = OpAccessChain %_ptr_StorageBuffer_int %s %uint_0 %uint_1
-               OpStore %27 %28
-         %31 = OpAccessChain %_ptr_StorageBuffer_v3uint %s %uint_0 %uint_2
-               OpStore %31 %32
-         %35 = OpAccessChain %_ptr_StorageBuffer_uint %s %uint_0 %uint_3
-               OpStore %35 %36
-         %38 = OpAccessChain %_ptr_StorageBuffer_v3float %s %uint_0 %uint_4
-               OpStore %38 %39
-         %42 = OpAccessChain %_ptr_StorageBuffer_float %s %uint_0 %uint_5
-               OpStore %42 %43
-         %46 = OpAccessChain %_ptr_StorageBuffer_mat2v3float %s %uint_0 %uint_6
-               OpStore %46 %47
-         %50 = OpAccessChain %_ptr_StorageBuffer_mat3v2float %s %uint_0 %uint_7
-               OpStore %50 %51
-         %54 = OpAccessChain %_ptr_StorageBuffer_Inner %s %uint_0 %uint_8
-               OpStore %54 %55
-         %58 = OpAccessChain %_ptr_StorageBuffer__arr_Inner_uint_4 %s %uint_0 %uint_9
-               OpStore %58 %59
+        %128 = OpConstantNull %_arr_Inner_uint_4
+       %main = OpFunction %void None %31
+         %34 = OpLabel
+         %37 = OpAccessChain %_ptr_StorageBuffer_float %sb %uint_0 %uint_0
+               OpStore %37 %38
+         %41 = OpAccessChain %_ptr_StorageBuffer_int %sb %uint_0 %uint_1
+               OpStore %41 %42
+         %44 = OpAccessChain %_ptr_StorageBuffer_uint %sb %uint_0 %uint_2
+               OpStore %44 %45
+         %48 = OpAccessChain %_ptr_StorageBuffer_v2float %sb %uint_0 %uint_3
+               OpStore %48 %49
+         %51 = OpAccessChain %_ptr_StorageBuffer_v2int %sb %uint_0 %uint_4
+               OpStore %51 %52
+         %55 = OpAccessChain %_ptr_StorageBuffer_v2uint %sb %uint_0 %uint_5
+               OpStore %55 %56
+         %59 = OpAccessChain %_ptr_StorageBuffer_v3float %sb %uint_0 %uint_6
+               OpStore %59 %60
+         %63 = OpAccessChain %_ptr_StorageBuffer_v3int %sb %uint_0 %uint_7
+               OpStore %63 %64
+         %67 = OpAccessChain %_ptr_StorageBuffer_v3uint %sb %uint_0 %uint_8
+               OpStore %67 %68
+         %71 = OpAccessChain %_ptr_StorageBuffer_v4float %sb %uint_0 %uint_9
+               OpStore %71 %72
+         %75 = OpAccessChain %_ptr_StorageBuffer_v4int %sb %uint_0 %uint_10
+               OpStore %75 %76
+         %79 = OpAccessChain %_ptr_StorageBuffer_v4uint %sb %uint_0 %uint_11
+               OpStore %79 %80
+         %83 = OpAccessChain %_ptr_StorageBuffer_mat2v2float %sb %uint_0 %uint_12
+               OpStore %83 %84
+         %87 = OpAccessChain %_ptr_StorageBuffer_mat2v3float %sb %uint_0 %uint_13
+               OpStore %87 %88
+         %91 = OpAccessChain %_ptr_StorageBuffer_mat2v4float %sb %uint_0 %uint_14
+               OpStore %91 %92
+         %95 = OpAccessChain %_ptr_StorageBuffer_mat3v2float %sb %uint_0 %uint_15
+               OpStore %95 %96
+         %99 = OpAccessChain %_ptr_StorageBuffer_mat3v3float %sb %uint_0 %uint_16
+               OpStore %99 %100
+        %103 = OpAccessChain %_ptr_StorageBuffer_mat3v4float %sb %uint_0 %uint_17
+               OpStore %103 %104
+        %107 = OpAccessChain %_ptr_StorageBuffer_mat4v2float %sb %uint_0 %uint_18
+               OpStore %107 %108
+        %111 = OpAccessChain %_ptr_StorageBuffer_mat4v3float %sb %uint_0 %uint_19
+               OpStore %111 %112
+        %115 = OpAccessChain %_ptr_StorageBuffer_mat4v4float %sb %uint_0 %uint_20
+               OpStore %115 %116
+        %119 = OpAccessChain %_ptr_StorageBuffer__arr_v3float_uint_2 %sb %uint_0 %uint_21
+               OpStore %119 %120
+        %123 = OpAccessChain %_ptr_StorageBuffer_Inner %sb %uint_0 %uint_22
+               OpStore %123 %124
+        %127 = OpAccessChain %_ptr_StorageBuffer__arr_Inner_uint_4 %sb %uint_0 %uint_23
+               OpStore %127 %128
                OpReturn
                OpFunctionEnd

diff --git a/test/tint/buffer/storage/static_index/write.wgsl.expected.wgsl b/test/tint/buffer/storage/static_index/write.wgsl.expected.wgsl
index af8c2de..448df1f 100644
--- a/test/tint/buffer/storage/static_index/write.wgsl.expected.wgsl
+++ b/test/tint/buffer/storage/static_index/write.wgsl.expected.wgsl

@@ -1,32 +1,61 @@
 struct Inner {
-  x : i32,
+  scalar_i32 : i32,
+  scalar_f32 : f32,
 }
 
 struct S {
-  a : vec3<i32>,
-  b : i32,
-  c : vec3<u32>,
-  d : u32,
-  e : vec3<f32>,
-  f : f32,
-  g : mat2x3<f32>,
-  h : mat3x2<f32>,
-  i : Inner,
-  j : array<Inner, 4>,
+  scalar_f32 : f32,
+  scalar_i32 : i32,
+  scalar_u32 : u32,
+  vec2_f32 : vec2<f32>,
+  vec2_i32 : vec2<i32>,
+  vec2_u32 : vec2<u32>,
+  vec3_f32 : vec3<f32>,
+  vec3_i32 : vec3<i32>,
+  vec3_u32 : vec3<u32>,
+  vec4_f32 : vec4<f32>,
+  vec4_i32 : vec4<i32>,
+  vec4_u32 : vec4<u32>,
+  mat2x2_f32 : mat2x2<f32>,
+  mat2x3_f32 : mat2x3<f32>,
+  mat2x4_f32 : mat2x4<f32>,
+  mat3x2_f32 : mat3x2<f32>,
+  mat3x3_f32 : mat3x3<f32>,
+  mat3x4_f32 : mat3x4<f32>,
+  mat4x2_f32 : mat4x2<f32>,
+  mat4x3_f32 : mat4x3<f32>,
+  mat4x4_f32 : mat4x4<f32>,
+  arr2_vec3_f32 : array<vec3<f32>, 2>,
+  struct_inner : Inner,
+  array_struct_inner : array<Inner, 4>,
 }
 
-@binding(0) @group(0) var<storage, read_write> s : S;
+@binding(0) @group(0) var<storage, read_write> sb : S;
 
 @compute @workgroup_size(1)
 fn main() {
-  s.a = vec3<i32>();
-  s.b = i32();
-  s.c = vec3<u32>();
-  s.d = u32();
-  s.e = vec3<f32>();
-  s.f = f32();
-  s.g = mat2x3<f32>();
-  s.h = mat3x2<f32>();
-  s.i = Inner();
-  s.j = array<Inner, 4>();
+  sb.scalar_f32 = f32();
+  sb.scalar_i32 = i32();
+  sb.scalar_u32 = u32();
+  sb.vec2_f32 = vec2<f32>();
+  sb.vec2_i32 = vec2<i32>();
+  sb.vec2_u32 = vec2<u32>();
+  sb.vec3_f32 = vec3<f32>();
+  sb.vec3_i32 = vec3<i32>();
+  sb.vec3_u32 = vec3<u32>();
+  sb.vec4_f32 = vec4<f32>();
+  sb.vec4_i32 = vec4<i32>();
+  sb.vec4_u32 = vec4<u32>();
+  sb.mat2x2_f32 = mat2x2<f32>();
+  sb.mat2x3_f32 = mat2x3<f32>();
+  sb.mat2x4_f32 = mat2x4<f32>();
+  sb.mat3x2_f32 = mat3x2<f32>();
+  sb.mat3x3_f32 = mat3x3<f32>();
+  sb.mat3x4_f32 = mat3x4<f32>();
+  sb.mat4x2_f32 = mat4x2<f32>();
+  sb.mat4x3_f32 = mat4x3<f32>();
+  sb.mat4x4_f32 = mat4x4<f32>();
+  sb.arr2_vec3_f32 = array<vec3<f32>, 2>();
+  sb.struct_inner = Inner();
+  sb.array_struct_inner = array<Inner, 4>();
 }

diff --git a/test/tint/buffer/storage/static_index/write_f16.wgsl b/test/tint/buffer/storage/static_index/write_f16.wgsl
new file mode 100644
index 0000000..5e1d551
--- /dev/null
+++ b/test/tint/buffer/storage/static_index/write_f16.wgsl

@@ -0,0 +1,92 @@
+enable f16;
+
+struct Inner {
+    scalar_i32 : i32,
+    scalar_f32 : f32,
+    scalar_f16 : f16,
+};
+
+struct S {
+    scalar_f32 : f32,
+    scalar_i32 : i32,
+    scalar_u32 : u32,
+    scalar_f16 : f16,
+    vec2_f32 : vec2<f32>,
+    vec2_i32 : vec2<i32>,
+    vec2_u32 : vec2<u32>,
+    vec2_f16 : vec2<f16>,
+    vec3_f32 : vec3<f32>,
+    vec3_i32 : vec3<i32>,
+    vec3_u32 : vec3<u32>,
+    vec3_f16 : vec3<f16>,
+    vec4_f32 : vec4<f32>,
+    vec4_i32 : vec4<i32>,
+    vec4_u32 : vec4<u32>,
+    vec4_f16 : vec4<f16>,
+    mat2x2_f32 : mat2x2<f32>,
+    mat2x3_f32 : mat2x3<f32>,
+    mat2x4_f32 : mat2x4<f32>,
+    mat3x2_f32 : mat3x2<f32>,
+    mat3x3_f32 : mat3x3<f32>,
+    mat3x4_f32 : mat3x4<f32>,
+    mat4x2_f32 : mat4x2<f32>,
+    mat4x3_f32 : mat4x3<f32>,
+    mat4x4_f32 : mat4x4<f32>,
+    mat2x2_f16 : mat2x2<f16>,
+    mat2x3_f16 : mat2x3<f16>,
+    mat2x4_f16 : mat2x4<f16>,
+    mat3x2_f16 : mat3x2<f16>,
+    mat3x3_f16 : mat3x3<f16>,
+    mat3x4_f16 : mat3x4<f16>,
+    mat4x2_f16 : mat4x2<f16>,
+    mat4x3_f16 : mat4x3<f16>,
+    mat4x4_f16 : mat4x4<f16>,
+    arr2_vec3_f32 : array<vec3<f32>, 2>,
+    arr2_mat4x2_f16 : array<mat4x2<f16>, 2>,
+    struct_inner : Inner,
+    array_struct_inner : array<Inner, 4>,
+};
+
+@binding(0) @group(0) var<storage, read_write> sb : S;
+
+@compute @workgroup_size(1)
+fn main() {
+    sb.scalar_f32 = f32();
+    sb.scalar_i32 = i32();
+    sb.scalar_u32 = u32();
+    sb.scalar_f16 = f16();
+    sb.vec2_f32 = vec2<f32>();
+    sb.vec2_i32 = vec2<i32>();
+    sb.vec2_u32 = vec2<u32>();
+    sb.vec2_f16 = vec2<f16>();
+    sb.vec3_f32 = vec3<f32>();
+    sb.vec3_i32 = vec3<i32>();
+    sb.vec3_u32 = vec3<u32>();
+    sb.vec3_f16 = vec3<f16>();
+    sb.vec4_f32 = vec4<f32>();
+    sb.vec4_i32 = vec4<i32>();
+    sb.vec4_u32 = vec4<u32>();
+    sb.vec4_f16 = vec4<f16>();
+    sb.mat2x2_f32 = mat2x2<f32>();
+    sb.mat2x3_f32 = mat2x3<f32>();
+    sb.mat2x4_f32 = mat2x4<f32>();
+    sb.mat3x2_f32 = mat3x2<f32>();
+    sb.mat3x3_f32 = mat3x3<f32>();
+    sb.mat3x4_f32 = mat3x4<f32>();
+    sb.mat4x2_f32 = mat4x2<f32>();
+    sb.mat4x3_f32 = mat4x3<f32>();
+    sb.mat4x4_f32 = mat4x4<f32>();
+    sb.mat2x2_f16 = mat2x2<f16>();
+    sb.mat2x3_f16 = mat2x3<f16>();
+    sb.mat2x4_f16 = mat2x4<f16>();
+    sb.mat3x2_f16 = mat3x2<f16>();
+    sb.mat3x3_f16 = mat3x3<f16>();
+    sb.mat3x4_f16 = mat3x4<f16>();
+    sb.mat4x2_f16 = mat4x2<f16>();
+    sb.mat4x3_f16 = mat4x3<f16>();
+    sb.mat4x4_f16 = mat4x4<f16>();
+    sb.arr2_vec3_f32 = array<vec3<f32>, 2>();
+    sb.arr2_mat4x2_f16 = array<mat4x2<f16>, 2>();
+    sb.struct_inner = Inner();
+    sb.array_struct_inner = array<Inner, 4>();
+}

diff --git a/test/tint/buffer/storage/static_index/write_f16.wgsl.expected.dxc.hlsl b/test/tint/buffer/storage/static_index/write_f16.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..526b6f5
--- /dev/null
+++ b/test/tint/buffer/storage/static_index/write_f16.wgsl.expected.dxc.hlsl

@@ -0,0 +1,195 @@
+struct Inner {
+  int scalar_i32;
+  float scalar_f32;
+  float16_t scalar_f16;
+};
+
+RWByteAddressBuffer sb : register(u0, space0);
+
+void tint_symbol_16(RWByteAddressBuffer buffer, uint offset, float2x2 value) {
+  buffer.Store2((offset + 0u), asuint(value[0u]));
+  buffer.Store2((offset + 8u), asuint(value[1u]));
+}
+
+void tint_symbol_17(RWByteAddressBuffer buffer, uint offset, float2x3 value) {
+  buffer.Store3((offset + 0u), asuint(value[0u]));
+  buffer.Store3((offset + 16u), asuint(value[1u]));
+}
+
+void tint_symbol_18(RWByteAddressBuffer buffer, uint offset, float2x4 value) {
+  buffer.Store4((offset + 0u), asuint(value[0u]));
+  buffer.Store4((offset + 16u), asuint(value[1u]));
+}
+
+void tint_symbol_19(RWByteAddressBuffer buffer, uint offset, float3x2 value) {
+  buffer.Store2((offset + 0u), asuint(value[0u]));
+  buffer.Store2((offset + 8u), asuint(value[1u]));
+  buffer.Store2((offset + 16u), asuint(value[2u]));
+}
+
+void tint_symbol_20(RWByteAddressBuffer buffer, uint offset, float3x3 value) {
+  buffer.Store3((offset + 0u), asuint(value[0u]));
+  buffer.Store3((offset + 16u), asuint(value[1u]));
+  buffer.Store3((offset + 32u), asuint(value[2u]));
+}
+
+void tint_symbol_21(RWByteAddressBuffer buffer, uint offset, float3x4 value) {
+  buffer.Store4((offset + 0u), asuint(value[0u]));
+  buffer.Store4((offset + 16u), asuint(value[1u]));
+  buffer.Store4((offset + 32u), asuint(value[2u]));
+}
+
+void tint_symbol_22(RWByteAddressBuffer buffer, uint offset, float4x2 value) {
+  buffer.Store2((offset + 0u), asuint(value[0u]));
+  buffer.Store2((offset + 8u), asuint(value[1u]));
+  buffer.Store2((offset + 16u), asuint(value[2u]));
+  buffer.Store2((offset + 24u), asuint(value[3u]));
+}
+
+void tint_symbol_23(RWByteAddressBuffer buffer, uint offset, float4x3 value) {
+  buffer.Store3((offset + 0u), asuint(value[0u]));
+  buffer.Store3((offset + 16u), asuint(value[1u]));
+  buffer.Store3((offset + 32u), asuint(value[2u]));
+  buffer.Store3((offset + 48u), asuint(value[3u]));
+}
+
+void tint_symbol_24(RWByteAddressBuffer buffer, uint offset, float4x4 value) {
+  buffer.Store4((offset + 0u), asuint(value[0u]));
+  buffer.Store4((offset + 16u), asuint(value[1u]));
+  buffer.Store4((offset + 32u), asuint(value[2u]));
+  buffer.Store4((offset + 48u), asuint(value[3u]));
+}
+
+void tint_symbol_25(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 2, 2> value) {
+  buffer.Store<vector<float16_t, 2> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 4u), value[1u]);
+}
+
+void tint_symbol_26(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 2, 3> value) {
+  buffer.Store<vector<float16_t, 3> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 8u), value[1u]);
+}
+
+void tint_symbol_27(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 2, 4> value) {
+  buffer.Store<vector<float16_t, 4> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 8u), value[1u]);
+}
+
+void tint_symbol_28(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 3, 2> value) {
+  buffer.Store<vector<float16_t, 2> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 4u), value[1u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 8u), value[2u]);
+}
+
+void tint_symbol_29(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 3, 3> value) {
+  buffer.Store<vector<float16_t, 3> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 8u), value[1u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 16u), value[2u]);
+}
+
+void tint_symbol_30(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 3, 4> value) {
+  buffer.Store<vector<float16_t, 4> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 8u), value[1u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 16u), value[2u]);
+}
+
+void tint_symbol_31(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 4, 2> value) {
+  buffer.Store<vector<float16_t, 2> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 4u), value[1u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 8u), value[2u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 12u), value[3u]);
+}
+
+void tint_symbol_32(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 4, 3> value) {
+  buffer.Store<vector<float16_t, 3> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 8u), value[1u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 16u), value[2u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 24u), value[3u]);
+}
+
+void tint_symbol_33(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 4, 4> value) {
+  buffer.Store<vector<float16_t, 4> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 8u), value[1u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 16u), value[2u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 24u), value[3u]);
+}
+
+void tint_symbol_34(RWByteAddressBuffer buffer, uint offset, float3 value[2]) {
+  float3 array[2] = value;
+  {
+    for(uint i = 0u; (i < 2u); i = (i + 1u)) {
+      buffer.Store3((offset + (i * 16u)), asuint(array[i]));
+    }
+  }
+}
+
+void tint_symbol_35(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 4, 2> value[2]) {
+  matrix<float16_t, 4, 2> array_1[2] = value;
+  {
+    for(uint i_1 = 0u; (i_1 < 2u); i_1 = (i_1 + 1u)) {
+      tint_symbol_31(buffer, (offset + (i_1 * 16u)), array_1[i_1]);
+    }
+  }
+}
+
+void tint_symbol_36(RWByteAddressBuffer buffer, uint offset, Inner value) {
+  buffer.Store((offset + 0u), asuint(value.scalar_i32));
+  buffer.Store((offset + 4u), asuint(value.scalar_f32));
+  buffer.Store<float16_t>((offset + 8u), value.scalar_f16);
+}
+
+void tint_symbol_37(RWByteAddressBuffer buffer, uint offset, Inner value[4]) {
+  Inner array_2[4] = value;
+  {
+    for(uint i_2 = 0u; (i_2 < 4u); i_2 = (i_2 + 1u)) {
+      tint_symbol_36(buffer, (offset + (i_2 * 12u)), array_2[i_2]);
+    }
+  }
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  sb.Store(0u, asuint(0.0f));
+  sb.Store(4u, asuint(0));
+  sb.Store(8u, asuint(0u));
+  sb.Store<float16_t>(12u, float16_t(0.0h));
+  sb.Store2(16u, asuint((0.0f).xx));
+  sb.Store2(24u, asuint((0).xx));
+  sb.Store2(32u, asuint((0u).xx));
+  sb.Store<vector<float16_t, 2> >(40u, (float16_t(0.0h)).xx);
+  sb.Store3(48u, asuint((0.0f).xxx));
+  sb.Store3(64u, asuint((0).xxx));
+  sb.Store3(80u, asuint((0u).xxx));
+  sb.Store<vector<float16_t, 3> >(96u, (float16_t(0.0h)).xxx);
+  sb.Store4(112u, asuint((0.0f).xxxx));
+  sb.Store4(128u, asuint((0).xxxx));
+  sb.Store4(144u, asuint((0u).xxxx));
+  sb.Store<vector<float16_t, 4> >(160u, (float16_t(0.0h)).xxxx);
+  tint_symbol_16(sb, 168u, float2x2((0.0f).xx, (0.0f).xx));
+  tint_symbol_17(sb, 192u, float2x3((0.0f).xxx, (0.0f).xxx));
+  tint_symbol_18(sb, 224u, float2x4((0.0f).xxxx, (0.0f).xxxx));
+  tint_symbol_19(sb, 256u, float3x2((0.0f).xx, (0.0f).xx, (0.0f).xx));
+  tint_symbol_20(sb, 288u, float3x3((0.0f).xxx, (0.0f).xxx, (0.0f).xxx));
+  tint_symbol_21(sb, 336u, float3x4((0.0f).xxxx, (0.0f).xxxx, (0.0f).xxxx));
+  tint_symbol_22(sb, 384u, float4x2((0.0f).xx, (0.0f).xx, (0.0f).xx, (0.0f).xx));
+  tint_symbol_23(sb, 416u, float4x3((0.0f).xxx, (0.0f).xxx, (0.0f).xxx, (0.0f).xxx));
+  tint_symbol_24(sb, 480u, float4x4((0.0f).xxxx, (0.0f).xxxx, (0.0f).xxxx, (0.0f).xxxx));
+  tint_symbol_25(sb, 544u, matrix<float16_t, 2, 2>((float16_t(0.0h)).xx, (float16_t(0.0h)).xx));
+  tint_symbol_26(sb, 552u, matrix<float16_t, 2, 3>((float16_t(0.0h)).xxx, (float16_t(0.0h)).xxx));
+  tint_symbol_27(sb, 568u, matrix<float16_t, 2, 4>((float16_t(0.0h)).xxxx, (float16_t(0.0h)).xxxx));
+  tint_symbol_28(sb, 584u, matrix<float16_t, 3, 2>((float16_t(0.0h)).xx, (float16_t(0.0h)).xx, (float16_t(0.0h)).xx));
+  tint_symbol_29(sb, 600u, matrix<float16_t, 3, 3>((float16_t(0.0h)).xxx, (float16_t(0.0h)).xxx, (float16_t(0.0h)).xxx));
+  tint_symbol_30(sb, 624u, matrix<float16_t, 3, 4>((float16_t(0.0h)).xxxx, (float16_t(0.0h)).xxxx, (float16_t(0.0h)).xxxx));
+  tint_symbol_31(sb, 648u, matrix<float16_t, 4, 2>((float16_t(0.0h)).xx, (float16_t(0.0h)).xx, (float16_t(0.0h)).xx, (float16_t(0.0h)).xx));
+  tint_symbol_32(sb, 664u, matrix<float16_t, 4, 3>((float16_t(0.0h)).xxx, (float16_t(0.0h)).xxx, (float16_t(0.0h)).xxx, (float16_t(0.0h)).xxx));
+  tint_symbol_33(sb, 696u, matrix<float16_t, 4, 4>((float16_t(0.0h)).xxxx, (float16_t(0.0h)).xxxx, (float16_t(0.0h)).xxxx, (float16_t(0.0h)).xxxx));
+  const float3 tint_symbol_38[2] = (float3[2])0;
+  tint_symbol_34(sb, 736u, tint_symbol_38);
+  const matrix<float16_t, 4, 2> tint_symbol_39[2] = (matrix<float16_t, 4, 2>[2])0;
+  tint_symbol_35(sb, 768u, tint_symbol_39);
+  const Inner tint_symbol_40 = (Inner)0;
+  tint_symbol_36(sb, 800u, tint_symbol_40);
+  const Inner tint_symbol_41[4] = (Inner[4])0;
+  tint_symbol_37(sb, 812u, tint_symbol_41);
+  return;
+}

diff --git a/test/tint/buffer/storage/static_index/write_f16.wgsl.expected.fxc.hlsl b/test/tint/buffer/storage/static_index/write_f16.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..f54b4e4
--- /dev/null
+++ b/test/tint/buffer/storage/static_index/write_f16.wgsl.expected.fxc.hlsl

@@ -0,0 +1,200 @@
+SKIP: FAILED
+
+struct Inner {
+  int scalar_i32;
+  float scalar_f32;
+  float16_t scalar_f16;
+};
+
+RWByteAddressBuffer sb : register(u0, space0);
+
+void tint_symbol_16(RWByteAddressBuffer buffer, uint offset, float2x2 value) {
+  buffer.Store2((offset + 0u), asuint(value[0u]));
+  buffer.Store2((offset + 8u), asuint(value[1u]));
+}
+
+void tint_symbol_17(RWByteAddressBuffer buffer, uint offset, float2x3 value) {
+  buffer.Store3((offset + 0u), asuint(value[0u]));
+  buffer.Store3((offset + 16u), asuint(value[1u]));
+}
+
+void tint_symbol_18(RWByteAddressBuffer buffer, uint offset, float2x4 value) {
+  buffer.Store4((offset + 0u), asuint(value[0u]));
+  buffer.Store4((offset + 16u), asuint(value[1u]));
+}
+
+void tint_symbol_19(RWByteAddressBuffer buffer, uint offset, float3x2 value) {
+  buffer.Store2((offset + 0u), asuint(value[0u]));
+  buffer.Store2((offset + 8u), asuint(value[1u]));
+  buffer.Store2((offset + 16u), asuint(value[2u]));
+}
+
+void tint_symbol_20(RWByteAddressBuffer buffer, uint offset, float3x3 value) {
+  buffer.Store3((offset + 0u), asuint(value[0u]));
+  buffer.Store3((offset + 16u), asuint(value[1u]));
+  buffer.Store3((offset + 32u), asuint(value[2u]));
+}
+
+void tint_symbol_21(RWByteAddressBuffer buffer, uint offset, float3x4 value) {
+  buffer.Store4((offset + 0u), asuint(value[0u]));
+  buffer.Store4((offset + 16u), asuint(value[1u]));
+  buffer.Store4((offset + 32u), asuint(value[2u]));
+}
+
+void tint_symbol_22(RWByteAddressBuffer buffer, uint offset, float4x2 value) {
+  buffer.Store2((offset + 0u), asuint(value[0u]));
+  buffer.Store2((offset + 8u), asuint(value[1u]));
+  buffer.Store2((offset + 16u), asuint(value[2u]));
+  buffer.Store2((offset + 24u), asuint(value[3u]));
+}
+
+void tint_symbol_23(RWByteAddressBuffer buffer, uint offset, float4x3 value) {
+  buffer.Store3((offset + 0u), asuint(value[0u]));
+  buffer.Store3((offset + 16u), asuint(value[1u]));
+  buffer.Store3((offset + 32u), asuint(value[2u]));
+  buffer.Store3((offset + 48u), asuint(value[3u]));
+}
+
+void tint_symbol_24(RWByteAddressBuffer buffer, uint offset, float4x4 value) {
+  buffer.Store4((offset + 0u), asuint(value[0u]));
+  buffer.Store4((offset + 16u), asuint(value[1u]));
+  buffer.Store4((offset + 32u), asuint(value[2u]));
+  buffer.Store4((offset + 48u), asuint(value[3u]));
+}
+
+void tint_symbol_25(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 2, 2> value) {
+  buffer.Store<vector<float16_t, 2> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 4u), value[1u]);
+}
+
+void tint_symbol_26(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 2, 3> value) {
+  buffer.Store<vector<float16_t, 3> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 8u), value[1u]);
+}
+
+void tint_symbol_27(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 2, 4> value) {
+  buffer.Store<vector<float16_t, 4> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 8u), value[1u]);
+}
+
+void tint_symbol_28(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 3, 2> value) {
+  buffer.Store<vector<float16_t, 2> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 4u), value[1u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 8u), value[2u]);
+}
+
+void tint_symbol_29(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 3, 3> value) {
+  buffer.Store<vector<float16_t, 3> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 8u), value[1u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 16u), value[2u]);
+}
+
+void tint_symbol_30(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 3, 4> value) {
+  buffer.Store<vector<float16_t, 4> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 8u), value[1u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 16u), value[2u]);
+}
+
+void tint_symbol_31(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 4, 2> value) {
+  buffer.Store<vector<float16_t, 2> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 4u), value[1u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 8u), value[2u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 12u), value[3u]);
+}
+
+void tint_symbol_32(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 4, 3> value) {
+  buffer.Store<vector<float16_t, 3> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 8u), value[1u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 16u), value[2u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 24u), value[3u]);
+}
+
+void tint_symbol_33(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 4, 4> value) {
+  buffer.Store<vector<float16_t, 4> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 8u), value[1u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 16u), value[2u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 24u), value[3u]);
+}
+
+void tint_symbol_34(RWByteAddressBuffer buffer, uint offset, float3 value[2]) {
+  float3 array[2] = value;
+  {
+    for(uint i = 0u; (i < 2u); i = (i + 1u)) {
+      buffer.Store3((offset + (i * 16u)), asuint(array[i]));
+    }
+  }
+}
+
+void tint_symbol_35(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 4, 2> value[2]) {
+  matrix<float16_t, 4, 2> array_1[2] = value;
+  {
+    for(uint i_1 = 0u; (i_1 < 2u); i_1 = (i_1 + 1u)) {
+      tint_symbol_31(buffer, (offset + (i_1 * 16u)), array_1[i_1]);
+    }
+  }
+}
+
+void tint_symbol_36(RWByteAddressBuffer buffer, uint offset, Inner value) {
+  buffer.Store((offset + 0u), asuint(value.scalar_i32));
+  buffer.Store((offset + 4u), asuint(value.scalar_f32));
+  buffer.Store<float16_t>((offset + 8u), value.scalar_f16);
+}
+
+void tint_symbol_37(RWByteAddressBuffer buffer, uint offset, Inner value[4]) {
+  Inner array_2[4] = value;
+  {
+    for(uint i_2 = 0u; (i_2 < 4u); i_2 = (i_2 + 1u)) {
+      tint_symbol_36(buffer, (offset + (i_2 * 12u)), array_2[i_2]);
+    }
+  }
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  sb.Store(0u, asuint(0.0f));
+  sb.Store(4u, asuint(0));
+  sb.Store(8u, asuint(0u));
+  sb.Store<float16_t>(12u, float16_t(0.0h));
+  sb.Store2(16u, asuint((0.0f).xx));
+  sb.Store2(24u, asuint((0).xx));
+  sb.Store2(32u, asuint((0u).xx));
+  sb.Store<vector<float16_t, 2> >(40u, (float16_t(0.0h)).xx);
+  sb.Store3(48u, asuint((0.0f).xxx));
+  sb.Store3(64u, asuint((0).xxx));
+  sb.Store3(80u, asuint((0u).xxx));
+  sb.Store<vector<float16_t, 3> >(96u, (float16_t(0.0h)).xxx);
+  sb.Store4(112u, asuint((0.0f).xxxx));
+  sb.Store4(128u, asuint((0).xxxx));
+  sb.Store4(144u, asuint((0u).xxxx));
+  sb.Store<vector<float16_t, 4> >(160u, (float16_t(0.0h)).xxxx);
+  tint_symbol_16(sb, 168u, float2x2((0.0f).xx, (0.0f).xx));
+  tint_symbol_17(sb, 192u, float2x3((0.0f).xxx, (0.0f).xxx));
+  tint_symbol_18(sb, 224u, float2x4((0.0f).xxxx, (0.0f).xxxx));
+  tint_symbol_19(sb, 256u, float3x2((0.0f).xx, (0.0f).xx, (0.0f).xx));
+  tint_symbol_20(sb, 288u, float3x3((0.0f).xxx, (0.0f).xxx, (0.0f).xxx));
+  tint_symbol_21(sb, 336u, float3x4((0.0f).xxxx, (0.0f).xxxx, (0.0f).xxxx));
+  tint_symbol_22(sb, 384u, float4x2((0.0f).xx, (0.0f).xx, (0.0f).xx, (0.0f).xx));
+  tint_symbol_23(sb, 416u, float4x3((0.0f).xxx, (0.0f).xxx, (0.0f).xxx, (0.0f).xxx));
+  tint_symbol_24(sb, 480u, float4x4((0.0f).xxxx, (0.0f).xxxx, (0.0f).xxxx, (0.0f).xxxx));
+  tint_symbol_25(sb, 544u, matrix<float16_t, 2, 2>((float16_t(0.0h)).xx, (float16_t(0.0h)).xx));
+  tint_symbol_26(sb, 552u, matrix<float16_t, 2, 3>((float16_t(0.0h)).xxx, (float16_t(0.0h)).xxx));
+  tint_symbol_27(sb, 568u, matrix<float16_t, 2, 4>((float16_t(0.0h)).xxxx, (float16_t(0.0h)).xxxx));
+  tint_symbol_28(sb, 584u, matrix<float16_t, 3, 2>((float16_t(0.0h)).xx, (float16_t(0.0h)).xx, (float16_t(0.0h)).xx));
+  tint_symbol_29(sb, 600u, matrix<float16_t, 3, 3>((float16_t(0.0h)).xxx, (float16_t(0.0h)).xxx, (float16_t(0.0h)).xxx));
+  tint_symbol_30(sb, 624u, matrix<float16_t, 3, 4>((float16_t(0.0h)).xxxx, (float16_t(0.0h)).xxxx, (float16_t(0.0h)).xxxx));
+  tint_symbol_31(sb, 648u, matrix<float16_t, 4, 2>((float16_t(0.0h)).xx, (float16_t(0.0h)).xx, (float16_t(0.0h)).xx, (float16_t(0.0h)).xx));
+  tint_symbol_32(sb, 664u, matrix<float16_t, 4, 3>((float16_t(0.0h)).xxx, (float16_t(0.0h)).xxx, (float16_t(0.0h)).xxx, (float16_t(0.0h)).xxx));
+  tint_symbol_33(sb, 696u, matrix<float16_t, 4, 4>((float16_t(0.0h)).xxxx, (float16_t(0.0h)).xxxx, (float16_t(0.0h)).xxxx, (float16_t(0.0h)).xxxx));
+  const float3 tint_symbol_38[2] = (float3[2])0;
+  tint_symbol_34(sb, 736u, tint_symbol_38);
+  const matrix<float16_t, 4, 2> tint_symbol_39[2] = (matrix<float16_t, 4, 2>[2])0;
+  tint_symbol_35(sb, 768u, tint_symbol_39);
+  const Inner tint_symbol_40 = (Inner)0;
+  tint_symbol_36(sb, 800u, tint_symbol_40);
+  const Inner tint_symbol_41[4] = (Inner[4])0;
+  tint_symbol_37(sb, 812u, tint_symbol_41);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000002416606A770(4,3-11): error X3000: unrecognized identifier 'float16_t'
+

diff --git a/test/tint/buffer/storage/static_index/write_f16.wgsl.expected.glsl b/test/tint/buffer/storage/static_index/write_f16.wgsl.expected.glsl
new file mode 100644
index 0000000..6a5b7de
--- /dev/null
+++ b/test/tint/buffer/storage/static_index/write_f16.wgsl.expected.glsl

@@ -0,0 +1,118 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct Inner {
+  int scalar_i32;
+  float scalar_f32;
+  float16_t scalar_f16;
+};
+
+struct S {
+  float scalar_f32;
+  int scalar_i32;
+  uint scalar_u32;
+  float16_t scalar_f16;
+  vec2 vec2_f32;
+  ivec2 vec2_i32;
+  uvec2 vec2_u32;
+  f16vec2 vec2_f16;
+  uint pad;
+  vec3 vec3_f32;
+  uint pad_1;
+  ivec3 vec3_i32;
+  uint pad_2;
+  uvec3 vec3_u32;
+  uint pad_3;
+  f16vec3 vec3_f16;
+  uint pad_4;
+  uint pad_5;
+  vec4 vec4_f32;
+  ivec4 vec4_i32;
+  uvec4 vec4_u32;
+  f16vec4 vec4_f16;
+  mat2 mat2x2_f32;
+  uint pad_6;
+  uint pad_7;
+  mat2x3 mat2x3_f32;
+  mat2x4 mat2x4_f32;
+  mat3x2 mat3x2_f32;
+  uint pad_8;
+  uint pad_9;
+  mat3 mat3x3_f32;
+  mat3x4 mat3x4_f32;
+  mat4x2 mat4x2_f32;
+  mat4x3 mat4x3_f32;
+  mat4 mat4x4_f32;
+  f16mat2 mat2x2_f16;
+  f16mat2x3 mat2x3_f16;
+  f16mat2x4 mat2x4_f16;
+  f16mat3x2 mat3x2_f16;
+  uint pad_10;
+  f16mat3 mat3x3_f16;
+  f16mat3x4 mat3x4_f16;
+  f16mat4x2 mat4x2_f16;
+  f16mat4x3 mat4x3_f16;
+  f16mat4 mat4x4_f16;
+  uint pad_11;
+  uint pad_12;
+  vec3 arr2_vec3_f32[2];
+  f16mat4x2 arr2_mat4x2_f16[2];
+  Inner struct_inner;
+  Inner array_struct_inner[4];
+  uint pad_13;
+};
+
+layout(binding = 0, std430) buffer sb_block_ssbo {
+  S inner;
+} sb;
+
+void tint_symbol() {
+  sb.inner.scalar_f32 = 0.0f;
+  sb.inner.scalar_i32 = 0;
+  sb.inner.scalar_u32 = 0u;
+  sb.inner.scalar_f16 = 0.0hf;
+  sb.inner.vec2_f32 = vec2(0.0f);
+  sb.inner.vec2_i32 = ivec2(0);
+  sb.inner.vec2_u32 = uvec2(0u);
+  sb.inner.vec2_f16 = f16vec2(0.0hf);
+  sb.inner.vec3_f32 = vec3(0.0f);
+  sb.inner.vec3_i32 = ivec3(0);
+  sb.inner.vec3_u32 = uvec3(0u);
+  sb.inner.vec3_f16 = f16vec3(0.0hf);
+  sb.inner.vec4_f32 = vec4(0.0f);
+  sb.inner.vec4_i32 = ivec4(0);
+  sb.inner.vec4_u32 = uvec4(0u);
+  sb.inner.vec4_f16 = f16vec4(0.0hf);
+  sb.inner.mat2x2_f32 = mat2(vec2(0.0f), vec2(0.0f));
+  sb.inner.mat2x3_f32 = mat2x3(vec3(0.0f), vec3(0.0f));
+  sb.inner.mat2x4_f32 = mat2x4(vec4(0.0f), vec4(0.0f));
+  sb.inner.mat3x2_f32 = mat3x2(vec2(0.0f), vec2(0.0f), vec2(0.0f));
+  sb.inner.mat3x3_f32 = mat3(vec3(0.0f), vec3(0.0f), vec3(0.0f));
+  sb.inner.mat3x4_f32 = mat3x4(vec4(0.0f), vec4(0.0f), vec4(0.0f));
+  sb.inner.mat4x2_f32 = mat4x2(vec2(0.0f), vec2(0.0f), vec2(0.0f), vec2(0.0f));
+  sb.inner.mat4x3_f32 = mat4x3(vec3(0.0f), vec3(0.0f), vec3(0.0f), vec3(0.0f));
+  sb.inner.mat4x4_f32 = mat4(vec4(0.0f), vec4(0.0f), vec4(0.0f), vec4(0.0f));
+  sb.inner.mat2x2_f16 = f16mat2(f16vec2(0.0hf), f16vec2(0.0hf));
+  sb.inner.mat2x3_f16 = f16mat2x3(f16vec3(0.0hf), f16vec3(0.0hf));
+  sb.inner.mat2x4_f16 = f16mat2x4(f16vec4(0.0hf), f16vec4(0.0hf));
+  sb.inner.mat3x2_f16 = f16mat3x2(f16vec2(0.0hf), f16vec2(0.0hf), f16vec2(0.0hf));
+  sb.inner.mat3x3_f16 = f16mat3(f16vec3(0.0hf), f16vec3(0.0hf), f16vec3(0.0hf));
+  sb.inner.mat3x4_f16 = f16mat3x4(f16vec4(0.0hf), f16vec4(0.0hf), f16vec4(0.0hf));
+  sb.inner.mat4x2_f16 = f16mat4x2(f16vec2(0.0hf), f16vec2(0.0hf), f16vec2(0.0hf), f16vec2(0.0hf));
+  sb.inner.mat4x3_f16 = f16mat4x3(f16vec3(0.0hf), f16vec3(0.0hf), f16vec3(0.0hf), f16vec3(0.0hf));
+  sb.inner.mat4x4_f16 = f16mat4(f16vec4(0.0hf), f16vec4(0.0hf), f16vec4(0.0hf), f16vec4(0.0hf));
+  vec3 tint_symbol_1[2] = vec3[2](vec3(0.0f), vec3(0.0f));
+  sb.inner.arr2_vec3_f32 = tint_symbol_1;
+  f16mat4x2 tint_symbol_2[2] = f16mat4x2[2](f16mat4x2(f16vec2(0.0hf), f16vec2(0.0hf), f16vec2(0.0hf), f16vec2(0.0hf)), f16mat4x2(f16vec2(0.0hf), f16vec2(0.0hf), f16vec2(0.0hf), f16vec2(0.0hf)));
+  sb.inner.arr2_mat4x2_f16 = tint_symbol_2;
+  Inner tint_symbol_3 = Inner(0, 0.0f, 0.0hf);
+  sb.inner.struct_inner = tint_symbol_3;
+  Inner tint_symbol_4[4] = Inner[4](Inner(0, 0.0f, 0.0hf), Inner(0, 0.0f, 0.0hf), Inner(0, 0.0f, 0.0hf), Inner(0, 0.0f, 0.0hf));
+  sb.inner.array_struct_inner = tint_symbol_4;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  tint_symbol();
+  return;
+}

diff --git a/test/tint/buffer/storage/static_index/write_f16.wgsl.expected.msl b/test/tint/buffer/storage/static_index/write_f16.wgsl.expected.msl
new file mode 100644
index 0000000..f67cfc5
--- /dev/null
+++ b/test/tint/buffer/storage/static_index/write_f16.wgsl.expected.msl

@@ -0,0 +1,121 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct Inner {
+  /* 0x0000 */ int scalar_i32;
+  /* 0x0004 */ float scalar_f32;
+  /* 0x0008 */ half scalar_f16;
+  /* 0x000a */ tint_array<int8_t, 2> tint_pad;
+};
+
+struct S {
+  /* 0x0000 */ float scalar_f32;
+  /* 0x0004 */ int scalar_i32;
+  /* 0x0008 */ uint scalar_u32;
+  /* 0x000c */ half scalar_f16;
+  /* 0x000e */ tint_array<int8_t, 2> tint_pad_1;
+  /* 0x0010 */ float2 vec2_f32;
+  /* 0x0018 */ int2 vec2_i32;
+  /* 0x0020 */ uint2 vec2_u32;
+  /* 0x0028 */ half2 vec2_f16;
+  /* 0x002c */ tint_array<int8_t, 4> tint_pad_2;
+  /* 0x0030 */ packed_float3 vec3_f32;
+  /* 0x003c */ tint_array<int8_t, 4> tint_pad_3;
+  /* 0x0040 */ packed_int3 vec3_i32;
+  /* 0x004c */ tint_array<int8_t, 4> tint_pad_4;
+  /* 0x0050 */ packed_uint3 vec3_u32;
+  /* 0x005c */ tint_array<int8_t, 4> tint_pad_5;
+  /* 0x0060 */ packed_half3 vec3_f16;
+  /* 0x0066 */ tint_array<int8_t, 10> tint_pad_6;
+  /* 0x0070 */ float4 vec4_f32;
+  /* 0x0080 */ int4 vec4_i32;
+  /* 0x0090 */ uint4 vec4_u32;
+  /* 0x00a0 */ half4 vec4_f16;
+  /* 0x00a8 */ float2x2 mat2x2_f32;
+  /* 0x00b8 */ tint_array<int8_t, 8> tint_pad_7;
+  /* 0x00c0 */ float2x3 mat2x3_f32;
+  /* 0x00e0 */ float2x4 mat2x4_f32;
+  /* 0x0100 */ float3x2 mat3x2_f32;
+  /* 0x0118 */ tint_array<int8_t, 8> tint_pad_8;
+  /* 0x0120 */ float3x3 mat3x3_f32;
+  /* 0x0150 */ float3x4 mat3x4_f32;
+  /* 0x0180 */ float4x2 mat4x2_f32;
+  /* 0x01a0 */ float4x3 mat4x3_f32;
+  /* 0x01e0 */ float4x4 mat4x4_f32;
+  /* 0x0220 */ half2x2 mat2x2_f16;
+  /* 0x0228 */ half2x3 mat2x3_f16;
+  /* 0x0238 */ half2x4 mat2x4_f16;
+  /* 0x0248 */ half3x2 mat3x2_f16;
+  /* 0x0254 */ tint_array<int8_t, 4> tint_pad_9;
+  /* 0x0258 */ half3x3 mat3x3_f16;
+  /* 0x0270 */ half3x4 mat3x4_f16;
+  /* 0x0288 */ half4x2 mat4x2_f16;
+  /* 0x0298 */ half4x3 mat4x3_f16;
+  /* 0x02b8 */ half4x4 mat4x4_f16;
+  /* 0x02d8 */ tint_array<int8_t, 8> tint_pad_10;
+  /* 0x02e0 */ tint_array<float3, 2> arr2_vec3_f32;
+  /* 0x0300 */ tint_array<half4x2, 2> arr2_mat4x2_f16;
+  /* 0x0320 */ Inner struct_inner;
+  /* 0x032c */ tint_array<Inner, 4> array_struct_inner;
+  /* 0x035c */ tint_array<int8_t, 4> tint_pad_11;
+};
+
+kernel void tint_symbol(device S* tint_symbol_5 [[buffer(0)]]) {
+  (*(tint_symbol_5)).scalar_f32 = 0.0f;
+  (*(tint_symbol_5)).scalar_i32 = 0;
+  (*(tint_symbol_5)).scalar_u32 = 0u;
+  (*(tint_symbol_5)).scalar_f16 = 0.0h;
+  (*(tint_symbol_5)).vec2_f32 = float2(0.0f);
+  (*(tint_symbol_5)).vec2_i32 = int2(0);
+  (*(tint_symbol_5)).vec2_u32 = uint2(0u);
+  (*(tint_symbol_5)).vec2_f16 = half2(0.0h);
+  (*(tint_symbol_5)).vec3_f32 = float3(0.0f);
+  (*(tint_symbol_5)).vec3_i32 = int3(0);
+  (*(tint_symbol_5)).vec3_u32 = uint3(0u);
+  (*(tint_symbol_5)).vec3_f16 = half3(0.0h);
+  (*(tint_symbol_5)).vec4_f32 = float4(0.0f);
+  (*(tint_symbol_5)).vec4_i32 = int4(0);
+  (*(tint_symbol_5)).vec4_u32 = uint4(0u);
+  (*(tint_symbol_5)).vec4_f16 = half4(0.0h);
+  (*(tint_symbol_5)).mat2x2_f32 = float2x2(float2(0.0f), float2(0.0f));
+  (*(tint_symbol_5)).mat2x3_f32 = float2x3(float3(0.0f), float3(0.0f));
+  (*(tint_symbol_5)).mat2x4_f32 = float2x4(float4(0.0f), float4(0.0f));
+  (*(tint_symbol_5)).mat3x2_f32 = float3x2(float2(0.0f), float2(0.0f), float2(0.0f));
+  (*(tint_symbol_5)).mat3x3_f32 = float3x3(float3(0.0f), float3(0.0f), float3(0.0f));
+  (*(tint_symbol_5)).mat3x4_f32 = float3x4(float4(0.0f), float4(0.0f), float4(0.0f));
+  (*(tint_symbol_5)).mat4x2_f32 = float4x2(float2(0.0f), float2(0.0f), float2(0.0f), float2(0.0f));
+  (*(tint_symbol_5)).mat4x3_f32 = float4x3(float3(0.0f), float3(0.0f), float3(0.0f), float3(0.0f));
+  (*(tint_symbol_5)).mat4x4_f32 = float4x4(float4(0.0f), float4(0.0f), float4(0.0f), float4(0.0f));
+  (*(tint_symbol_5)).mat2x2_f16 = half2x2(half2(0.0h), half2(0.0h));
+  (*(tint_symbol_5)).mat2x3_f16 = half2x3(half3(0.0h), half3(0.0h));
+  (*(tint_symbol_5)).mat2x4_f16 = half2x4(half4(0.0h), half4(0.0h));
+  (*(tint_symbol_5)).mat3x2_f16 = half3x2(half2(0.0h), half2(0.0h), half2(0.0h));
+  (*(tint_symbol_5)).mat3x3_f16 = half3x3(half3(0.0h), half3(0.0h), half3(0.0h));
+  (*(tint_symbol_5)).mat3x4_f16 = half3x4(half4(0.0h), half4(0.0h), half4(0.0h));
+  (*(tint_symbol_5)).mat4x2_f16 = half4x2(half2(0.0h), half2(0.0h), half2(0.0h), half2(0.0h));
+  (*(tint_symbol_5)).mat4x3_f16 = half4x3(half3(0.0h), half3(0.0h), half3(0.0h), half3(0.0h));
+  (*(tint_symbol_5)).mat4x4_f16 = half4x4(half4(0.0h), half4(0.0h), half4(0.0h), half4(0.0h));
+  tint_array<float3, 2> const tint_symbol_1 = tint_array<float3, 2>{};
+  (*(tint_symbol_5)).arr2_vec3_f32 = tint_symbol_1;
+  tint_array<half4x2, 2> const tint_symbol_2 = tint_array<half4x2, 2>{};
+  (*(tint_symbol_5)).arr2_mat4x2_f16 = tint_symbol_2;
+  Inner const tint_symbol_3 = Inner{};
+  (*(tint_symbol_5)).struct_inner = tint_symbol_3;
+  tint_array<Inner, 4> const tint_symbol_4 = tint_array<Inner, 4>{};
+  (*(tint_symbol_5)).array_struct_inner = tint_symbol_4;
+  return;
+}
+

diff --git a/test/tint/buffer/storage/static_index/write_f16.wgsl.expected.spvasm b/test/tint/buffer/storage/static_index/write_f16.wgsl.expected.spvasm
new file mode 100644
index 0000000..16da863
--- /dev/null
+++ b/test/tint/buffer/storage/static_index/write_f16.wgsl.expected.spvasm

@@ -0,0 +1,384 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 199
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpName %sb_block "sb_block"
+               OpMemberName %sb_block 0 "inner"
+               OpName %S "S"
+               OpMemberName %S 0 "scalar_f32"
+               OpMemberName %S 1 "scalar_i32"
+               OpMemberName %S 2 "scalar_u32"
+               OpMemberName %S 3 "scalar_f16"
+               OpMemberName %S 4 "vec2_f32"
+               OpMemberName %S 5 "vec2_i32"
+               OpMemberName %S 6 "vec2_u32"
+               OpMemberName %S 7 "vec2_f16"
+               OpMemberName %S 8 "vec3_f32"
+               OpMemberName %S 9 "vec3_i32"
+               OpMemberName %S 10 "vec3_u32"
+               OpMemberName %S 11 "vec3_f16"
+               OpMemberName %S 12 "vec4_f32"
+               OpMemberName %S 13 "vec4_i32"
+               OpMemberName %S 14 "vec4_u32"
+               OpMemberName %S 15 "vec4_f16"
+               OpMemberName %S 16 "mat2x2_f32"
+               OpMemberName %S 17 "mat2x3_f32"
+               OpMemberName %S 18 "mat2x4_f32"
+               OpMemberName %S 19 "mat3x2_f32"
+               OpMemberName %S 20 "mat3x3_f32"
+               OpMemberName %S 21 "mat3x4_f32"
+               OpMemberName %S 22 "mat4x2_f32"
+               OpMemberName %S 23 "mat4x3_f32"
+               OpMemberName %S 24 "mat4x4_f32"
+               OpMemberName %S 25 "mat2x2_f16"
+               OpMemberName %S 26 "mat2x3_f16"
+               OpMemberName %S 27 "mat2x4_f16"
+               OpMemberName %S 28 "mat3x2_f16"
+               OpMemberName %S 29 "mat3x3_f16"
+               OpMemberName %S 30 "mat3x4_f16"
+               OpMemberName %S 31 "mat4x2_f16"
+               OpMemberName %S 32 "mat4x3_f16"
+               OpMemberName %S 33 "mat4x4_f16"
+               OpMemberName %S 34 "arr2_vec3_f32"
+               OpMemberName %S 35 "arr2_mat4x2_f16"
+               OpMemberName %S 36 "struct_inner"
+               OpName %Inner "Inner"
+               OpMemberName %Inner 0 "scalar_i32"
+               OpMemberName %Inner 1 "scalar_f32"
+               OpMemberName %Inner 2 "scalar_f16"
+               OpMemberName %S 37 "array_struct_inner"
+               OpName %sb "sb"
+               OpName %main "main"
+               OpDecorate %sb_block Block
+               OpMemberDecorate %sb_block 0 Offset 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 4
+               OpMemberDecorate %S 2 Offset 8
+               OpMemberDecorate %S 3 Offset 12
+               OpMemberDecorate %S 4 Offset 16
+               OpMemberDecorate %S 5 Offset 24
+               OpMemberDecorate %S 6 Offset 32
+               OpMemberDecorate %S 7 Offset 40
+               OpMemberDecorate %S 8 Offset 48
+               OpMemberDecorate %S 9 Offset 64
+               OpMemberDecorate %S 10 Offset 80
+               OpMemberDecorate %S 11 Offset 96
+               OpMemberDecorate %S 12 Offset 112
+               OpMemberDecorate %S 13 Offset 128
+               OpMemberDecorate %S 14 Offset 144
+               OpMemberDecorate %S 15 Offset 160
+               OpMemberDecorate %S 16 Offset 168
+               OpMemberDecorate %S 16 ColMajor
+               OpMemberDecorate %S 16 MatrixStride 8
+               OpMemberDecorate %S 17 Offset 192
+               OpMemberDecorate %S 17 ColMajor
+               OpMemberDecorate %S 17 MatrixStride 16
+               OpMemberDecorate %S 18 Offset 224
+               OpMemberDecorate %S 18 ColMajor
+               OpMemberDecorate %S 18 MatrixStride 16
+               OpMemberDecorate %S 19 Offset 256
+               OpMemberDecorate %S 19 ColMajor
+               OpMemberDecorate %S 19 MatrixStride 8
+               OpMemberDecorate %S 20 Offset 288
+               OpMemberDecorate %S 20 ColMajor
+               OpMemberDecorate %S 20 MatrixStride 16
+               OpMemberDecorate %S 21 Offset 336
+               OpMemberDecorate %S 21 ColMajor
+               OpMemberDecorate %S 21 MatrixStride 16
+               OpMemberDecorate %S 22 Offset 384
+               OpMemberDecorate %S 22 ColMajor
+               OpMemberDecorate %S 22 MatrixStride 8
+               OpMemberDecorate %S 23 Offset 416
+               OpMemberDecorate %S 23 ColMajor
+               OpMemberDecorate %S 23 MatrixStride 16
+               OpMemberDecorate %S 24 Offset 480
+               OpMemberDecorate %S 24 ColMajor
+               OpMemberDecorate %S 24 MatrixStride 16
+               OpMemberDecorate %S 25 Offset 544
+               OpMemberDecorate %S 25 ColMajor
+               OpMemberDecorate %S 25 MatrixStride 4
+               OpMemberDecorate %S 26 Offset 552
+               OpMemberDecorate %S 26 ColMajor
+               OpMemberDecorate %S 26 MatrixStride 8
+               OpMemberDecorate %S 27 Offset 568
+               OpMemberDecorate %S 27 ColMajor
+               OpMemberDecorate %S 27 MatrixStride 8
+               OpMemberDecorate %S 28 Offset 584
+               OpMemberDecorate %S 28 ColMajor
+               OpMemberDecorate %S 28 MatrixStride 4
+               OpMemberDecorate %S 29 Offset 600
+               OpMemberDecorate %S 29 ColMajor
+               OpMemberDecorate %S 29 MatrixStride 8
+               OpMemberDecorate %S 30 Offset 624
+               OpMemberDecorate %S 30 ColMajor
+               OpMemberDecorate %S 30 MatrixStride 8
+               OpMemberDecorate %S 31 Offset 648
+               OpMemberDecorate %S 31 ColMajor
+               OpMemberDecorate %S 31 MatrixStride 4
+               OpMemberDecorate %S 32 Offset 664
+               OpMemberDecorate %S 32 ColMajor
+               OpMemberDecorate %S 32 MatrixStride 8
+               OpMemberDecorate %S 33 Offset 696
+               OpMemberDecorate %S 33 ColMajor
+               OpMemberDecorate %S 33 MatrixStride 8
+               OpMemberDecorate %S 34 Offset 736
+               OpDecorate %_arr_v3float_uint_2 ArrayStride 16
+               OpMemberDecorate %S 35 Offset 768
+               OpMemberDecorate %S 35 ColMajor
+               OpMemberDecorate %S 35 MatrixStride 4
+               OpDecorate %_arr_mat4v2half_uint_2 ArrayStride 16
+               OpMemberDecorate %S 36 Offset 800
+               OpMemberDecorate %Inner 0 Offset 0
+               OpMemberDecorate %Inner 1 Offset 4
+               OpMemberDecorate %Inner 2 Offset 8
+               OpMemberDecorate %S 37 Offset 812
+               OpDecorate %_arr_Inner_uint_4 ArrayStride 12
+               OpDecorate %sb Binding 0
+               OpDecorate %sb DescriptorSet 0
+      %float = OpTypeFloat 32
+        %int = OpTypeInt 32 1
+       %uint = OpTypeInt 32 0
+       %half = OpTypeFloat 16
+    %v2float = OpTypeVector %float 2
+      %v2int = OpTypeVector %int 2
+     %v2uint = OpTypeVector %uint 2
+     %v2half = OpTypeVector %half 2
+    %v3float = OpTypeVector %float 3
+      %v3int = OpTypeVector %int 3
+     %v3uint = OpTypeVector %uint 3
+     %v3half = OpTypeVector %half 3
+    %v4float = OpTypeVector %float 4
+      %v4int = OpTypeVector %int 4
+     %v4uint = OpTypeVector %uint 4
+     %v4half = OpTypeVector %half 4
+%mat2v2float = OpTypeMatrix %v2float 2
+%mat2v3float = OpTypeMatrix %v3float 2
+%mat2v4float = OpTypeMatrix %v4float 2
+%mat3v2float = OpTypeMatrix %v2float 3
+%mat3v3float = OpTypeMatrix %v3float 3
+%mat3v4float = OpTypeMatrix %v4float 3
+%mat4v2float = OpTypeMatrix %v2float 4
+%mat4v3float = OpTypeMatrix %v3float 4
+%mat4v4float = OpTypeMatrix %v4float 4
+ %mat2v2half = OpTypeMatrix %v2half 2
+ %mat2v3half = OpTypeMatrix %v3half 2
+ %mat2v4half = OpTypeMatrix %v4half 2
+ %mat3v2half = OpTypeMatrix %v2half 3
+ %mat3v3half = OpTypeMatrix %v3half 3
+ %mat3v4half = OpTypeMatrix %v4half 3
+ %mat4v2half = OpTypeMatrix %v2half 4
+ %mat4v3half = OpTypeMatrix %v3half 4
+ %mat4v4half = OpTypeMatrix %v4half 4
+     %uint_2 = OpConstant %uint 2
+%_arr_v3float_uint_2 = OpTypeArray %v3float %uint_2
+%_arr_mat4v2half_uint_2 = OpTypeArray %mat4v2half %uint_2
+      %Inner = OpTypeStruct %int %float %half
+     %uint_4 = OpConstant %uint 4
+%_arr_Inner_uint_4 = OpTypeArray %Inner %uint_4
+          %S = OpTypeStruct %float %int %uint %half %v2float %v2int %v2uint %v2half %v3float %v3int %v3uint %v3half %v4float %v4int %v4uint %v4half %mat2v2float %mat2v3float %mat2v4float %mat3v2float %mat3v3float %mat3v4float %mat4v2float %mat4v3float %mat4v4float %mat2v2half %mat2v3half %mat2v4half %mat3v2half %mat3v3half %mat3v4half %mat4v2half %mat4v3half %mat4v4half %_arr_v3float_uint_2 %_arr_mat4v2half_uint_2 %Inner %_arr_Inner_uint_4
+   %sb_block = OpTypeStruct %S
+%_ptr_StorageBuffer_sb_block = OpTypePointer StorageBuffer %sb_block
+         %sb = OpVariable %_ptr_StorageBuffer_sb_block StorageBuffer
+       %void = OpTypeVoid
+         %45 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
+         %52 = OpConstantNull %float
+     %uint_1 = OpConstant %uint 1
+%_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int
+         %56 = OpConstantNull %int
+%_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
+         %59 = OpConstantNull %uint
+     %uint_3 = OpConstant %uint 3
+%_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
+         %63 = OpConstantNull %half
+%_ptr_StorageBuffer_v2float = OpTypePointer StorageBuffer %v2float
+         %66 = OpConstantNull %v2float
+     %uint_5 = OpConstant %uint 5
+%_ptr_StorageBuffer_v2int = OpTypePointer StorageBuffer %v2int
+         %70 = OpConstantNull %v2int
+     %uint_6 = OpConstant %uint 6
+%_ptr_StorageBuffer_v2uint = OpTypePointer StorageBuffer %v2uint
+         %74 = OpConstantNull %v2uint
+     %uint_7 = OpConstant %uint 7
+%_ptr_StorageBuffer_v2half = OpTypePointer StorageBuffer %v2half
+         %78 = OpConstantNull %v2half
+     %uint_8 = OpConstant %uint 8
+%_ptr_StorageBuffer_v3float = OpTypePointer StorageBuffer %v3float
+         %82 = OpConstantNull %v3float
+     %uint_9 = OpConstant %uint 9
+%_ptr_StorageBuffer_v3int = OpTypePointer StorageBuffer %v3int
+         %86 = OpConstantNull %v3int
+    %uint_10 = OpConstant %uint 10
+%_ptr_StorageBuffer_v3uint = OpTypePointer StorageBuffer %v3uint
+         %90 = OpConstantNull %v3uint
+    %uint_11 = OpConstant %uint 11
+%_ptr_StorageBuffer_v3half = OpTypePointer StorageBuffer %v3half
+         %94 = OpConstantNull %v3half
+    %uint_12 = OpConstant %uint 12
+%_ptr_StorageBuffer_v4float = OpTypePointer StorageBuffer %v4float
+         %98 = OpConstantNull %v4float
+    %uint_13 = OpConstant %uint 13
+%_ptr_StorageBuffer_v4int = OpTypePointer StorageBuffer %v4int
+        %102 = OpConstantNull %v4int
+    %uint_14 = OpConstant %uint 14
+%_ptr_StorageBuffer_v4uint = OpTypePointer StorageBuffer %v4uint
+        %106 = OpConstantNull %v4uint
+    %uint_15 = OpConstant %uint 15
+%_ptr_StorageBuffer_v4half = OpTypePointer StorageBuffer %v4half
+        %110 = OpConstantNull %v4half
+    %uint_16 = OpConstant %uint 16
+%_ptr_StorageBuffer_mat2v2float = OpTypePointer StorageBuffer %mat2v2float
+        %114 = OpConstantNull %mat2v2float
+    %uint_17 = OpConstant %uint 17
+%_ptr_StorageBuffer_mat2v3float = OpTypePointer StorageBuffer %mat2v3float
+        %118 = OpConstantNull %mat2v3float
+    %uint_18 = OpConstant %uint 18
+%_ptr_StorageBuffer_mat2v4float = OpTypePointer StorageBuffer %mat2v4float
+        %122 = OpConstantNull %mat2v4float
+    %uint_19 = OpConstant %uint 19
+%_ptr_StorageBuffer_mat3v2float = OpTypePointer StorageBuffer %mat3v2float
+        %126 = OpConstantNull %mat3v2float
+    %uint_20 = OpConstant %uint 20
+%_ptr_StorageBuffer_mat3v3float = OpTypePointer StorageBuffer %mat3v3float
+        %130 = OpConstantNull %mat3v3float
+    %uint_21 = OpConstant %uint 21
+%_ptr_StorageBuffer_mat3v4float = OpTypePointer StorageBuffer %mat3v4float
+        %134 = OpConstantNull %mat3v4float
+    %uint_22 = OpConstant %uint 22
+%_ptr_StorageBuffer_mat4v2float = OpTypePointer StorageBuffer %mat4v2float
+        %138 = OpConstantNull %mat4v2float
+    %uint_23 = OpConstant %uint 23
+%_ptr_StorageBuffer_mat4v3float = OpTypePointer StorageBuffer %mat4v3float
+        %142 = OpConstantNull %mat4v3float
+    %uint_24 = OpConstant %uint 24
+%_ptr_StorageBuffer_mat4v4float = OpTypePointer StorageBuffer %mat4v4float
+        %146 = OpConstantNull %mat4v4float
+    %uint_25 = OpConstant %uint 25
+%_ptr_StorageBuffer_mat2v2half = OpTypePointer StorageBuffer %mat2v2half
+        %150 = OpConstantNull %mat2v2half
+    %uint_26 = OpConstant %uint 26
+%_ptr_StorageBuffer_mat2v3half = OpTypePointer StorageBuffer %mat2v3half
+        %154 = OpConstantNull %mat2v3half
+    %uint_27 = OpConstant %uint 27
+%_ptr_StorageBuffer_mat2v4half = OpTypePointer StorageBuffer %mat2v4half
+        %158 = OpConstantNull %mat2v4half
+    %uint_28 = OpConstant %uint 28
+%_ptr_StorageBuffer_mat3v2half = OpTypePointer StorageBuffer %mat3v2half
+        %162 = OpConstantNull %mat3v2half
+    %uint_29 = OpConstant %uint 29
+%_ptr_StorageBuffer_mat3v3half = OpTypePointer StorageBuffer %mat3v3half
+        %166 = OpConstantNull %mat3v3half
+    %uint_30 = OpConstant %uint 30
+%_ptr_StorageBuffer_mat3v4half = OpTypePointer StorageBuffer %mat3v4half
+        %170 = OpConstantNull %mat3v4half
+    %uint_31 = OpConstant %uint 31
+%_ptr_StorageBuffer_mat4v2half = OpTypePointer StorageBuffer %mat4v2half
+        %174 = OpConstantNull %mat4v2half
+    %uint_32 = OpConstant %uint 32
+%_ptr_StorageBuffer_mat4v3half = OpTypePointer StorageBuffer %mat4v3half
+        %178 = OpConstantNull %mat4v3half
+    %uint_33 = OpConstant %uint 33
+%_ptr_StorageBuffer_mat4v4half = OpTypePointer StorageBuffer %mat4v4half
+        %182 = OpConstantNull %mat4v4half
+    %uint_34 = OpConstant %uint 34
+%_ptr_StorageBuffer__arr_v3float_uint_2 = OpTypePointer StorageBuffer %_arr_v3float_uint_2
+        %186 = OpConstantNull %_arr_v3float_uint_2
+    %uint_35 = OpConstant %uint 35
+%_ptr_StorageBuffer__arr_mat4v2half_uint_2 = OpTypePointer StorageBuffer %_arr_mat4v2half_uint_2
+        %190 = OpConstantNull %_arr_mat4v2half_uint_2
+    %uint_36 = OpConstant %uint 36
+%_ptr_StorageBuffer_Inner = OpTypePointer StorageBuffer %Inner
+        %194 = OpConstantNull %Inner
+    %uint_37 = OpConstant %uint 37
+%_ptr_StorageBuffer__arr_Inner_uint_4 = OpTypePointer StorageBuffer %_arr_Inner_uint_4
+        %198 = OpConstantNull %_arr_Inner_uint_4
+       %main = OpFunction %void None %45
+         %48 = OpLabel
+         %51 = OpAccessChain %_ptr_StorageBuffer_float %sb %uint_0 %uint_0
+               OpStore %51 %52
+         %55 = OpAccessChain %_ptr_StorageBuffer_int %sb %uint_0 %uint_1
+               OpStore %55 %56
+         %58 = OpAccessChain %_ptr_StorageBuffer_uint %sb %uint_0 %uint_2
+               OpStore %58 %59
+         %62 = OpAccessChain %_ptr_StorageBuffer_half %sb %uint_0 %uint_3
+               OpStore %62 %63
+         %65 = OpAccessChain %_ptr_StorageBuffer_v2float %sb %uint_0 %uint_4
+               OpStore %65 %66
+         %69 = OpAccessChain %_ptr_StorageBuffer_v2int %sb %uint_0 %uint_5
+               OpStore %69 %70
+         %73 = OpAccessChain %_ptr_StorageBuffer_v2uint %sb %uint_0 %uint_6
+               OpStore %73 %74
+         %77 = OpAccessChain %_ptr_StorageBuffer_v2half %sb %uint_0 %uint_7
+               OpStore %77 %78
+         %81 = OpAccessChain %_ptr_StorageBuffer_v3float %sb %uint_0 %uint_8
+               OpStore %81 %82
+         %85 = OpAccessChain %_ptr_StorageBuffer_v3int %sb %uint_0 %uint_9
+               OpStore %85 %86
+         %89 = OpAccessChain %_ptr_StorageBuffer_v3uint %sb %uint_0 %uint_10
+               OpStore %89 %90
+         %93 = OpAccessChain %_ptr_StorageBuffer_v3half %sb %uint_0 %uint_11
+               OpStore %93 %94
+         %97 = OpAccessChain %_ptr_StorageBuffer_v4float %sb %uint_0 %uint_12
+               OpStore %97 %98
+        %101 = OpAccessChain %_ptr_StorageBuffer_v4int %sb %uint_0 %uint_13
+               OpStore %101 %102
+        %105 = OpAccessChain %_ptr_StorageBuffer_v4uint %sb %uint_0 %uint_14
+               OpStore %105 %106
+        %109 = OpAccessChain %_ptr_StorageBuffer_v4half %sb %uint_0 %uint_15
+               OpStore %109 %110
+        %113 = OpAccessChain %_ptr_StorageBuffer_mat2v2float %sb %uint_0 %uint_16
+               OpStore %113 %114
+        %117 = OpAccessChain %_ptr_StorageBuffer_mat2v3float %sb %uint_0 %uint_17
+               OpStore %117 %118
+        %121 = OpAccessChain %_ptr_StorageBuffer_mat2v4float %sb %uint_0 %uint_18
+               OpStore %121 %122
+        %125 = OpAccessChain %_ptr_StorageBuffer_mat3v2float %sb %uint_0 %uint_19
+               OpStore %125 %126
+        %129 = OpAccessChain %_ptr_StorageBuffer_mat3v3float %sb %uint_0 %uint_20
+               OpStore %129 %130
+        %133 = OpAccessChain %_ptr_StorageBuffer_mat3v4float %sb %uint_0 %uint_21
+               OpStore %133 %134
+        %137 = OpAccessChain %_ptr_StorageBuffer_mat4v2float %sb %uint_0 %uint_22
+               OpStore %137 %138
+        %141 = OpAccessChain %_ptr_StorageBuffer_mat4v3float %sb %uint_0 %uint_23
+               OpStore %141 %142
+        %145 = OpAccessChain %_ptr_StorageBuffer_mat4v4float %sb %uint_0 %uint_24
+               OpStore %145 %146
+        %149 = OpAccessChain %_ptr_StorageBuffer_mat2v2half %sb %uint_0 %uint_25
+               OpStore %149 %150
+        %153 = OpAccessChain %_ptr_StorageBuffer_mat2v3half %sb %uint_0 %uint_26
+               OpStore %153 %154
+        %157 = OpAccessChain %_ptr_StorageBuffer_mat2v4half %sb %uint_0 %uint_27
+               OpStore %157 %158
+        %161 = OpAccessChain %_ptr_StorageBuffer_mat3v2half %sb %uint_0 %uint_28
+               OpStore %161 %162
+        %165 = OpAccessChain %_ptr_StorageBuffer_mat3v3half %sb %uint_0 %uint_29
+               OpStore %165 %166
+        %169 = OpAccessChain %_ptr_StorageBuffer_mat3v4half %sb %uint_0 %uint_30
+               OpStore %169 %170
+        %173 = OpAccessChain %_ptr_StorageBuffer_mat4v2half %sb %uint_0 %uint_31
+               OpStore %173 %174
+        %177 = OpAccessChain %_ptr_StorageBuffer_mat4v3half %sb %uint_0 %uint_32
+               OpStore %177 %178
+        %181 = OpAccessChain %_ptr_StorageBuffer_mat4v4half %sb %uint_0 %uint_33
+               OpStore %181 %182
+        %185 = OpAccessChain %_ptr_StorageBuffer__arr_v3float_uint_2 %sb %uint_0 %uint_34
+               OpStore %185 %186
+        %189 = OpAccessChain %_ptr_StorageBuffer__arr_mat4v2half_uint_2 %sb %uint_0 %uint_35
+               OpStore %189 %190
+        %193 = OpAccessChain %_ptr_StorageBuffer_Inner %sb %uint_0 %uint_36
+               OpStore %193 %194
+        %197 = OpAccessChain %_ptr_StorageBuffer__arr_Inner_uint_4 %sb %uint_0 %uint_37
+               OpStore %197 %198
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/storage/static_index/write_f16.wgsl.expected.wgsl b/test/tint/buffer/storage/static_index/write_f16.wgsl.expected.wgsl
new file mode 100644
index 0000000..d2f699a
--- /dev/null
+++ b/test/tint/buffer/storage/static_index/write_f16.wgsl.expected.wgsl

@@ -0,0 +1,92 @@
+enable f16;
+
+struct Inner {
+  scalar_i32 : i32,
+  scalar_f32 : f32,
+  scalar_f16 : f16,
+}
+
+struct S {
+  scalar_f32 : f32,
+  scalar_i32 : i32,
+  scalar_u32 : u32,
+  scalar_f16 : f16,
+  vec2_f32 : vec2<f32>,
+  vec2_i32 : vec2<i32>,
+  vec2_u32 : vec2<u32>,
+  vec2_f16 : vec2<f16>,
+  vec3_f32 : vec3<f32>,
+  vec3_i32 : vec3<i32>,
+  vec3_u32 : vec3<u32>,
+  vec3_f16 : vec3<f16>,
+  vec4_f32 : vec4<f32>,
+  vec4_i32 : vec4<i32>,
+  vec4_u32 : vec4<u32>,
+  vec4_f16 : vec4<f16>,
+  mat2x2_f32 : mat2x2<f32>,
+  mat2x3_f32 : mat2x3<f32>,
+  mat2x4_f32 : mat2x4<f32>,
+  mat3x2_f32 : mat3x2<f32>,
+  mat3x3_f32 : mat3x3<f32>,
+  mat3x4_f32 : mat3x4<f32>,
+  mat4x2_f32 : mat4x2<f32>,
+  mat4x3_f32 : mat4x3<f32>,
+  mat4x4_f32 : mat4x4<f32>,
+  mat2x2_f16 : mat2x2<f16>,
+  mat2x3_f16 : mat2x3<f16>,
+  mat2x4_f16 : mat2x4<f16>,
+  mat3x2_f16 : mat3x2<f16>,
+  mat3x3_f16 : mat3x3<f16>,
+  mat3x4_f16 : mat3x4<f16>,
+  mat4x2_f16 : mat4x2<f16>,
+  mat4x3_f16 : mat4x3<f16>,
+  mat4x4_f16 : mat4x4<f16>,
+  arr2_vec3_f32 : array<vec3<f32>, 2>,
+  arr2_mat4x2_f16 : array<mat4x2<f16>, 2>,
+  struct_inner : Inner,
+  array_struct_inner : array<Inner, 4>,
+}
+
+@binding(0) @group(0) var<storage, read_write> sb : S;
+
+@compute @workgroup_size(1)
+fn main() {
+  sb.scalar_f32 = f32();
+  sb.scalar_i32 = i32();
+  sb.scalar_u32 = u32();
+  sb.scalar_f16 = f16();
+  sb.vec2_f32 = vec2<f32>();
+  sb.vec2_i32 = vec2<i32>();
+  sb.vec2_u32 = vec2<u32>();
+  sb.vec2_f16 = vec2<f16>();
+  sb.vec3_f32 = vec3<f32>();
+  sb.vec3_i32 = vec3<i32>();
+  sb.vec3_u32 = vec3<u32>();
+  sb.vec3_f16 = vec3<f16>();
+  sb.vec4_f32 = vec4<f32>();
+  sb.vec4_i32 = vec4<i32>();
+  sb.vec4_u32 = vec4<u32>();
+  sb.vec4_f16 = vec4<f16>();
+  sb.mat2x2_f32 = mat2x2<f32>();
+  sb.mat2x3_f32 = mat2x3<f32>();
+  sb.mat2x4_f32 = mat2x4<f32>();
+  sb.mat3x2_f32 = mat3x2<f32>();
+  sb.mat3x3_f32 = mat3x3<f32>();
+  sb.mat3x4_f32 = mat3x4<f32>();
+  sb.mat4x2_f32 = mat4x2<f32>();
+  sb.mat4x3_f32 = mat4x3<f32>();
+  sb.mat4x4_f32 = mat4x4<f32>();
+  sb.mat2x2_f16 = mat2x2<f16>();
+  sb.mat2x3_f16 = mat2x3<f16>();
+  sb.mat2x4_f16 = mat2x4<f16>();
+  sb.mat3x2_f16 = mat3x2<f16>();
+  sb.mat3x3_f16 = mat3x3<f16>();
+  sb.mat3x4_f16 = mat3x4<f16>();
+  sb.mat4x2_f16 = mat4x2<f16>();
+  sb.mat4x3_f16 = mat4x3<f16>();
+  sb.mat4x4_f16 = mat4x4<f16>();
+  sb.arr2_vec3_f32 = array<vec3<f32>, 2>();
+  sb.arr2_mat4x2_f16 = array<mat4x2<f16>, 2>();
+  sb.struct_inner = Inner();
+  sb.array_struct_inner = array<Inner, 4>();
+}

diff --git a/test/tint/buffer/storage/types/array4_f16.wgsl b/test/tint/buffer/storage/types/array4_f16.wgsl
new file mode 100644
index 0000000..adc6067
--- /dev/null
+++ b/test/tint/buffer/storage/types/array4_f16.wgsl

@@ -0,0 +1,12 @@
+enable f16;  // the f16 element type used below is only available with this extension
+
+@group(0) @binding(0)
+var<storage, read> in : array<f16, 4>;  // read-only storage buffer: copy source
+
+@group(0) @binding(1)
+var<storage, read_write> out : array<f16, 4>;  // writable storage buffer: copy destination
+
+@compute @workgroup_size(1)
+fn main() {
+  out = in;  // whole-array assignment from one storage buffer to the other
+}

diff --git a/test/tint/buffer/storage/types/array4_f16.wgsl.expected.dxc.hlsl b/test/tint/buffer/storage/types/array4_f16.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..15fa0ab
--- /dev/null
+++ b/test/tint/buffer/storage/types/array4_f16.wgsl.expected.dxc.hlsl

@@ -0,0 +1,28 @@
+ByteAddressBuffer tint_symbol : register(t0, space0);
+RWByteAddressBuffer tint_symbol_1 : register(u1, space0);
+
+void tint_symbol_2(RWByteAddressBuffer buffer, uint offset, float16_t value[4]) {
+  float16_t array[4] = value;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      buffer.Store<float16_t>((offset + (i * 2u)), array[i]);
+    }
+  }
+}
+
+typedef float16_t tint_symbol_4_ret[4];
+tint_symbol_4_ret tint_symbol_4(ByteAddressBuffer buffer, uint offset) {
+  float16_t arr[4] = (float16_t[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = buffer.Load<float16_t>((offset + (i_1 * 2u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  tint_symbol_2(tint_symbol_1, 0u, tint_symbol_4(tint_symbol, 0u));
+  return;
+}

diff --git a/test/tint/buffer/storage/types/array4_f16.wgsl.expected.fxc.hlsl b/test/tint/buffer/storage/types/array4_f16.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..68566c9
--- /dev/null
+++ b/test/tint/buffer/storage/types/array4_f16.wgsl.expected.fxc.hlsl

@@ -0,0 +1,33 @@
+SKIP: FAILED
+
+ByteAddressBuffer tint_symbol : register(t0, space0);
+RWByteAddressBuffer tint_symbol_1 : register(u1, space0);
+
+void tint_symbol_2(RWByteAddressBuffer buffer, uint offset, float16_t value[4]) {
+  float16_t array[4] = value;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      buffer.Store<float16_t>((offset + (i * 2u)), array[i]);
+    }
+  }
+}
+
+typedef float16_t tint_symbol_4_ret[4];
+tint_symbol_4_ret tint_symbol_4(ByteAddressBuffer buffer, uint offset) {
+  float16_t arr[4] = (float16_t[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = buffer.Load<float16_t>((offset + (i_1 * 2u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  tint_symbol_2(tint_symbol_1, 0u, tint_symbol_4(tint_symbol, 0u));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000026D14358AA0(4,61-69): error X3000: unrecognized identifier 'float16_t'
+

diff --git a/test/tint/buffer/storage/types/array4_f16.wgsl.expected.glsl b/test/tint/buffer/storage/types/array4_f16.wgsl.expected.glsl
new file mode 100644
index 0000000..ea5353e
--- /dev/null
+++ b/test/tint/buffer/storage/types/array4_f16.wgsl.expected.glsl

@@ -0,0 +1,20 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std430) buffer tint_symbol_block_ssbo {
+  float16_t inner[4];
+} tint_symbol;
+
+layout(binding = 1, std430) buffer tint_symbol_block_ssbo_1 {
+  float16_t inner[4];
+} tint_symbol_1;
+
+void tint_symbol_2() {
+  tint_symbol_1.inner = tint_symbol.inner;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  tint_symbol_2();
+  return;
+}

diff --git a/test/tint/buffer/storage/types/array4_f16.wgsl.expected.msl b/test/tint/buffer/storage/types/array4_f16.wgsl.expected.msl
new file mode 100644
index 0000000..78f6acf
--- /dev/null
+++ b/test/tint/buffer/storage/types/array4_f16.wgsl.expected.msl

@@ -0,0 +1,21 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+kernel void tint_symbol(device tint_array<half, 4>* tint_symbol_1 [[buffer(1)]], const device tint_array<half, 4>* tint_symbol_2 [[buffer(0)]]) {
+  *(tint_symbol_1) = *(tint_symbol_2);
+  return;
+}
+

diff --git a/test/tint/buffer/storage/types/array4_f16.wgsl.expected.spvasm b/test/tint/buffer/storage/types/array4_f16.wgsl.expected.spvasm
new file mode 100644
index 0000000..64da920
--- /dev/null
+++ b/test/tint/buffer/storage/types/array4_f16.wgsl.expected.spvasm

@@ -0,0 +1,46 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 18
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpName %in_block "in_block"
+               OpMemberName %in_block 0 "inner"
+               OpName %in "in"
+               OpName %out "out"
+               OpName %main "main"
+               OpDecorate %in_block Block
+               OpMemberDecorate %in_block 0 Offset 0
+               OpDecorate %_arr_half_uint_4 ArrayStride 2
+               OpDecorate %in NonWritable
+               OpDecorate %in DescriptorSet 0
+               OpDecorate %in Binding 0
+               OpDecorate %out DescriptorSet 0
+               OpDecorate %out Binding 1
+       %half = OpTypeFloat 16
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_half_uint_4 = OpTypeArray %half %uint_4
+   %in_block = OpTypeStruct %_arr_half_uint_4
+%_ptr_StorageBuffer_in_block = OpTypePointer StorageBuffer %in_block
+         %in = OpVariable %_ptr_StorageBuffer_in_block StorageBuffer
+        %out = OpVariable %_ptr_StorageBuffer_in_block StorageBuffer
+       %void = OpTypeVoid
+          %9 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_StorageBuffer__arr_half_uint_4 = OpTypePointer StorageBuffer %_arr_half_uint_4
+       %main = OpFunction %void None %9
+         %12 = OpLabel
+         %15 = OpAccessChain %_ptr_StorageBuffer__arr_half_uint_4 %out %uint_0
+         %16 = OpAccessChain %_ptr_StorageBuffer__arr_half_uint_4 %in %uint_0
+         %17 = OpLoad %_arr_half_uint_4 %16
+               OpStore %15 %17
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/storage/types/array4_f16.wgsl.expected.wgsl b/test/tint/buffer/storage/types/array4_f16.wgsl.expected.wgsl
new file mode 100644
index 0000000..69b786c
--- /dev/null
+++ b/test/tint/buffer/storage/types/array4_f16.wgsl.expected.wgsl

@@ -0,0 +1,10 @@
+enable f16;
+
+@group(0) @binding(0) var<storage, read> in : array<f16, 4>;
+
+@group(0) @binding(1) var<storage, read_write> out : array<f16, 4>;
+
+@compute @workgroup_size(1)
+fn main() {
+  out = in;
+}

diff --git a/test/tint/buffer/storage/types/array.wgsl b/test/tint/buffer/storage/types/array4_f32.wgsl
similarity index 100%
rename from test/tint/buffer/storage/types/array.wgsl
rename to test/tint/buffer/storage/types/array4_f32.wgsl


diff --git a/test/tint/buffer/storage/types/array.wgsl.expected.dxc.hlsl b/test/tint/buffer/storage/types/array4_f32.wgsl.expected.dxc.hlsl
similarity index 100%
rename from test/tint/buffer/storage/types/array.wgsl.expected.dxc.hlsl
rename to test/tint/buffer/storage/types/array4_f32.wgsl.expected.dxc.hlsl


diff --git a/test/tint/buffer/storage/types/array.wgsl.expected.fxc.hlsl b/test/tint/buffer/storage/types/array4_f32.wgsl.expected.fxc.hlsl
similarity index 100%
rename from test/tint/buffer/storage/types/array.wgsl.expected.fxc.hlsl
rename to test/tint/buffer/storage/types/array4_f32.wgsl.expected.fxc.hlsl


diff --git a/test/tint/buffer/storage/types/array.wgsl.expected.glsl b/test/tint/buffer/storage/types/array4_f32.wgsl.expected.glsl
similarity index 100%
rename from test/tint/buffer/storage/types/array.wgsl.expected.glsl
rename to test/tint/buffer/storage/types/array4_f32.wgsl.expected.glsl


diff --git a/test/tint/buffer/storage/types/array.wgsl.expected.msl b/test/tint/buffer/storage/types/array4_f32.wgsl.expected.msl
similarity index 100%
rename from test/tint/buffer/storage/types/array.wgsl.expected.msl
rename to test/tint/buffer/storage/types/array4_f32.wgsl.expected.msl


diff --git a/test/tint/buffer/storage/types/array.wgsl.expected.spvasm b/test/tint/buffer/storage/types/array4_f32.wgsl.expected.spvasm
similarity index 100%
rename from test/tint/buffer/storage/types/array.wgsl.expected.spvasm
rename to test/tint/buffer/storage/types/array4_f32.wgsl.expected.spvasm


diff --git a/test/tint/buffer/storage/types/array.wgsl.expected.wgsl b/test/tint/buffer/storage/types/array4_f32.wgsl.expected.wgsl
similarity index 100%
rename from test/tint/buffer/storage/types/array.wgsl.expected.wgsl
rename to test/tint/buffer/storage/types/array4_f32.wgsl.expected.wgsl


diff --git a/test/tint/buffer/storage/types/f16.wgsl b/test/tint/buffer/storage/types/f16.wgsl
new file mode 100644
index 0000000..4cc239e
--- /dev/null
+++ b/test/tint/buffer/storage/types/f16.wgsl

@@ -0,0 +1,12 @@
+enable f16;
+
+@group(0) @binding(0)
+var<storage, read> in : f16;
+
+@group(0) @binding(1)
+var<storage, read_write> out : f16;
+
+@compute @workgroup_size(1)
+fn main() {
+  out = in;
+}

diff --git a/test/tint/buffer/storage/types/f16.wgsl.expected.dxc.hlsl b/test/tint/buffer/storage/types/f16.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..bfdede4
--- /dev/null
+++ b/test/tint/buffer/storage/types/f16.wgsl.expected.dxc.hlsl

@@ -0,0 +1,8 @@
+ByteAddressBuffer tint_symbol : register(t0, space0);
+RWByteAddressBuffer tint_symbol_1 : register(u1, space0);
+
+[numthreads(1, 1, 1)]
+void main() {
+  tint_symbol_1.Store<float16_t>(0u, tint_symbol.Load<float16_t>(0u));
+  return;
+}

diff --git a/test/tint/buffer/storage/types/f16.wgsl.expected.fxc.hlsl b/test/tint/buffer/storage/types/f16.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..b9b25bf
--- /dev/null
+++ b/test/tint/buffer/storage/types/f16.wgsl.expected.fxc.hlsl

@@ -0,0 +1,13 @@
+SKIP: FAILED
+
+ByteAddressBuffer tint_symbol : register(t0, space0);
+RWByteAddressBuffer tint_symbol_1 : register(u1, space0);
+
+[numthreads(1, 1, 1)]
+void main() {
+  tint_symbol_1.Store<float16_t>(0u, tint_symbol.Load<float16_t>(0u));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000020E4EF78E80(6,3-21): error X3018: invalid subscript 'Store'
+

diff --git a/test/tint/buffer/storage/types/f16.wgsl.expected.glsl b/test/tint/buffer/storage/types/f16.wgsl.expected.glsl
new file mode 100644
index 0000000..7f3dfad
--- /dev/null
+++ b/test/tint/buffer/storage/types/f16.wgsl.expected.glsl

@@ -0,0 +1,20 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std430) buffer tint_symbol_block_ssbo {
+  float16_t inner;
+} tint_symbol;
+
+layout(binding = 1, std430) buffer tint_symbol_block_ssbo_1 {
+  float16_t inner;
+} tint_symbol_1;
+
+void tint_symbol_2() {
+  tint_symbol_1.inner = tint_symbol.inner;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  tint_symbol_2();
+  return;
+}

diff --git a/test/tint/buffer/storage/types/f16.wgsl.expected.msl b/test/tint/buffer/storage/types/f16.wgsl.expected.msl
new file mode 100644
index 0000000..5991f9e
--- /dev/null
+++ b/test/tint/buffer/storage/types/f16.wgsl.expected.msl

@@ -0,0 +1,8 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void tint_symbol(device half* tint_symbol_1 [[buffer(0)]], const device half* tint_symbol_2 [[buffer(1)]]) {
+  *(tint_symbol_1) = *(tint_symbol_2);
+  return;
+}
+

diff --git a/test/tint/buffer/storage/types/f16.wgsl.expected.spvasm b/test/tint/buffer/storage/types/f16.wgsl.expected.spvasm
new file mode 100644
index 0000000..18f45d8
--- /dev/null
+++ b/test/tint/buffer/storage/types/f16.wgsl.expected.spvasm

@@ -0,0 +1,43 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 16
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpName %in_block "in_block"
+               OpMemberName %in_block 0 "inner"
+               OpName %in "in"
+               OpName %out "out"
+               OpName %main "main"
+               OpDecorate %in_block Block
+               OpMemberDecorate %in_block 0 Offset 0
+               OpDecorate %in NonWritable
+               OpDecorate %in DescriptorSet 0
+               OpDecorate %in Binding 0
+               OpDecorate %out DescriptorSet 0
+               OpDecorate %out Binding 1
+       %half = OpTypeFloat 16
+   %in_block = OpTypeStruct %half
+%_ptr_StorageBuffer_in_block = OpTypePointer StorageBuffer %in_block
+         %in = OpVariable %_ptr_StorageBuffer_in_block StorageBuffer
+        %out = OpVariable %_ptr_StorageBuffer_in_block StorageBuffer
+       %void = OpTypeVoid
+          %6 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
+       %main = OpFunction %void None %6
+          %9 = OpLabel
+         %13 = OpAccessChain %_ptr_StorageBuffer_half %out %uint_0
+         %14 = OpAccessChain %_ptr_StorageBuffer_half %in %uint_0
+         %15 = OpLoad %half %14
+               OpStore %13 %15
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/storage/types/f16.wgsl.expected.wgsl b/test/tint/buffer/storage/types/f16.wgsl.expected.wgsl
new file mode 100644
index 0000000..69bde46
--- /dev/null
+++ b/test/tint/buffer/storage/types/f16.wgsl.expected.wgsl

@@ -0,0 +1,10 @@
+enable f16;
+
+@group(0) @binding(0) var<storage, read> in : f16;
+
+@group(0) @binding(1) var<storage, read_write> out : f16;
+
+@compute @workgroup_size(1)
+fn main() {
+  out = in;
+}

diff --git a/test/tint/buffer/storage/types/mat2x2_f16.wgsl b/test/tint/buffer/storage/types/mat2x2_f16.wgsl
new file mode 100644
index 0000000..cf89bf3
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat2x2_f16.wgsl

@@ -0,0 +1,12 @@
+enable f16;
+
+@group(0) @binding(0)
+var<storage, read> in : mat2x2<f16>;
+
+@group(0) @binding(1)
+var<storage, read_write> out : mat2x2<f16>;
+
+@compute @workgroup_size(1)
+fn main() {
+  out = in;
+}

diff --git a/test/tint/buffer/storage/types/mat2x2_f16.wgsl.expected.dxc.hlsl b/test/tint/buffer/storage/types/mat2x2_f16.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..3730b98
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat2x2_f16.wgsl.expected.dxc.hlsl

@@ -0,0 +1,17 @@
+ByteAddressBuffer tint_symbol : register(t0, space0);
+RWByteAddressBuffer tint_symbol_1 : register(u1, space0);
+
+void tint_symbol_2(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 2, 2> value) {
+  buffer.Store<vector<float16_t, 2> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 4u), value[1u]);
+}
+
+matrix<float16_t, 2, 2> tint_symbol_4(ByteAddressBuffer buffer, uint offset) {
+  return matrix<float16_t, 2, 2>(buffer.Load<vector<float16_t, 2> >((offset + 0u)), buffer.Load<vector<float16_t, 2> >((offset + 4u)));
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  tint_symbol_2(tint_symbol_1, 0u, tint_symbol_4(tint_symbol, 0u));
+  return;
+}

diff --git a/test/tint/buffer/storage/types/mat2x2_f16.wgsl.expected.fxc.hlsl b/test/tint/buffer/storage/types/mat2x2_f16.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..ac54fc4
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat2x2_f16.wgsl.expected.fxc.hlsl

@@ -0,0 +1,23 @@
+SKIP: FAILED
+
+ByteAddressBuffer tint_symbol : register(t0, space0);
+RWByteAddressBuffer tint_symbol_1 : register(u1, space0);
+
+void tint_symbol_2(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 2, 2> value) {
+  buffer.Store<vector<float16_t, 2> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 4u), value[1u]);
+}
+
+matrix<float16_t, 2, 2> tint_symbol_4(ByteAddressBuffer buffer, uint offset) {
+  return matrix<float16_t, 2, 2>(buffer.Load<vector<float16_t, 2> >((offset + 0u)), buffer.Load<vector<float16_t, 2> >((offset + 4u)));
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  tint_symbol_2(tint_symbol_1, 0u, tint_symbol_4(tint_symbol, 0u));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001C807B6A460(4,68-76): error X3000: syntax error: unexpected token 'float16_t'
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001C807B6A460(5,3-14): error X3018: invalid subscript 'Store'
+

diff --git a/test/tint/buffer/storage/types/mat2x2_f16.wgsl.expected.glsl b/test/tint/buffer/storage/types/mat2x2_f16.wgsl.expected.glsl
new file mode 100644
index 0000000..7c9f01a
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat2x2_f16.wgsl.expected.glsl

@@ -0,0 +1,20 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std430) buffer tint_symbol_block_ssbo {
+  f16mat2 inner;
+} tint_symbol;
+
+layout(binding = 1, std430) buffer tint_symbol_block_ssbo_1 {
+  f16mat2 inner;
+} tint_symbol_1;
+
+void tint_symbol_2() {
+  tint_symbol_1.inner = tint_symbol.inner;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  tint_symbol_2();
+  return;
+}

diff --git a/test/tint/buffer/storage/types/mat2x2_f16.wgsl.expected.msl b/test/tint/buffer/storage/types/mat2x2_f16.wgsl.expected.msl
new file mode 100644
index 0000000..9c048c5
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat2x2_f16.wgsl.expected.msl

@@ -0,0 +1,8 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void tint_symbol(device half2x2* tint_symbol_1 [[buffer(0)]], const device half2x2* tint_symbol_2 [[buffer(1)]]) {
+  *(tint_symbol_1) = *(tint_symbol_2);
+  return;
+}
+

diff --git a/test/tint/buffer/storage/types/mat2x2_f16.wgsl.expected.spvasm b/test/tint/buffer/storage/types/mat2x2_f16.wgsl.expected.spvasm
new file mode 100644
index 0000000..8bd8c4b
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat2x2_f16.wgsl.expected.spvasm

@@ -0,0 +1,47 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 18
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpName %in_block "in_block"
+               OpMemberName %in_block 0 "inner"
+               OpName %in "in"
+               OpName %out "out"
+               OpName %main "main"
+               OpDecorate %in_block Block
+               OpMemberDecorate %in_block 0 Offset 0
+               OpMemberDecorate %in_block 0 ColMajor
+               OpMemberDecorate %in_block 0 MatrixStride 4
+               OpDecorate %in NonWritable
+               OpDecorate %in DescriptorSet 0
+               OpDecorate %in Binding 0
+               OpDecorate %out DescriptorSet 0
+               OpDecorate %out Binding 1
+       %half = OpTypeFloat 16
+     %v2half = OpTypeVector %half 2
+ %mat2v2half = OpTypeMatrix %v2half 2
+   %in_block = OpTypeStruct %mat2v2half
+%_ptr_StorageBuffer_in_block = OpTypePointer StorageBuffer %in_block
+         %in = OpVariable %_ptr_StorageBuffer_in_block StorageBuffer
+        %out = OpVariable %_ptr_StorageBuffer_in_block StorageBuffer
+       %void = OpTypeVoid
+          %8 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_StorageBuffer_mat2v2half = OpTypePointer StorageBuffer %mat2v2half
+       %main = OpFunction %void None %8
+         %11 = OpLabel
+         %15 = OpAccessChain %_ptr_StorageBuffer_mat2v2half %out %uint_0
+         %16 = OpAccessChain %_ptr_StorageBuffer_mat2v2half %in %uint_0
+         %17 = OpLoad %mat2v2half %16
+               OpStore %15 %17
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/storage/types/mat2x2_f16.wgsl.expected.wgsl b/test/tint/buffer/storage/types/mat2x2_f16.wgsl.expected.wgsl
new file mode 100644
index 0000000..4beb63d
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat2x2_f16.wgsl.expected.wgsl

@@ -0,0 +1,10 @@
+enable f16;
+
+@group(0) @binding(0) var<storage, read> in : mat2x2<f16>;
+
+@group(0) @binding(1) var<storage, read_write> out : mat2x2<f16>;
+
+@compute @workgroup_size(1)
+fn main() {
+  out = in;
+}

diff --git a/test/tint/buffer/storage/types/mat2x2.wgsl b/test/tint/buffer/storage/types/mat2x2_f32.wgsl
similarity index 100%
rename from test/tint/buffer/storage/types/mat2x2.wgsl
rename to test/tint/buffer/storage/types/mat2x2_f32.wgsl


diff --git a/test/tint/buffer/storage/types/mat2x2.wgsl.expected.dxc.hlsl b/test/tint/buffer/storage/types/mat2x2_f32.wgsl.expected.dxc.hlsl
similarity index 100%
rename from test/tint/buffer/storage/types/mat2x2.wgsl.expected.dxc.hlsl
rename to test/tint/buffer/storage/types/mat2x2_f32.wgsl.expected.dxc.hlsl


diff --git a/test/tint/buffer/storage/types/mat2x2.wgsl.expected.fxc.hlsl b/test/tint/buffer/storage/types/mat2x2_f32.wgsl.expected.fxc.hlsl
similarity index 100%
rename from test/tint/buffer/storage/types/mat2x2.wgsl.expected.fxc.hlsl
rename to test/tint/buffer/storage/types/mat2x2_f32.wgsl.expected.fxc.hlsl


diff --git a/test/tint/buffer/storage/types/mat2x2.wgsl.expected.glsl b/test/tint/buffer/storage/types/mat2x2_f32.wgsl.expected.glsl
similarity index 100%
rename from test/tint/buffer/storage/types/mat2x2.wgsl.expected.glsl
rename to test/tint/buffer/storage/types/mat2x2_f32.wgsl.expected.glsl


diff --git a/test/tint/buffer/storage/types/mat2x2.wgsl.expected.msl b/test/tint/buffer/storage/types/mat2x2_f32.wgsl.expected.msl
similarity index 100%
rename from test/tint/buffer/storage/types/mat2x2.wgsl.expected.msl
rename to test/tint/buffer/storage/types/mat2x2_f32.wgsl.expected.msl


diff --git a/test/tint/buffer/storage/types/mat2x2.wgsl.expected.spvasm b/test/tint/buffer/storage/types/mat2x2_f32.wgsl.expected.spvasm
similarity index 100%
rename from test/tint/buffer/storage/types/mat2x2.wgsl.expected.spvasm
rename to test/tint/buffer/storage/types/mat2x2_f32.wgsl.expected.spvasm


diff --git a/test/tint/buffer/storage/types/mat2x2.wgsl.expected.wgsl b/test/tint/buffer/storage/types/mat2x2_f32.wgsl.expected.wgsl
similarity index 100%
rename from test/tint/buffer/storage/types/mat2x2.wgsl.expected.wgsl
rename to test/tint/buffer/storage/types/mat2x2_f32.wgsl.expected.wgsl


diff --git a/test/tint/buffer/storage/types/mat2x3_f16.wgsl b/test/tint/buffer/storage/types/mat2x3_f16.wgsl
new file mode 100644
index 0000000..dba9a50
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat2x3_f16.wgsl

@@ -0,0 +1,12 @@
+enable f16;
+
+@group(0) @binding(0)
+var<storage, read> in : mat2x3<f16>;
+
+@group(0) @binding(1)
+var<storage, read_write> out : mat2x3<f16>;
+
+@compute @workgroup_size(1)
+fn main() {
+  out = in;
+}

diff --git a/test/tint/buffer/storage/types/mat2x3_f16.wgsl.expected.dxc.hlsl b/test/tint/buffer/storage/types/mat2x3_f16.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..0411bf3
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat2x3_f16.wgsl.expected.dxc.hlsl

@@ -0,0 +1,17 @@
+ByteAddressBuffer tint_symbol : register(t0, space0);
+RWByteAddressBuffer tint_symbol_1 : register(u1, space0);
+
+void tint_symbol_2(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 2, 3> value) {
+  buffer.Store<vector<float16_t, 3> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 8u), value[1u]);
+}
+
+matrix<float16_t, 2, 3> tint_symbol_4(ByteAddressBuffer buffer, uint offset) {
+  return matrix<float16_t, 2, 3>(buffer.Load<vector<float16_t, 3> >((offset + 0u)), buffer.Load<vector<float16_t, 3> >((offset + 8u)));
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  tint_symbol_2(tint_symbol_1, 0u, tint_symbol_4(tint_symbol, 0u));
+  return;
+}

diff --git a/test/tint/buffer/storage/types/mat2x3_f16.wgsl.expected.fxc.hlsl b/test/tint/buffer/storage/types/mat2x3_f16.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..3eb59f8
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat2x3_f16.wgsl.expected.fxc.hlsl

@@ -0,0 +1,23 @@
+SKIP: FAILED
+
+ByteAddressBuffer tint_symbol : register(t0, space0);
+RWByteAddressBuffer tint_symbol_1 : register(u1, space0);
+
+void tint_symbol_2(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 2, 3> value) {
+  buffer.Store<vector<float16_t, 3> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 8u), value[1u]);
+}
+
+matrix<float16_t, 2, 3> tint_symbol_4(ByteAddressBuffer buffer, uint offset) {
+  return matrix<float16_t, 2, 3>(buffer.Load<vector<float16_t, 3> >((offset + 0u)), buffer.Load<vector<float16_t, 3> >((offset + 8u)));
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  tint_symbol_2(tint_symbol_1, 0u, tint_symbol_4(tint_symbol, 0u));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000020545EDA130(4,68-76): error X3000: syntax error: unexpected token 'float16_t'
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000020545EDA130(5,3-14): error X3018: invalid subscript 'Store'
+

diff --git a/test/tint/buffer/storage/types/mat2x3_f16.wgsl.expected.glsl b/test/tint/buffer/storage/types/mat2x3_f16.wgsl.expected.glsl
new file mode 100644
index 0000000..6bed36f
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat2x3_f16.wgsl.expected.glsl

@@ -0,0 +1,20 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std430) buffer tint_symbol_block_ssbo {
+  f16mat2x3 inner;
+} tint_symbol;
+
+layout(binding = 1, std430) buffer tint_symbol_block_ssbo_1 {
+  f16mat2x3 inner;
+} tint_symbol_1;
+
+void tint_symbol_2() {
+  tint_symbol_1.inner = tint_symbol.inner;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  tint_symbol_2();
+  return;
+}

diff --git a/test/tint/buffer/storage/types/mat2x3_f16.wgsl.expected.msl b/test/tint/buffer/storage/types/mat2x3_f16.wgsl.expected.msl
new file mode 100644
index 0000000..81af92d
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat2x3_f16.wgsl.expected.msl

@@ -0,0 +1,8 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void tint_symbol(device half2x3* tint_symbol_1 [[buffer(0)]], const device half2x3* tint_symbol_2 [[buffer(1)]]) {
+  *(tint_symbol_1) = *(tint_symbol_2);
+  return;
+}
+

diff --git a/test/tint/buffer/storage/types/mat2x3_f16.wgsl.expected.spvasm b/test/tint/buffer/storage/types/mat2x3_f16.wgsl.expected.spvasm
new file mode 100644
index 0000000..5ddb847
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat2x3_f16.wgsl.expected.spvasm

@@ -0,0 +1,47 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 18
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpName %in_block "in_block"
+               OpMemberName %in_block 0 "inner"
+               OpName %in "in"
+               OpName %out "out"
+               OpName %main "main"
+               OpDecorate %in_block Block
+               OpMemberDecorate %in_block 0 Offset 0
+               OpMemberDecorate %in_block 0 ColMajor
+               OpMemberDecorate %in_block 0 MatrixStride 8
+               OpDecorate %in NonWritable
+               OpDecorate %in DescriptorSet 0
+               OpDecorate %in Binding 0
+               OpDecorate %out DescriptorSet 0
+               OpDecorate %out Binding 1
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+ %mat2v3half = OpTypeMatrix %v3half 2
+   %in_block = OpTypeStruct %mat2v3half
+%_ptr_StorageBuffer_in_block = OpTypePointer StorageBuffer %in_block
+         %in = OpVariable %_ptr_StorageBuffer_in_block StorageBuffer
+        %out = OpVariable %_ptr_StorageBuffer_in_block StorageBuffer
+       %void = OpTypeVoid
+          %8 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_StorageBuffer_mat2v3half = OpTypePointer StorageBuffer %mat2v3half
+       %main = OpFunction %void None %8
+         %11 = OpLabel
+         %15 = OpAccessChain %_ptr_StorageBuffer_mat2v3half %out %uint_0
+         %16 = OpAccessChain %_ptr_StorageBuffer_mat2v3half %in %uint_0
+         %17 = OpLoad %mat2v3half %16
+               OpStore %15 %17
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/storage/types/mat2x3_f16.wgsl.expected.wgsl b/test/tint/buffer/storage/types/mat2x3_f16.wgsl.expected.wgsl
new file mode 100644
index 0000000..b60f8d1
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat2x3_f16.wgsl.expected.wgsl

@@ -0,0 +1,10 @@
+enable f16;
+
+@group(0) @binding(0) var<storage, read> in : mat2x3<f16>;
+
+@group(0) @binding(1) var<storage, read_write> out : mat2x3<f16>;
+
+@compute @workgroup_size(1)
+fn main() {
+  out = in;
+}

diff --git a/test/tint/buffer/storage/types/mat2x3.wgsl b/test/tint/buffer/storage/types/mat2x3_f32.wgsl
similarity index 100%
rename from test/tint/buffer/storage/types/mat2x3.wgsl
rename to test/tint/buffer/storage/types/mat2x3_f32.wgsl


diff --git a/test/tint/buffer/storage/types/mat2x3.wgsl.expected.dxc.hlsl b/test/tint/buffer/storage/types/mat2x3_f32.wgsl.expected.dxc.hlsl
similarity index 100%
rename from test/tint/buffer/storage/types/mat2x3.wgsl.expected.dxc.hlsl
rename to test/tint/buffer/storage/types/mat2x3_f32.wgsl.expected.dxc.hlsl


diff --git a/test/tint/buffer/storage/types/mat2x3.wgsl.expected.fxc.hlsl b/test/tint/buffer/storage/types/mat2x3_f32.wgsl.expected.fxc.hlsl
similarity index 100%
rename from test/tint/buffer/storage/types/mat2x3.wgsl.expected.fxc.hlsl
rename to test/tint/buffer/storage/types/mat2x3_f32.wgsl.expected.fxc.hlsl


diff --git a/test/tint/buffer/storage/types/mat2x3.wgsl.expected.glsl b/test/tint/buffer/storage/types/mat2x3_f32.wgsl.expected.glsl
similarity index 100%
rename from test/tint/buffer/storage/types/mat2x3.wgsl.expected.glsl
rename to test/tint/buffer/storage/types/mat2x3_f32.wgsl.expected.glsl


diff --git a/test/tint/buffer/storage/types/mat2x3.wgsl.expected.msl b/test/tint/buffer/storage/types/mat2x3_f32.wgsl.expected.msl
similarity index 100%
rename from test/tint/buffer/storage/types/mat2x3.wgsl.expected.msl
rename to test/tint/buffer/storage/types/mat2x3_f32.wgsl.expected.msl


diff --git a/test/tint/buffer/storage/types/mat2x3.wgsl.expected.spvasm b/test/tint/buffer/storage/types/mat2x3_f32.wgsl.expected.spvasm
similarity index 100%
rename from test/tint/buffer/storage/types/mat2x3.wgsl.expected.spvasm
rename to test/tint/buffer/storage/types/mat2x3_f32.wgsl.expected.spvasm


diff --git a/test/tint/buffer/storage/types/mat2x3.wgsl.expected.wgsl b/test/tint/buffer/storage/types/mat2x3_f32.wgsl.expected.wgsl
similarity index 100%
rename from test/tint/buffer/storage/types/mat2x3.wgsl.expected.wgsl
rename to test/tint/buffer/storage/types/mat2x3_f32.wgsl.expected.wgsl


diff --git a/test/tint/buffer/storage/types/mat2x4_f16.wgsl b/test/tint/buffer/storage/types/mat2x4_f16.wgsl
new file mode 100644
index 0000000..75b62f4
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat2x4_f16.wgsl

@@ -0,0 +1,12 @@
+enable f16;
+
+@group(0) @binding(0)
+var<storage, read> in : mat2x4<f16>;
+
+@group(0) @binding(1)
+var<storage, read_write> out : mat2x4<f16>;
+
+@compute @workgroup_size(1)
+fn main() {
+  out = in;
+}

diff --git a/test/tint/buffer/storage/types/mat2x4_f16.wgsl.expected.dxc.hlsl b/test/tint/buffer/storage/types/mat2x4_f16.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..ac4058f
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat2x4_f16.wgsl.expected.dxc.hlsl

@@ -0,0 +1,17 @@
+ByteAddressBuffer tint_symbol : register(t0, space0);
+RWByteAddressBuffer tint_symbol_1 : register(u1, space0);
+
+void tint_symbol_2(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 2, 4> value) {
+  buffer.Store<vector<float16_t, 4> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 8u), value[1u]);
+}
+
+matrix<float16_t, 2, 4> tint_symbol_4(ByteAddressBuffer buffer, uint offset) {
+  return matrix<float16_t, 2, 4>(buffer.Load<vector<float16_t, 4> >((offset + 0u)), buffer.Load<vector<float16_t, 4> >((offset + 8u)));
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  tint_symbol_2(tint_symbol_1, 0u, tint_symbol_4(tint_symbol, 0u));
+  return;
+}

diff --git a/test/tint/buffer/storage/types/mat2x4_f16.wgsl.expected.fxc.hlsl b/test/tint/buffer/storage/types/mat2x4_f16.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..d823c51
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat2x4_f16.wgsl.expected.fxc.hlsl

@@ -0,0 +1,23 @@
+SKIP: FAILED
+
+ByteAddressBuffer tint_symbol : register(t0, space0);
+RWByteAddressBuffer tint_symbol_1 : register(u1, space0);
+
+void tint_symbol_2(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 2, 4> value) {
+  buffer.Store<vector<float16_t, 4> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 8u), value[1u]);
+}
+
+matrix<float16_t, 2, 4> tint_symbol_4(ByteAddressBuffer buffer, uint offset) {
+  return matrix<float16_t, 2, 4>(buffer.Load<vector<float16_t, 4> >((offset + 0u)), buffer.Load<vector<float16_t, 4> >((offset + 8u)));
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  tint_symbol_2(tint_symbol_1, 0u, tint_symbol_4(tint_symbol, 0u));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001A3CD399A80(4,68-76): error X3000: syntax error: unexpected token 'float16_t'
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001A3CD399A80(5,3-14): error X3018: invalid subscript 'Store'
+

diff --git a/test/tint/buffer/storage/types/mat2x4_f16.wgsl.expected.glsl b/test/tint/buffer/storage/types/mat2x4_f16.wgsl.expected.glsl
new file mode 100644
index 0000000..47b484c
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat2x4_f16.wgsl.expected.glsl

@@ -0,0 +1,20 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std430) buffer tint_symbol_block_ssbo {
+  f16mat2x4 inner;
+} tint_symbol;
+
+layout(binding = 1, std430) buffer tint_symbol_block_ssbo_1 {
+  f16mat2x4 inner;
+} tint_symbol_1;
+
+void tint_symbol_2() {
+  tint_symbol_1.inner = tint_symbol.inner;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  tint_symbol_2();
+  return;
+}

diff --git a/test/tint/buffer/storage/types/mat2x4_f16.wgsl.expected.msl b/test/tint/buffer/storage/types/mat2x4_f16.wgsl.expected.msl
new file mode 100644
index 0000000..7901bcb
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat2x4_f16.wgsl.expected.msl

@@ -0,0 +1,8 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void tint_symbol(device half2x4* tint_symbol_1 [[buffer(0)]], const device half2x4* tint_symbol_2 [[buffer(1)]]) {
+  *(tint_symbol_1) = *(tint_symbol_2);
+  return;
+}
+

diff --git a/test/tint/buffer/storage/types/mat2x4_f16.wgsl.expected.spvasm b/test/tint/buffer/storage/types/mat2x4_f16.wgsl.expected.spvasm
new file mode 100644
index 0000000..529e93f
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat2x4_f16.wgsl.expected.spvasm

@@ -0,0 +1,47 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 18
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpName %in_block "in_block"
+               OpMemberName %in_block 0 "inner"
+               OpName %in "in"
+               OpName %out "out"
+               OpName %main "main"
+               OpDecorate %in_block Block
+               OpMemberDecorate %in_block 0 Offset 0
+               OpMemberDecorate %in_block 0 ColMajor
+               OpMemberDecorate %in_block 0 MatrixStride 8
+               OpDecorate %in NonWritable
+               OpDecorate %in DescriptorSet 0
+               OpDecorate %in Binding 0
+               OpDecorate %out DescriptorSet 0
+               OpDecorate %out Binding 1
+       %half = OpTypeFloat 16
+     %v4half = OpTypeVector %half 4
+ %mat2v4half = OpTypeMatrix %v4half 2
+   %in_block = OpTypeStruct %mat2v4half
+%_ptr_StorageBuffer_in_block = OpTypePointer StorageBuffer %in_block
+         %in = OpVariable %_ptr_StorageBuffer_in_block StorageBuffer
+        %out = OpVariable %_ptr_StorageBuffer_in_block StorageBuffer
+       %void = OpTypeVoid
+          %8 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_StorageBuffer_mat2v4half = OpTypePointer StorageBuffer %mat2v4half
+       %main = OpFunction %void None %8
+         %11 = OpLabel
+         %15 = OpAccessChain %_ptr_StorageBuffer_mat2v4half %out %uint_0
+         %16 = OpAccessChain %_ptr_StorageBuffer_mat2v4half %in %uint_0
+         %17 = OpLoad %mat2v4half %16
+               OpStore %15 %17
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/storage/types/mat2x4_f16.wgsl.expected.wgsl b/test/tint/buffer/storage/types/mat2x4_f16.wgsl.expected.wgsl
new file mode 100644
index 0000000..29a329e
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat2x4_f16.wgsl.expected.wgsl

@@ -0,0 +1,10 @@
+enable f16;
+
+@group(0) @binding(0) var<storage, read> in : mat2x4<f16>;
+
+@group(0) @binding(1) var<storage, read_write> out : mat2x4<f16>;
+
+@compute @workgroup_size(1)
+fn main() {
+  out = in;
+}

diff --git a/test/tint/buffer/storage/types/mat2x4_f32.wgsl b/test/tint/buffer/storage/types/mat2x4_f32.wgsl
new file mode 100644
index 0000000..148be1a
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat2x4_f32.wgsl

@@ -0,0 +1,10 @@
+@group(0) @binding(0)
+var<storage, read> in : mat2x4<f32>;
+
+@group(0) @binding(1)
+var<storage, read_write> out : mat2x4<f32>;
+
+@compute @workgroup_size(1)
+fn main() {
+  out = in;
+}

diff --git a/test/tint/buffer/storage/types/mat2x4_f32.wgsl.expected.dxc.hlsl b/test/tint/buffer/storage/types/mat2x4_f32.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..2776e89
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat2x4_f32.wgsl.expected.dxc.hlsl

@@ -0,0 +1,17 @@
+ByteAddressBuffer tint_symbol : register(t0, space0);
+RWByteAddressBuffer tint_symbol_1 : register(u1, space0);
+
+void tint_symbol_2(RWByteAddressBuffer buffer, uint offset, float2x4 value) {
+  buffer.Store4((offset + 0u), asuint(value[0u]));
+  buffer.Store4((offset + 16u), asuint(value[1u]));
+}
+
+float2x4 tint_symbol_4(ByteAddressBuffer buffer, uint offset) {
+  return float2x4(asfloat(buffer.Load4((offset + 0u))), asfloat(buffer.Load4((offset + 16u))));
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  tint_symbol_2(tint_symbol_1, 0u, tint_symbol_4(tint_symbol, 0u));
+  return;
+}

diff --git a/test/tint/buffer/storage/types/mat2x4_f32.wgsl.expected.fxc.hlsl b/test/tint/buffer/storage/types/mat2x4_f32.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..2776e89
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat2x4_f32.wgsl.expected.fxc.hlsl

@@ -0,0 +1,17 @@
+ByteAddressBuffer tint_symbol : register(t0, space0);
+RWByteAddressBuffer tint_symbol_1 : register(u1, space0);
+
+void tint_symbol_2(RWByteAddressBuffer buffer, uint offset, float2x4 value) {
+  buffer.Store4((offset + 0u), asuint(value[0u]));
+  buffer.Store4((offset + 16u), asuint(value[1u]));
+}
+
+float2x4 tint_symbol_4(ByteAddressBuffer buffer, uint offset) {
+  return float2x4(asfloat(buffer.Load4((offset + 0u))), asfloat(buffer.Load4((offset + 16u))));
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  tint_symbol_2(tint_symbol_1, 0u, tint_symbol_4(tint_symbol, 0u));
+  return;
+}

diff --git a/test/tint/buffer/storage/types/mat2x4_f32.wgsl.expected.glsl b/test/tint/buffer/storage/types/mat2x4_f32.wgsl.expected.glsl
new file mode 100644
index 0000000..d1da44b
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat2x4_f32.wgsl.expected.glsl

@@ -0,0 +1,19 @@
+#version 310 es
+
+layout(binding = 0, std430) buffer tint_symbol_block_ssbo {
+  mat2x4 inner;
+} tint_symbol;
+
+layout(binding = 1, std430) buffer tint_symbol_block_ssbo_1 {
+  mat2x4 inner;
+} tint_symbol_1;
+
+void tint_symbol_2() {
+  tint_symbol_1.inner = tint_symbol.inner;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  tint_symbol_2();
+  return;
+}

diff --git a/test/tint/buffer/storage/types/mat2x4_f32.wgsl.expected.msl b/test/tint/buffer/storage/types/mat2x4_f32.wgsl.expected.msl
new file mode 100644
index 0000000..4672889
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat2x4_f32.wgsl.expected.msl

@@ -0,0 +1,8 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void tint_symbol(device float2x4* tint_symbol_1 [[buffer(0)]], const device float2x4* tint_symbol_2 [[buffer(1)]]) {
+  *(tint_symbol_1) = *(tint_symbol_2);
+  return;
+}
+

diff --git a/test/tint/buffer/storage/types/mat2x4_f32.wgsl.expected.spvasm b/test/tint/buffer/storage/types/mat2x4_f32.wgsl.expected.spvasm
new file mode 100644
index 0000000..be3b863
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat2x4_f32.wgsl.expected.spvasm

@@ -0,0 +1,43 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 18
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpName %in_block "in_block"
+               OpMemberName %in_block 0 "inner"
+               OpName %in "in"
+               OpName %out "out"
+               OpName %main "main"
+               OpDecorate %in_block Block
+               OpMemberDecorate %in_block 0 Offset 0
+               OpMemberDecorate %in_block 0 ColMajor
+               OpMemberDecorate %in_block 0 MatrixStride 16
+               OpDecorate %in NonWritable
+               OpDecorate %in DescriptorSet 0
+               OpDecorate %in Binding 0
+               OpDecorate %out DescriptorSet 0
+               OpDecorate %out Binding 1
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%mat2v4float = OpTypeMatrix %v4float 2
+   %in_block = OpTypeStruct %mat2v4float
+%_ptr_StorageBuffer_in_block = OpTypePointer StorageBuffer %in_block
+         %in = OpVariable %_ptr_StorageBuffer_in_block StorageBuffer
+        %out = OpVariable %_ptr_StorageBuffer_in_block StorageBuffer
+       %void = OpTypeVoid
+          %8 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_StorageBuffer_mat2v4float = OpTypePointer StorageBuffer %mat2v4float
+       %main = OpFunction %void None %8
+         %11 = OpLabel
+         %15 = OpAccessChain %_ptr_StorageBuffer_mat2v4float %out %uint_0
+         %16 = OpAccessChain %_ptr_StorageBuffer_mat2v4float %in %uint_0
+         %17 = OpLoad %mat2v4float %16
+               OpStore %15 %17
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/storage/types/mat2x4_f32.wgsl.expected.wgsl b/test/tint/buffer/storage/types/mat2x4_f32.wgsl.expected.wgsl
new file mode 100644
index 0000000..2a476a7
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat2x4_f32.wgsl.expected.wgsl

@@ -0,0 +1,8 @@
+@group(0) @binding(0) var<storage, read> in : mat2x4<f32>;
+
+@group(0) @binding(1) var<storage, read_write> out : mat2x4<f32>;
+
+@compute @workgroup_size(1)
+fn main() {
+  out = in;
+}

diff --git a/test/tint/buffer/storage/types/mat3x2_f16.wgsl b/test/tint/buffer/storage/types/mat3x2_f16.wgsl
new file mode 100644
index 0000000..1274f1d
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat3x2_f16.wgsl

@@ -0,0 +1,12 @@
+enable f16;
+
+@group(0) @binding(0)
+var<storage, read> in : mat3x2<f16>;
+
+@group(0) @binding(1)
+var<storage, read_write> out : mat3x2<f16>;
+
+@compute @workgroup_size(1)
+fn main() {
+  out = in;
+}

diff --git a/test/tint/buffer/storage/types/mat3x2_f16.wgsl.expected.dxc.hlsl b/test/tint/buffer/storage/types/mat3x2_f16.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..f5984ae
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat3x2_f16.wgsl.expected.dxc.hlsl

@@ -0,0 +1,18 @@
+ByteAddressBuffer tint_symbol : register(t0, space0);
+RWByteAddressBuffer tint_symbol_1 : register(u1, space0);
+
+void tint_symbol_2(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 3, 2> value) {
+  buffer.Store<vector<float16_t, 2> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 4u), value[1u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 8u), value[2u]);
+}
+
+matrix<float16_t, 3, 2> tint_symbol_4(ByteAddressBuffer buffer, uint offset) {
+  return matrix<float16_t, 3, 2>(buffer.Load<vector<float16_t, 2> >((offset + 0u)), buffer.Load<vector<float16_t, 2> >((offset + 4u)), buffer.Load<vector<float16_t, 2> >((offset + 8u)));
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  tint_symbol_2(tint_symbol_1, 0u, tint_symbol_4(tint_symbol, 0u));
+  return;
+}

diff --git a/test/tint/buffer/storage/types/mat3x2_f16.wgsl.expected.fxc.hlsl b/test/tint/buffer/storage/types/mat3x2_f16.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..b48f684
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat3x2_f16.wgsl.expected.fxc.hlsl

@@ -0,0 +1,24 @@
+SKIP: FAILED
+
+ByteAddressBuffer tint_symbol : register(t0, space0);
+RWByteAddressBuffer tint_symbol_1 : register(u1, space0);
+
+void tint_symbol_2(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 3, 2> value) {
+  buffer.Store<vector<float16_t, 2> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 4u), value[1u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 8u), value[2u]);
+}
+
+matrix<float16_t, 3, 2> tint_symbol_4(ByteAddressBuffer buffer, uint offset) {
+  return matrix<float16_t, 3, 2>(buffer.Load<vector<float16_t, 2> >((offset + 0u)), buffer.Load<vector<float16_t, 2> >((offset + 4u)), buffer.Load<vector<float16_t, 2> >((offset + 8u)));
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  tint_symbol_2(tint_symbol_1, 0u, tint_symbol_4(tint_symbol, 0u));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001A3687DC740(4,68-76): error X3000: syntax error: unexpected token 'float16_t'
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001A3687DC740(5,3-14): error X3018: invalid subscript 'Store'
+

diff --git a/test/tint/buffer/storage/types/mat3x2_f16.wgsl.expected.glsl b/test/tint/buffer/storage/types/mat3x2_f16.wgsl.expected.glsl
new file mode 100644
index 0000000..e48dbc4
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat3x2_f16.wgsl.expected.glsl

@@ -0,0 +1,20 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std430) buffer tint_symbol_block_ssbo {
+  f16mat3x2 inner;
+} tint_symbol;
+
+layout(binding = 1, std430) buffer tint_symbol_block_ssbo_1 {
+  f16mat3x2 inner;
+} tint_symbol_1;
+
+void tint_symbol_2() {
+  tint_symbol_1.inner = tint_symbol.inner;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  tint_symbol_2();
+  return;
+}

diff --git a/test/tint/buffer/storage/types/mat3x2_f16.wgsl.expected.msl b/test/tint/buffer/storage/types/mat3x2_f16.wgsl.expected.msl
new file mode 100644
index 0000000..c71e0e4
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat3x2_f16.wgsl.expected.msl

@@ -0,0 +1,8 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void tint_symbol(device half3x2* tint_symbol_1 [[buffer(0)]], const device half3x2* tint_symbol_2 [[buffer(1)]]) {
+  *(tint_symbol_1) = *(tint_symbol_2);
+  return;
+}
+

diff --git a/test/tint/buffer/storage/types/mat3x2_f16.wgsl.expected.spvasm b/test/tint/buffer/storage/types/mat3x2_f16.wgsl.expected.spvasm
new file mode 100644
index 0000000..f39c243
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat3x2_f16.wgsl.expected.spvasm

@@ -0,0 +1,47 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 18
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpName %in_block "in_block"
+               OpMemberName %in_block 0 "inner"
+               OpName %in "in"
+               OpName %out "out"
+               OpName %main "main"
+               OpDecorate %in_block Block
+               OpMemberDecorate %in_block 0 Offset 0
+               OpMemberDecorate %in_block 0 ColMajor
+               OpMemberDecorate %in_block 0 MatrixStride 4
+               OpDecorate %in NonWritable
+               OpDecorate %in DescriptorSet 0
+               OpDecorate %in Binding 0
+               OpDecorate %out DescriptorSet 0
+               OpDecorate %out Binding 1
+       %half = OpTypeFloat 16
+     %v2half = OpTypeVector %half 2
+ %mat3v2half = OpTypeMatrix %v2half 3
+   %in_block = OpTypeStruct %mat3v2half
+%_ptr_StorageBuffer_in_block = OpTypePointer StorageBuffer %in_block
+         %in = OpVariable %_ptr_StorageBuffer_in_block StorageBuffer
+        %out = OpVariable %_ptr_StorageBuffer_in_block StorageBuffer
+       %void = OpTypeVoid
+          %8 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_StorageBuffer_mat3v2half = OpTypePointer StorageBuffer %mat3v2half
+       %main = OpFunction %void None %8
+         %11 = OpLabel
+         %15 = OpAccessChain %_ptr_StorageBuffer_mat3v2half %out %uint_0
+         %16 = OpAccessChain %_ptr_StorageBuffer_mat3v2half %in %uint_0
+         %17 = OpLoad %mat3v2half %16
+               OpStore %15 %17
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/storage/types/mat3x2_f16.wgsl.expected.wgsl b/test/tint/buffer/storage/types/mat3x2_f16.wgsl.expected.wgsl
new file mode 100644
index 0000000..d50b848
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat3x2_f16.wgsl.expected.wgsl

@@ -0,0 +1,10 @@
+enable f16;
+
+@group(0) @binding(0) var<storage, read> in : mat3x2<f16>;
+
+@group(0) @binding(1) var<storage, read_write> out : mat3x2<f16>;
+
+@compute @workgroup_size(1)
+fn main() {
+  out = in;
+}

diff --git a/test/tint/buffer/storage/types/mat3x2.wgsl b/test/tint/buffer/storage/types/mat3x2_f32.wgsl
similarity index 100%
rename from test/tint/buffer/storage/types/mat3x2.wgsl
rename to test/tint/buffer/storage/types/mat3x2_f32.wgsl


diff --git a/test/tint/buffer/storage/types/mat3x2.wgsl.expected.dxc.hlsl b/test/tint/buffer/storage/types/mat3x2_f32.wgsl.expected.dxc.hlsl
similarity index 100%
rename from test/tint/buffer/storage/types/mat3x2.wgsl.expected.dxc.hlsl
rename to test/tint/buffer/storage/types/mat3x2_f32.wgsl.expected.dxc.hlsl


diff --git a/test/tint/buffer/storage/types/mat3x2.wgsl.expected.fxc.hlsl b/test/tint/buffer/storage/types/mat3x2_f32.wgsl.expected.fxc.hlsl
similarity index 100%
rename from test/tint/buffer/storage/types/mat3x2.wgsl.expected.fxc.hlsl
rename to test/tint/buffer/storage/types/mat3x2_f32.wgsl.expected.fxc.hlsl


diff --git a/test/tint/buffer/storage/types/mat3x2.wgsl.expected.glsl b/test/tint/buffer/storage/types/mat3x2_f32.wgsl.expected.glsl
similarity index 100%
rename from test/tint/buffer/storage/types/mat3x2.wgsl.expected.glsl
rename to test/tint/buffer/storage/types/mat3x2_f32.wgsl.expected.glsl


diff --git a/test/tint/buffer/storage/types/mat3x2.wgsl.expected.msl b/test/tint/buffer/storage/types/mat3x2_f32.wgsl.expected.msl
similarity index 100%
rename from test/tint/buffer/storage/types/mat3x2.wgsl.expected.msl
rename to test/tint/buffer/storage/types/mat3x2_f32.wgsl.expected.msl


diff --git a/test/tint/buffer/storage/types/mat3x2.wgsl.expected.spvasm b/test/tint/buffer/storage/types/mat3x2_f32.wgsl.expected.spvasm
similarity index 100%
rename from test/tint/buffer/storage/types/mat3x2.wgsl.expected.spvasm
rename to test/tint/buffer/storage/types/mat3x2_f32.wgsl.expected.spvasm


diff --git a/test/tint/buffer/storage/types/mat3x2.wgsl.expected.wgsl b/test/tint/buffer/storage/types/mat3x2_f32.wgsl.expected.wgsl
similarity index 100%
rename from test/tint/buffer/storage/types/mat3x2.wgsl.expected.wgsl
rename to test/tint/buffer/storage/types/mat3x2_f32.wgsl.expected.wgsl


diff --git a/test/tint/buffer/storage/types/mat3x3_f16.wgsl b/test/tint/buffer/storage/types/mat3x3_f16.wgsl
new file mode 100644
index 0000000..4b961bd
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat3x3_f16.wgsl

@@ -0,0 +1,12 @@
+enable f16;
+
+@group(0) @binding(0)
+var<storage, read> in : mat3x3<f16>;
+
+@group(0) @binding(1)
+var<storage, read_write> out : mat3x3<f16>;
+
+@compute @workgroup_size(1)
+fn main() {
+  out = in;
+}

diff --git a/test/tint/buffer/storage/types/mat3x3_f16.wgsl.expected.dxc.hlsl b/test/tint/buffer/storage/types/mat3x3_f16.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..01aa7ad
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat3x3_f16.wgsl.expected.dxc.hlsl

@@ -0,0 +1,18 @@
+ByteAddressBuffer tint_symbol : register(t0, space0);
+RWByteAddressBuffer tint_symbol_1 : register(u1, space0);
+
+void tint_symbol_2(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 3, 3> value) {
+  buffer.Store<vector<float16_t, 3> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 8u), value[1u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 16u), value[2u]);
+}
+
+matrix<float16_t, 3, 3> tint_symbol_4(ByteAddressBuffer buffer, uint offset) {
+  return matrix<float16_t, 3, 3>(buffer.Load<vector<float16_t, 3> >((offset + 0u)), buffer.Load<vector<float16_t, 3> >((offset + 8u)), buffer.Load<vector<float16_t, 3> >((offset + 16u)));
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  tint_symbol_2(tint_symbol_1, 0u, tint_symbol_4(tint_symbol, 0u));
+  return;
+}

diff --git a/test/tint/buffer/storage/types/mat3x3_f16.wgsl.expected.fxc.hlsl b/test/tint/buffer/storage/types/mat3x3_f16.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..a5d9c58
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat3x3_f16.wgsl.expected.fxc.hlsl

@@ -0,0 +1,24 @@
+SKIP: FAILED
+
+ByteAddressBuffer tint_symbol : register(t0, space0);
+RWByteAddressBuffer tint_symbol_1 : register(u1, space0);
+
+void tint_symbol_2(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 3, 3> value) {
+  buffer.Store<vector<float16_t, 3> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 8u), value[1u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 16u), value[2u]);
+}
+
+matrix<float16_t, 3, 3> tint_symbol_4(ByteAddressBuffer buffer, uint offset) {
+  return matrix<float16_t, 3, 3>(buffer.Load<vector<float16_t, 3> >((offset + 0u)), buffer.Load<vector<float16_t, 3> >((offset + 8u)), buffer.Load<vector<float16_t, 3> >((offset + 16u)));
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  tint_symbol_2(tint_symbol_1, 0u, tint_symbol_4(tint_symbol, 0u));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000002722B6D74E0(4,68-76): error X3000: syntax error: unexpected token 'float16_t'
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000002722B6D74E0(5,3-14): error X3018: invalid subscript 'Store'
+

diff --git a/test/tint/buffer/storage/types/mat3x3_f16.wgsl.expected.glsl b/test/tint/buffer/storage/types/mat3x3_f16.wgsl.expected.glsl
new file mode 100644
index 0000000..00509e6
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat3x3_f16.wgsl.expected.glsl

@@ -0,0 +1,20 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std430) buffer tint_symbol_block_ssbo {
+  f16mat3 inner;
+} tint_symbol;
+
+layout(binding = 1, std430) buffer tint_symbol_block_ssbo_1 {
+  f16mat3 inner;
+} tint_symbol_1;
+
+void tint_symbol_2() {
+  tint_symbol_1.inner = tint_symbol.inner;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  tint_symbol_2();
+  return;
+}

diff --git a/test/tint/buffer/storage/types/mat3x3_f16.wgsl.expected.msl b/test/tint/buffer/storage/types/mat3x3_f16.wgsl.expected.msl
new file mode 100644
index 0000000..e22bd15
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat3x3_f16.wgsl.expected.msl

@@ -0,0 +1,8 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void tint_symbol(device half3x3* tint_symbol_1 [[buffer(0)]], const device half3x3* tint_symbol_2 [[buffer(1)]]) {
+  *(tint_symbol_1) = *(tint_symbol_2);
+  return;
+}
+

diff --git a/test/tint/buffer/storage/types/mat3x3_f16.wgsl.expected.spvasm b/test/tint/buffer/storage/types/mat3x3_f16.wgsl.expected.spvasm
new file mode 100644
index 0000000..ad8eff5
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat3x3_f16.wgsl.expected.spvasm

@@ -0,0 +1,47 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 18
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpName %in_block "in_block"
+               OpMemberName %in_block 0 "inner"
+               OpName %in "in"
+               OpName %out "out"
+               OpName %main "main"
+               OpDecorate %in_block Block
+               OpMemberDecorate %in_block 0 Offset 0
+               OpMemberDecorate %in_block 0 ColMajor
+               OpMemberDecorate %in_block 0 MatrixStride 8
+               OpDecorate %in NonWritable
+               OpDecorate %in DescriptorSet 0
+               OpDecorate %in Binding 0
+               OpDecorate %out DescriptorSet 0
+               OpDecorate %out Binding 1
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+ %mat3v3half = OpTypeMatrix %v3half 3
+   %in_block = OpTypeStruct %mat3v3half
+%_ptr_StorageBuffer_in_block = OpTypePointer StorageBuffer %in_block
+         %in = OpVariable %_ptr_StorageBuffer_in_block StorageBuffer
+        %out = OpVariable %_ptr_StorageBuffer_in_block StorageBuffer
+       %void = OpTypeVoid
+          %8 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_StorageBuffer_mat3v3half = OpTypePointer StorageBuffer %mat3v3half
+       %main = OpFunction %void None %8
+         %11 = OpLabel
+         %15 = OpAccessChain %_ptr_StorageBuffer_mat3v3half %out %uint_0
+         %16 = OpAccessChain %_ptr_StorageBuffer_mat3v3half %in %uint_0
+         %17 = OpLoad %mat3v3half %16
+               OpStore %15 %17
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/storage/types/mat3x3_f16.wgsl.expected.wgsl b/test/tint/buffer/storage/types/mat3x3_f16.wgsl.expected.wgsl
new file mode 100644
index 0000000..acb6b84
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat3x3_f16.wgsl.expected.wgsl

@@ -0,0 +1,10 @@
+enable f16;
+
+@group(0) @binding(0) var<storage, read> in : mat3x3<f16>;
+
+@group(0) @binding(1) var<storage, read_write> out : mat3x3<f16>;
+
+@compute @workgroup_size(1)
+fn main() {
+  out = in;
+}

diff --git a/test/tint/buffer/storage/types/mat3x3_f32.wgsl b/test/tint/buffer/storage/types/mat3x3_f32.wgsl
new file mode 100644
index 0000000..e5ae1a5
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat3x3_f32.wgsl

@@ -0,0 +1,10 @@
+@group(0) @binding(0)
+var<storage, read> in : mat3x3<f32>;
+
+@group(0) @binding(1)
+var<storage, read_write> out : mat3x3<f32>;
+
+@compute @workgroup_size(1)
+fn main() {
+  out = in;
+}

diff --git a/test/tint/buffer/storage/types/mat3x3_f32.wgsl.expected.dxc.hlsl b/test/tint/buffer/storage/types/mat3x3_f32.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..086d35a
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat3x3_f32.wgsl.expected.dxc.hlsl

@@ -0,0 +1,18 @@
+ByteAddressBuffer tint_symbol : register(t0, space0);
+RWByteAddressBuffer tint_symbol_1 : register(u1, space0);
+
+void tint_symbol_2(RWByteAddressBuffer buffer, uint offset, float3x3 value) {
+  buffer.Store3((offset + 0u), asuint(value[0u]));
+  buffer.Store3((offset + 16u), asuint(value[1u]));
+  buffer.Store3((offset + 32u), asuint(value[2u]));
+}
+
+float3x3 tint_symbol_4(ByteAddressBuffer buffer, uint offset) {
+  return float3x3(asfloat(buffer.Load3((offset + 0u))), asfloat(buffer.Load3((offset + 16u))), asfloat(buffer.Load3((offset + 32u))));
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  tint_symbol_2(tint_symbol_1, 0u, tint_symbol_4(tint_symbol, 0u));
+  return;
+}

diff --git a/test/tint/buffer/storage/types/mat3x3_f32.wgsl.expected.fxc.hlsl b/test/tint/buffer/storage/types/mat3x3_f32.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..086d35a
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat3x3_f32.wgsl.expected.fxc.hlsl

@@ -0,0 +1,18 @@
+ByteAddressBuffer tint_symbol : register(t0, space0);
+RWByteAddressBuffer tint_symbol_1 : register(u1, space0);
+
+void tint_symbol_2(RWByteAddressBuffer buffer, uint offset, float3x3 value) {
+  buffer.Store3((offset + 0u), asuint(value[0u]));
+  buffer.Store3((offset + 16u), asuint(value[1u]));
+  buffer.Store3((offset + 32u), asuint(value[2u]));
+}
+
+float3x3 tint_symbol_4(ByteAddressBuffer buffer, uint offset) {
+  return float3x3(asfloat(buffer.Load3((offset + 0u))), asfloat(buffer.Load3((offset + 16u))), asfloat(buffer.Load3((offset + 32u))));
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  tint_symbol_2(tint_symbol_1, 0u, tint_symbol_4(tint_symbol, 0u));
+  return;
+}

diff --git a/test/tint/buffer/storage/types/mat3x3_f32.wgsl.expected.glsl b/test/tint/buffer/storage/types/mat3x3_f32.wgsl.expected.glsl
new file mode 100644
index 0000000..b6e12e5
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat3x3_f32.wgsl.expected.glsl

@@ -0,0 +1,19 @@
+#version 310 es
+
+layout(binding = 0, std430) buffer tint_symbol_block_ssbo {
+  mat3 inner;
+} tint_symbol;
+
+layout(binding = 1, std430) buffer tint_symbol_block_ssbo_1 {
+  mat3 inner;
+} tint_symbol_1;
+
+void tint_symbol_2() {
+  tint_symbol_1.inner = tint_symbol.inner;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  tint_symbol_2();
+  return;
+}

diff --git a/test/tint/buffer/storage/types/mat3x3_f32.wgsl.expected.msl b/test/tint/buffer/storage/types/mat3x3_f32.wgsl.expected.msl
new file mode 100644
index 0000000..c8db576
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat3x3_f32.wgsl.expected.msl

@@ -0,0 +1,8 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void tint_symbol(device float3x3* tint_symbol_1 [[buffer(0)]], const device float3x3* tint_symbol_2 [[buffer(1)]]) {
+  *(tint_symbol_1) = *(tint_symbol_2);
+  return;
+}
+

diff --git a/test/tint/buffer/storage/types/mat3x3_f32.wgsl.expected.spvasm b/test/tint/buffer/storage/types/mat3x3_f32.wgsl.expected.spvasm
new file mode 100644
index 0000000..278850e
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat3x3_f32.wgsl.expected.spvasm

@@ -0,0 +1,43 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 18
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpName %in_block "in_block"
+               OpMemberName %in_block 0 "inner"
+               OpName %in "in"
+               OpName %out "out"
+               OpName %main "main"
+               OpDecorate %in_block Block
+               OpMemberDecorate %in_block 0 Offset 0
+               OpMemberDecorate %in_block 0 ColMajor
+               OpMemberDecorate %in_block 0 MatrixStride 16
+               OpDecorate %in NonWritable
+               OpDecorate %in DescriptorSet 0
+               OpDecorate %in Binding 0
+               OpDecorate %out DescriptorSet 0
+               OpDecorate %out Binding 1
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat3v3float = OpTypeMatrix %v3float 3
+   %in_block = OpTypeStruct %mat3v3float
+%_ptr_StorageBuffer_in_block = OpTypePointer StorageBuffer %in_block
+         %in = OpVariable %_ptr_StorageBuffer_in_block StorageBuffer
+        %out = OpVariable %_ptr_StorageBuffer_in_block StorageBuffer
+       %void = OpTypeVoid
+          %8 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_StorageBuffer_mat3v3float = OpTypePointer StorageBuffer %mat3v3float
+       %main = OpFunction %void None %8
+         %11 = OpLabel
+         %15 = OpAccessChain %_ptr_StorageBuffer_mat3v3float %out %uint_0
+         %16 = OpAccessChain %_ptr_StorageBuffer_mat3v3float %in %uint_0
+         %17 = OpLoad %mat3v3float %16
+               OpStore %15 %17
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/storage/types/mat3x3_f32.wgsl.expected.wgsl b/test/tint/buffer/storage/types/mat3x3_f32.wgsl.expected.wgsl
new file mode 100644
index 0000000..eec45bd
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat3x3_f32.wgsl.expected.wgsl

@@ -0,0 +1,8 @@
+@group(0) @binding(0) var<storage, read> in : mat3x3<f32>;
+
+@group(0) @binding(1) var<storage, read_write> out : mat3x3<f32>;
+
+@compute @workgroup_size(1)
+fn main() {
+  out = in;
+}

diff --git a/test/tint/buffer/storage/types/mat3x4_f16.wgsl b/test/tint/buffer/storage/types/mat3x4_f16.wgsl
new file mode 100644
index 0000000..3dea88d
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat3x4_f16.wgsl

@@ -0,0 +1,12 @@
+enable f16;
+
+@group(0) @binding(0)
+var<storage, read> in : mat3x4<f16>;
+
+@group(0) @binding(1)
+var<storage, read_write> out : mat3x4<f16>;
+
+@compute @workgroup_size(1)
+fn main() {
+  out = in;
+}

diff --git a/test/tint/buffer/storage/types/mat3x4_f16.wgsl.expected.dxc.hlsl b/test/tint/buffer/storage/types/mat3x4_f16.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..c788def
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat3x4_f16.wgsl.expected.dxc.hlsl

@@ -0,0 +1,18 @@
+ByteAddressBuffer tint_symbol : register(t0, space0);
+RWByteAddressBuffer tint_symbol_1 : register(u1, space0);
+
+void tint_symbol_2(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 3, 4> value) {
+  buffer.Store<vector<float16_t, 4> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 8u), value[1u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 16u), value[2u]);
+}
+
+matrix<float16_t, 3, 4> tint_symbol_4(ByteAddressBuffer buffer, uint offset) {
+  return matrix<float16_t, 3, 4>(buffer.Load<vector<float16_t, 4> >((offset + 0u)), buffer.Load<vector<float16_t, 4> >((offset + 8u)), buffer.Load<vector<float16_t, 4> >((offset + 16u)));
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  tint_symbol_2(tint_symbol_1, 0u, tint_symbol_4(tint_symbol, 0u));
+  return;
+}

diff --git a/test/tint/buffer/storage/types/mat3x4_f16.wgsl.expected.fxc.hlsl b/test/tint/buffer/storage/types/mat3x4_f16.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..56c3651
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat3x4_f16.wgsl.expected.fxc.hlsl

@@ -0,0 +1,24 @@
+SKIP: FAILED
+
+ByteAddressBuffer tint_symbol : register(t0, space0);
+RWByteAddressBuffer tint_symbol_1 : register(u1, space0);
+
+void tint_symbol_2(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 3, 4> value) {
+  buffer.Store<vector<float16_t, 4> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 8u), value[1u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 16u), value[2u]);
+}
+
+matrix<float16_t, 3, 4> tint_symbol_4(ByteAddressBuffer buffer, uint offset) {
+  return matrix<float16_t, 3, 4>(buffer.Load<vector<float16_t, 4> >((offset + 0u)), buffer.Load<vector<float16_t, 4> >((offset + 8u)), buffer.Load<vector<float16_t, 4> >((offset + 16u)));
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  tint_symbol_2(tint_symbol_1, 0u, tint_symbol_4(tint_symbol, 0u));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001F969F77390(4,68-76): error X3000: syntax error: unexpected token 'float16_t'
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001F969F77390(5,3-14): error X3018: invalid subscript 'Store'
+

diff --git a/test/tint/buffer/storage/types/mat3x4_f16.wgsl.expected.glsl b/test/tint/buffer/storage/types/mat3x4_f16.wgsl.expected.glsl
new file mode 100644
index 0000000..a09091a
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat3x4_f16.wgsl.expected.glsl

@@ -0,0 +1,20 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std430) buffer tint_symbol_block_ssbo {
+  f16mat3x4 inner;
+} tint_symbol;
+
+layout(binding = 1, std430) buffer tint_symbol_block_ssbo_1 {
+  f16mat3x4 inner;
+} tint_symbol_1;
+
+void tint_symbol_2() {
+  tint_symbol_1.inner = tint_symbol.inner;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  tint_symbol_2();
+  return;
+}

diff --git a/test/tint/buffer/storage/types/mat3x4_f16.wgsl.expected.msl b/test/tint/buffer/storage/types/mat3x4_f16.wgsl.expected.msl
new file mode 100644
index 0000000..0668084
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat3x4_f16.wgsl.expected.msl

@@ -0,0 +1,8 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void tint_symbol(device half3x4* tint_symbol_1 [[buffer(0)]], const device half3x4* tint_symbol_2 [[buffer(1)]]) {
+  *(tint_symbol_1) = *(tint_symbol_2);
+  return;
+}
+

diff --git a/test/tint/buffer/storage/types/mat3x4_f16.wgsl.expected.spvasm b/test/tint/buffer/storage/types/mat3x4_f16.wgsl.expected.spvasm
new file mode 100644
index 0000000..50d9234
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat3x4_f16.wgsl.expected.spvasm

@@ -0,0 +1,47 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 18
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpName %in_block "in_block"
+               OpMemberName %in_block 0 "inner"
+               OpName %in "in"
+               OpName %out "out"
+               OpName %main "main"
+               OpDecorate %in_block Block
+               OpMemberDecorate %in_block 0 Offset 0
+               OpMemberDecorate %in_block 0 ColMajor
+               OpMemberDecorate %in_block 0 MatrixStride 8
+               OpDecorate %in NonWritable
+               OpDecorate %in DescriptorSet 0
+               OpDecorate %in Binding 0
+               OpDecorate %out DescriptorSet 0
+               OpDecorate %out Binding 1
+       %half = OpTypeFloat 16
+     %v4half = OpTypeVector %half 4
+ %mat3v4half = OpTypeMatrix %v4half 3
+   %in_block = OpTypeStruct %mat3v4half
+%_ptr_StorageBuffer_in_block = OpTypePointer StorageBuffer %in_block
+         %in = OpVariable %_ptr_StorageBuffer_in_block StorageBuffer
+        %out = OpVariable %_ptr_StorageBuffer_in_block StorageBuffer
+       %void = OpTypeVoid
+          %8 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_StorageBuffer_mat3v4half = OpTypePointer StorageBuffer %mat3v4half
+       %main = OpFunction %void None %8
+         %11 = OpLabel
+         %15 = OpAccessChain %_ptr_StorageBuffer_mat3v4half %out %uint_0
+         %16 = OpAccessChain %_ptr_StorageBuffer_mat3v4half %in %uint_0
+         %17 = OpLoad %mat3v4half %16
+               OpStore %15 %17
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/storage/types/mat3x4_f16.wgsl.expected.wgsl b/test/tint/buffer/storage/types/mat3x4_f16.wgsl.expected.wgsl
new file mode 100644
index 0000000..44b9a64
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat3x4_f16.wgsl.expected.wgsl

@@ -0,0 +1,10 @@
+enable f16;
+
+@group(0) @binding(0) var<storage, read> in : mat3x4<f16>;
+
+@group(0) @binding(1) var<storage, read_write> out : mat3x4<f16>;
+
+@compute @workgroup_size(1)
+fn main() {
+  out = in;
+}

diff --git a/test/tint/buffer/storage/types/mat3x4_f32.wgsl b/test/tint/buffer/storage/types/mat3x4_f32.wgsl
new file mode 100644
index 0000000..3ee71ee
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat3x4_f32.wgsl

@@ -0,0 +1,10 @@
+@group(0) @binding(0)
+var<storage, read> in : mat3x4<f32>;
+
+@group(0) @binding(1)
+var<storage, read_write> out : mat3x4<f32>;
+
+@compute @workgroup_size(1)
+fn main() {
+  out = in;
+}

diff --git a/test/tint/buffer/storage/types/mat3x4_f32.wgsl.expected.dxc.hlsl b/test/tint/buffer/storage/types/mat3x4_f32.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..2d8f11b
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat3x4_f32.wgsl.expected.dxc.hlsl

@@ -0,0 +1,18 @@
+ByteAddressBuffer tint_symbol : register(t0, space0);
+RWByteAddressBuffer tint_symbol_1 : register(u1, space0);
+
+void tint_symbol_2(RWByteAddressBuffer buffer, uint offset, float3x4 value) {
+  buffer.Store4((offset + 0u), asuint(value[0u]));
+  buffer.Store4((offset + 16u), asuint(value[1u]));
+  buffer.Store4((offset + 32u), asuint(value[2u]));
+}
+
+float3x4 tint_symbol_4(ByteAddressBuffer buffer, uint offset) {
+  return float3x4(asfloat(buffer.Load4((offset + 0u))), asfloat(buffer.Load4((offset + 16u))), asfloat(buffer.Load4((offset + 32u))));
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  tint_symbol_2(tint_symbol_1, 0u, tint_symbol_4(tint_symbol, 0u));
+  return;
+}

diff --git a/test/tint/buffer/storage/types/mat3x4_f32.wgsl.expected.fxc.hlsl b/test/tint/buffer/storage/types/mat3x4_f32.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..2d8f11b
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat3x4_f32.wgsl.expected.fxc.hlsl

@@ -0,0 +1,18 @@
+ByteAddressBuffer tint_symbol : register(t0, space0);
+RWByteAddressBuffer tint_symbol_1 : register(u1, space0);
+
+void tint_symbol_2(RWByteAddressBuffer buffer, uint offset, float3x4 value) {
+  buffer.Store4((offset + 0u), asuint(value[0u]));
+  buffer.Store4((offset + 16u), asuint(value[1u]));
+  buffer.Store4((offset + 32u), asuint(value[2u]));
+}
+
+float3x4 tint_symbol_4(ByteAddressBuffer buffer, uint offset) {
+  return float3x4(asfloat(buffer.Load4((offset + 0u))), asfloat(buffer.Load4((offset + 16u))), asfloat(buffer.Load4((offset + 32u))));
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  tint_symbol_2(tint_symbol_1, 0u, tint_symbol_4(tint_symbol, 0u));
+  return;
+}

diff --git a/test/tint/buffer/storage/types/mat3x4_f32.wgsl.expected.glsl b/test/tint/buffer/storage/types/mat3x4_f32.wgsl.expected.glsl
new file mode 100644
index 0000000..a32c3e0
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat3x4_f32.wgsl.expected.glsl

@@ -0,0 +1,19 @@
+#version 310 es
+
+layout(binding = 0, std430) buffer tint_symbol_block_ssbo {
+  mat3x4 inner;
+} tint_symbol;
+
+layout(binding = 1, std430) buffer tint_symbol_block_ssbo_1 {
+  mat3x4 inner;
+} tint_symbol_1;
+
+void tint_symbol_2() {
+  tint_symbol_1.inner = tint_symbol.inner;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  tint_symbol_2();
+  return;
+}

diff --git a/test/tint/buffer/storage/types/mat3x4_f32.wgsl.expected.msl b/test/tint/buffer/storage/types/mat3x4_f32.wgsl.expected.msl
new file mode 100644
index 0000000..d6dbe7f
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat3x4_f32.wgsl.expected.msl

@@ -0,0 +1,8 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void tint_symbol(device float3x4* tint_symbol_1 [[buffer(0)]], const device float3x4* tint_symbol_2 [[buffer(1)]]) {
+  *(tint_symbol_1) = *(tint_symbol_2);
+  return;
+}
+

diff --git a/test/tint/buffer/storage/types/mat3x4_f32.wgsl.expected.spvasm b/test/tint/buffer/storage/types/mat3x4_f32.wgsl.expected.spvasm
new file mode 100644
index 0000000..9f0d97c
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat3x4_f32.wgsl.expected.spvasm

@@ -0,0 +1,43 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 18
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpName %in_block "in_block"
+               OpMemberName %in_block 0 "inner"
+               OpName %in "in"
+               OpName %out "out"
+               OpName %main "main"
+               OpDecorate %in_block Block
+               OpMemberDecorate %in_block 0 Offset 0
+               OpMemberDecorate %in_block 0 ColMajor
+               OpMemberDecorate %in_block 0 MatrixStride 16
+               OpDecorate %in NonWritable
+               OpDecorate %in DescriptorSet 0
+               OpDecorate %in Binding 0
+               OpDecorate %out DescriptorSet 0
+               OpDecorate %out Binding 1
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%mat3v4float = OpTypeMatrix %v4float 3
+   %in_block = OpTypeStruct %mat3v4float
+%_ptr_StorageBuffer_in_block = OpTypePointer StorageBuffer %in_block
+         %in = OpVariable %_ptr_StorageBuffer_in_block StorageBuffer
+        %out = OpVariable %_ptr_StorageBuffer_in_block StorageBuffer
+       %void = OpTypeVoid
+          %8 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_StorageBuffer_mat3v4float = OpTypePointer StorageBuffer %mat3v4float
+       %main = OpFunction %void None %8
+         %11 = OpLabel
+         %15 = OpAccessChain %_ptr_StorageBuffer_mat3v4float %out %uint_0
+         %16 = OpAccessChain %_ptr_StorageBuffer_mat3v4float %in %uint_0
+         %17 = OpLoad %mat3v4float %16
+               OpStore %15 %17
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/storage/types/mat3x4_f32.wgsl.expected.wgsl b/test/tint/buffer/storage/types/mat3x4_f32.wgsl.expected.wgsl
new file mode 100644
index 0000000..f8b8e5c
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat3x4_f32.wgsl.expected.wgsl

@@ -0,0 +1,8 @@
+@group(0) @binding(0) var<storage, read> in : mat3x4<f32>;
+
+@group(0) @binding(1) var<storage, read_write> out : mat3x4<f32>;
+
+@compute @workgroup_size(1)
+fn main() {
+  out = in;
+}

diff --git a/test/tint/buffer/storage/types/mat4x2_f16.wgsl b/test/tint/buffer/storage/types/mat4x2_f16.wgsl
new file mode 100644
index 0000000..b56cef8
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat4x2_f16.wgsl

@@ -0,0 +1,12 @@
+enable f16;
+
+@group(0) @binding(0)
+var<storage, read> in : mat4x2<f16>;
+
+@group(0) @binding(1)
+var<storage, read_write> out : mat4x2<f16>;
+
+@compute @workgroup_size(1)
+fn main() {
+  out = in;
+}

diff --git a/test/tint/buffer/storage/types/mat4x2_f16.wgsl.expected.dxc.hlsl b/test/tint/buffer/storage/types/mat4x2_f16.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..8dce844
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat4x2_f16.wgsl.expected.dxc.hlsl

@@ -0,0 +1,19 @@
+ByteAddressBuffer tint_symbol : register(t0, space0);
+RWByteAddressBuffer tint_symbol_1 : register(u1, space0);
+
+void tint_symbol_2(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 4, 2> value) {
+  buffer.Store<vector<float16_t, 2> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 4u), value[1u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 8u), value[2u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 12u), value[3u]);
+}
+
+matrix<float16_t, 4, 2> tint_symbol_4(ByteAddressBuffer buffer, uint offset) {
+  return matrix<float16_t, 4, 2>(buffer.Load<vector<float16_t, 2> >((offset + 0u)), buffer.Load<vector<float16_t, 2> >((offset + 4u)), buffer.Load<vector<float16_t, 2> >((offset + 8u)), buffer.Load<vector<float16_t, 2> >((offset + 12u)));
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  tint_symbol_2(tint_symbol_1, 0u, tint_symbol_4(tint_symbol, 0u));
+  return;
+}

diff --git a/test/tint/buffer/storage/types/mat4x2_f16.wgsl.expected.fxc.hlsl b/test/tint/buffer/storage/types/mat4x2_f16.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..c3aacd6
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat4x2_f16.wgsl.expected.fxc.hlsl

@@ -0,0 +1,25 @@
+SKIP: FAILED
+
+ByteAddressBuffer tint_symbol : register(t0, space0);
+RWByteAddressBuffer tint_symbol_1 : register(u1, space0);
+
+void tint_symbol_2(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 4, 2> value) {
+  buffer.Store<vector<float16_t, 2> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 4u), value[1u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 8u), value[2u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 12u), value[3u]);
+}
+
+matrix<float16_t, 4, 2> tint_symbol_4(ByteAddressBuffer buffer, uint offset) {
+  return matrix<float16_t, 4, 2>(buffer.Load<vector<float16_t, 2> >((offset + 0u)), buffer.Load<vector<float16_t, 2> >((offset + 4u)), buffer.Load<vector<float16_t, 2> >((offset + 8u)), buffer.Load<vector<float16_t, 2> >((offset + 12u)));
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  tint_symbol_2(tint_symbol_1, 0u, tint_symbol_4(tint_symbol, 0u));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001A0195E7740(4,68-76): error X3000: syntax error: unexpected token 'float16_t'
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001A0195E7740(5,3-14): error X3018: invalid subscript 'Store'
+

diff --git a/test/tint/buffer/storage/types/mat4x2_f16.wgsl.expected.glsl b/test/tint/buffer/storage/types/mat4x2_f16.wgsl.expected.glsl
new file mode 100644
index 0000000..f4c91fe
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat4x2_f16.wgsl.expected.glsl

@@ -0,0 +1,20 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std430) buffer tint_symbol_block_ssbo {
+  f16mat4x2 inner;
+} tint_symbol;
+
+layout(binding = 1, std430) buffer tint_symbol_block_ssbo_1 {
+  f16mat4x2 inner;
+} tint_symbol_1;
+
+void tint_symbol_2() {
+  tint_symbol_1.inner = tint_symbol.inner;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  tint_symbol_2();
+  return;
+}

diff --git a/test/tint/buffer/storage/types/mat4x2_f16.wgsl.expected.msl b/test/tint/buffer/storage/types/mat4x2_f16.wgsl.expected.msl
new file mode 100644
index 0000000..1e4191e
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat4x2_f16.wgsl.expected.msl

@@ -0,0 +1,8 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void tint_symbol(device half4x2* tint_symbol_1 [[buffer(0)]], const device half4x2* tint_symbol_2 [[buffer(1)]]) {
+  *(tint_symbol_1) = *(tint_symbol_2);
+  return;
+}
+

diff --git a/test/tint/buffer/storage/types/mat4x2_f16.wgsl.expected.spvasm b/test/tint/buffer/storage/types/mat4x2_f16.wgsl.expected.spvasm
new file mode 100644
index 0000000..5781d8e
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat4x2_f16.wgsl.expected.spvasm

@@ -0,0 +1,47 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 18
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpName %in_block "in_block"
+               OpMemberName %in_block 0 "inner"
+               OpName %in "in"
+               OpName %out "out"
+               OpName %main "main"
+               OpDecorate %in_block Block
+               OpMemberDecorate %in_block 0 Offset 0
+               OpMemberDecorate %in_block 0 ColMajor
+               OpMemberDecorate %in_block 0 MatrixStride 4
+               OpDecorate %in NonWritable
+               OpDecorate %in DescriptorSet 0
+               OpDecorate %in Binding 0
+               OpDecorate %out DescriptorSet 0
+               OpDecorate %out Binding 1
+       %half = OpTypeFloat 16
+     %v2half = OpTypeVector %half 2
+ %mat4v2half = OpTypeMatrix %v2half 4
+   %in_block = OpTypeStruct %mat4v2half
+%_ptr_StorageBuffer_in_block = OpTypePointer StorageBuffer %in_block
+         %in = OpVariable %_ptr_StorageBuffer_in_block StorageBuffer
+        %out = OpVariable %_ptr_StorageBuffer_in_block StorageBuffer
+       %void = OpTypeVoid
+          %8 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_StorageBuffer_mat4v2half = OpTypePointer StorageBuffer %mat4v2half
+       %main = OpFunction %void None %8
+         %11 = OpLabel
+         %15 = OpAccessChain %_ptr_StorageBuffer_mat4v2half %out %uint_0
+         %16 = OpAccessChain %_ptr_StorageBuffer_mat4v2half %in %uint_0
+         %17 = OpLoad %mat4v2half %16
+               OpStore %15 %17
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/storage/types/mat4x2_f16.wgsl.expected.wgsl b/test/tint/buffer/storage/types/mat4x2_f16.wgsl.expected.wgsl
new file mode 100644
index 0000000..b6be63a
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat4x2_f16.wgsl.expected.wgsl

@@ -0,0 +1,10 @@
+enable f16;
+
+@group(0) @binding(0) var<storage, read> in : mat4x2<f16>;
+
+@group(0) @binding(1) var<storage, read_write> out : mat4x2<f16>;
+
+@compute @workgroup_size(1)
+fn main() {
+  out = in;
+}

diff --git a/test/tint/buffer/storage/types/mat4x2_f32.wgsl b/test/tint/buffer/storage/types/mat4x2_f32.wgsl
new file mode 100644
index 0000000..60244dc
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat4x2_f32.wgsl

@@ -0,0 +1,10 @@
+@group(0) @binding(0)
+var<storage, read> in : mat4x2<f32>;
+
+@group(0) @binding(1)
+var<storage, read_write> out : mat4x2<f32>;
+
+@compute @workgroup_size(1)
+fn main() {
+  out = in;
+}

diff --git a/test/tint/buffer/storage/types/mat4x2_f32.wgsl.expected.dxc.hlsl b/test/tint/buffer/storage/types/mat4x2_f32.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..a8ca4f5
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat4x2_f32.wgsl.expected.dxc.hlsl

@@ -0,0 +1,19 @@
+ByteAddressBuffer tint_symbol : register(t0, space0);
+RWByteAddressBuffer tint_symbol_1 : register(u1, space0);
+
+void tint_symbol_2(RWByteAddressBuffer buffer, uint offset, float4x2 value) {
+  buffer.Store2((offset + 0u), asuint(value[0u]));
+  buffer.Store2((offset + 8u), asuint(value[1u]));
+  buffer.Store2((offset + 16u), asuint(value[2u]));
+  buffer.Store2((offset + 24u), asuint(value[3u]));
+}
+
+float4x2 tint_symbol_4(ByteAddressBuffer buffer, uint offset) {
+  return float4x2(asfloat(buffer.Load2((offset + 0u))), asfloat(buffer.Load2((offset + 8u))), asfloat(buffer.Load2((offset + 16u))), asfloat(buffer.Load2((offset + 24u))));
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  tint_symbol_2(tint_symbol_1, 0u, tint_symbol_4(tint_symbol, 0u));
+  return;
+}

diff --git a/test/tint/buffer/storage/types/mat4x2_f32.wgsl.expected.fxc.hlsl b/test/tint/buffer/storage/types/mat4x2_f32.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..a8ca4f5
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat4x2_f32.wgsl.expected.fxc.hlsl

@@ -0,0 +1,19 @@
+ByteAddressBuffer tint_symbol : register(t0, space0);
+RWByteAddressBuffer tint_symbol_1 : register(u1, space0);
+
+void tint_symbol_2(RWByteAddressBuffer buffer, uint offset, float4x2 value) {
+  buffer.Store2((offset + 0u), asuint(value[0u]));
+  buffer.Store2((offset + 8u), asuint(value[1u]));
+  buffer.Store2((offset + 16u), asuint(value[2u]));
+  buffer.Store2((offset + 24u), asuint(value[3u]));
+}
+
+float4x2 tint_symbol_4(ByteAddressBuffer buffer, uint offset) {
+  return float4x2(asfloat(buffer.Load2((offset + 0u))), asfloat(buffer.Load2((offset + 8u))), asfloat(buffer.Load2((offset + 16u))), asfloat(buffer.Load2((offset + 24u))));
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  tint_symbol_2(tint_symbol_1, 0u, tint_symbol_4(tint_symbol, 0u));
+  return;
+}

diff --git a/test/tint/buffer/storage/types/mat4x2_f32.wgsl.expected.glsl b/test/tint/buffer/storage/types/mat4x2_f32.wgsl.expected.glsl
new file mode 100644
index 0000000..8d7d981
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat4x2_f32.wgsl.expected.glsl

@@ -0,0 +1,19 @@
+#version 310 es
+
+layout(binding = 0, std430) buffer tint_symbol_block_ssbo {
+  mat4x2 inner;
+} tint_symbol;
+
+layout(binding = 1, std430) buffer tint_symbol_block_ssbo_1 {
+  mat4x2 inner;
+} tint_symbol_1;
+
+void tint_symbol_2() {
+  tint_symbol_1.inner = tint_symbol.inner;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  tint_symbol_2();
+  return;
+}

diff --git a/test/tint/buffer/storage/types/mat4x2_f32.wgsl.expected.msl b/test/tint/buffer/storage/types/mat4x2_f32.wgsl.expected.msl
new file mode 100644
index 0000000..3ff5e73
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat4x2_f32.wgsl.expected.msl

@@ -0,0 +1,8 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void tint_symbol(device float4x2* tint_symbol_1 [[buffer(0)]], const device float4x2* tint_symbol_2 [[buffer(1)]]) {
+  *(tint_symbol_1) = *(tint_symbol_2);
+  return;
+}
+

diff --git a/test/tint/buffer/storage/types/mat4x2_f32.wgsl.expected.spvasm b/test/tint/buffer/storage/types/mat4x2_f32.wgsl.expected.spvasm
new file mode 100644
index 0000000..da7afd5
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat4x2_f32.wgsl.expected.spvasm

@@ -0,0 +1,43 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 18
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpName %in_block "in_block"
+               OpMemberName %in_block 0 "inner"
+               OpName %in "in"
+               OpName %out "out"
+               OpName %main "main"
+               OpDecorate %in_block Block
+               OpMemberDecorate %in_block 0 Offset 0
+               OpMemberDecorate %in_block 0 ColMajor
+               OpMemberDecorate %in_block 0 MatrixStride 8
+               OpDecorate %in NonWritable
+               OpDecorate %in DescriptorSet 0
+               OpDecorate %in Binding 0
+               OpDecorate %out DescriptorSet 0
+               OpDecorate %out Binding 1
+      %float = OpTypeFloat 32
+    %v2float = OpTypeVector %float 2
+%mat4v2float = OpTypeMatrix %v2float 4
+   %in_block = OpTypeStruct %mat4v2float
+%_ptr_StorageBuffer_in_block = OpTypePointer StorageBuffer %in_block
+         %in = OpVariable %_ptr_StorageBuffer_in_block StorageBuffer
+        %out = OpVariable %_ptr_StorageBuffer_in_block StorageBuffer
+       %void = OpTypeVoid
+          %8 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_StorageBuffer_mat4v2float = OpTypePointer StorageBuffer %mat4v2float
+       %main = OpFunction %void None %8
+         %11 = OpLabel
+         %15 = OpAccessChain %_ptr_StorageBuffer_mat4v2float %out %uint_0
+         %16 = OpAccessChain %_ptr_StorageBuffer_mat4v2float %in %uint_0
+         %17 = OpLoad %mat4v2float %16
+               OpStore %15 %17
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/storage/types/mat4x2_f32.wgsl.expected.wgsl b/test/tint/buffer/storage/types/mat4x2_f32.wgsl.expected.wgsl
new file mode 100644
index 0000000..4027c07
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat4x2_f32.wgsl.expected.wgsl

@@ -0,0 +1,8 @@
+@group(0) @binding(0) var<storage, read> in : mat4x2<f32>;
+
+@group(0) @binding(1) var<storage, read_write> out : mat4x2<f32>;
+
+@compute @workgroup_size(1)
+fn main() {
+  out = in;
+}

diff --git a/test/tint/buffer/storage/types/mat4x3_f16.wgsl b/test/tint/buffer/storage/types/mat4x3_f16.wgsl
new file mode 100644
index 0000000..ab4ecf7
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat4x3_f16.wgsl

@@ -0,0 +1,12 @@
+enable f16;
+
+@group(0) @binding(0)
+var<storage, read> in : mat4x3<f16>;
+
+@group(0) @binding(1)
+var<storage, read_write> out : mat4x3<f16>;
+
+@compute @workgroup_size(1)
+fn main() {
+  out = in;
+}

diff --git a/test/tint/buffer/storage/types/mat4x3_f16.wgsl.expected.dxc.hlsl b/test/tint/buffer/storage/types/mat4x3_f16.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..29a2a90
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat4x3_f16.wgsl.expected.dxc.hlsl

@@ -0,0 +1,19 @@
+ByteAddressBuffer tint_symbol : register(t0, space0);
+RWByteAddressBuffer tint_symbol_1 : register(u1, space0);
+
+void tint_symbol_2(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 4, 3> value) {
+  buffer.Store<vector<float16_t, 3> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 8u), value[1u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 16u), value[2u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 24u), value[3u]);
+}
+
+matrix<float16_t, 4, 3> tint_symbol_4(ByteAddressBuffer buffer, uint offset) {
+  return matrix<float16_t, 4, 3>(buffer.Load<vector<float16_t, 3> >((offset + 0u)), buffer.Load<vector<float16_t, 3> >((offset + 8u)), buffer.Load<vector<float16_t, 3> >((offset + 16u)), buffer.Load<vector<float16_t, 3> >((offset + 24u)));
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  tint_symbol_2(tint_symbol_1, 0u, tint_symbol_4(tint_symbol, 0u));
+  return;
+}

diff --git a/test/tint/buffer/storage/types/mat4x3_f16.wgsl.expected.fxc.hlsl b/test/tint/buffer/storage/types/mat4x3_f16.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..a117a65
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat4x3_f16.wgsl.expected.fxc.hlsl

@@ -0,0 +1,25 @@
+SKIP: FAILED
+
+ByteAddressBuffer tint_symbol : register(t0, space0);
+RWByteAddressBuffer tint_symbol_1 : register(u1, space0);
+
+void tint_symbol_2(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 4, 3> value) {
+  buffer.Store<vector<float16_t, 3> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 8u), value[1u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 16u), value[2u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 24u), value[3u]);
+}
+
+matrix<float16_t, 4, 3> tint_symbol_4(ByteAddressBuffer buffer, uint offset) {
+  return matrix<float16_t, 4, 3>(buffer.Load<vector<float16_t, 3> >((offset + 0u)), buffer.Load<vector<float16_t, 3> >((offset + 8u)), buffer.Load<vector<float16_t, 3> >((offset + 16u)), buffer.Load<vector<float16_t, 3> >((offset + 24u)));
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  tint_symbol_2(tint_symbol_1, 0u, tint_symbol_4(tint_symbol, 0u));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000027711EB7740(4,68-76): error X3000: syntax error: unexpected token 'float16_t'
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000027711EB7740(5,3-14): error X3018: invalid subscript 'Store'
+

diff --git a/test/tint/buffer/storage/types/mat4x3_f16.wgsl.expected.glsl b/test/tint/buffer/storage/types/mat4x3_f16.wgsl.expected.glsl
new file mode 100644
index 0000000..65e8a13
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat4x3_f16.wgsl.expected.glsl

@@ -0,0 +1,20 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std430) buffer tint_symbol_block_ssbo {
+  f16mat4x3 inner;
+} tint_symbol;
+
+layout(binding = 1, std430) buffer tint_symbol_block_ssbo_1 {
+  f16mat4x3 inner;
+} tint_symbol_1;
+
+void tint_symbol_2() {
+  tint_symbol_1.inner = tint_symbol.inner;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  tint_symbol_2();
+  return;
+}

diff --git a/test/tint/buffer/storage/types/mat4x3_f16.wgsl.expected.msl b/test/tint/buffer/storage/types/mat4x3_f16.wgsl.expected.msl
new file mode 100644
index 0000000..57f08f6
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat4x3_f16.wgsl.expected.msl

@@ -0,0 +1,8 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void tint_symbol(device half4x3* tint_symbol_1 [[buffer(0)]], const device half4x3* tint_symbol_2 [[buffer(1)]]) {
+  *(tint_symbol_1) = *(tint_symbol_2);
+  return;
+}
+

diff --git a/test/tint/buffer/storage/types/mat4x3_f16.wgsl.expected.spvasm b/test/tint/buffer/storage/types/mat4x3_f16.wgsl.expected.spvasm
new file mode 100644
index 0000000..5cd7565
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat4x3_f16.wgsl.expected.spvasm

@@ -0,0 +1,47 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 18
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpName %in_block "in_block"
+               OpMemberName %in_block 0 "inner"
+               OpName %in "in"
+               OpName %out "out"
+               OpName %main "main"
+               OpDecorate %in_block Block
+               OpMemberDecorate %in_block 0 Offset 0
+               OpMemberDecorate %in_block 0 ColMajor
+               OpMemberDecorate %in_block 0 MatrixStride 8
+               OpDecorate %in NonWritable
+               OpDecorate %in DescriptorSet 0
+               OpDecorate %in Binding 0
+               OpDecorate %out DescriptorSet 0
+               OpDecorate %out Binding 1
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+ %mat4v3half = OpTypeMatrix %v3half 4
+   %in_block = OpTypeStruct %mat4v3half
+%_ptr_StorageBuffer_in_block = OpTypePointer StorageBuffer %in_block
+         %in = OpVariable %_ptr_StorageBuffer_in_block StorageBuffer
+        %out = OpVariable %_ptr_StorageBuffer_in_block StorageBuffer
+       %void = OpTypeVoid
+          %8 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_StorageBuffer_mat4v3half = OpTypePointer StorageBuffer %mat4v3half
+       %main = OpFunction %void None %8
+         %11 = OpLabel
+         %15 = OpAccessChain %_ptr_StorageBuffer_mat4v3half %out %uint_0
+         %16 = OpAccessChain %_ptr_StorageBuffer_mat4v3half %in %uint_0
+         %17 = OpLoad %mat4v3half %16
+               OpStore %15 %17
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/storage/types/mat4x3_f16.wgsl.expected.wgsl b/test/tint/buffer/storage/types/mat4x3_f16.wgsl.expected.wgsl
new file mode 100644
index 0000000..3223e4f
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat4x3_f16.wgsl.expected.wgsl

@@ -0,0 +1,10 @@
+enable f16;
+
+@group(0) @binding(0) var<storage, read> in : mat4x3<f16>;
+
+@group(0) @binding(1) var<storage, read_write> out : mat4x3<f16>;
+
+@compute @workgroup_size(1)
+fn main() {
+  out = in;
+}

diff --git a/test/tint/buffer/storage/types/mat4x3_f32.wgsl b/test/tint/buffer/storage/types/mat4x3_f32.wgsl
new file mode 100644
index 0000000..0900fa7
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat4x3_f32.wgsl

@@ -0,0 +1,10 @@
+@group(0) @binding(0)
+var<storage, read> in : mat4x3<f32>;
+
+@group(0) @binding(1)
+var<storage, read_write> out : mat4x3<f32>;
+
+@compute @workgroup_size(1)
+fn main() {
+  out = in;
+}

diff --git a/test/tint/buffer/storage/types/mat4x3_f32.wgsl.expected.dxc.hlsl b/test/tint/buffer/storage/types/mat4x3_f32.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..44051da
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat4x3_f32.wgsl.expected.dxc.hlsl

@@ -0,0 +1,19 @@
+ByteAddressBuffer tint_symbol : register(t0, space0);
+RWByteAddressBuffer tint_symbol_1 : register(u1, space0);
+
+void tint_symbol_2(RWByteAddressBuffer buffer, uint offset, float4x3 value) {
+  buffer.Store3((offset + 0u), asuint(value[0u]));
+  buffer.Store3((offset + 16u), asuint(value[1u]));
+  buffer.Store3((offset + 32u), asuint(value[2u]));
+  buffer.Store3((offset + 48u), asuint(value[3u]));
+}
+
+float4x3 tint_symbol_4(ByteAddressBuffer buffer, uint offset) {
+  return float4x3(asfloat(buffer.Load3((offset + 0u))), asfloat(buffer.Load3((offset + 16u))), asfloat(buffer.Load3((offset + 32u))), asfloat(buffer.Load3((offset + 48u))));
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  tint_symbol_2(tint_symbol_1, 0u, tint_symbol_4(tint_symbol, 0u));
+  return;
+}

diff --git a/test/tint/buffer/storage/types/mat4x3_f32.wgsl.expected.fxc.hlsl b/test/tint/buffer/storage/types/mat4x3_f32.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..44051da
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat4x3_f32.wgsl.expected.fxc.hlsl

@@ -0,0 +1,19 @@
+ByteAddressBuffer tint_symbol : register(t0, space0);
+RWByteAddressBuffer tint_symbol_1 : register(u1, space0);
+
+void tint_symbol_2(RWByteAddressBuffer buffer, uint offset, float4x3 value) {
+  buffer.Store3((offset + 0u), asuint(value[0u]));
+  buffer.Store3((offset + 16u), asuint(value[1u]));
+  buffer.Store3((offset + 32u), asuint(value[2u]));
+  buffer.Store3((offset + 48u), asuint(value[3u]));
+}
+
+float4x3 tint_symbol_4(ByteAddressBuffer buffer, uint offset) {
+  return float4x3(asfloat(buffer.Load3((offset + 0u))), asfloat(buffer.Load3((offset + 16u))), asfloat(buffer.Load3((offset + 32u))), asfloat(buffer.Load3((offset + 48u))));
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  tint_symbol_2(tint_symbol_1, 0u, tint_symbol_4(tint_symbol, 0u));
+  return;
+}

diff --git a/test/tint/buffer/storage/types/mat4x3_f32.wgsl.expected.glsl b/test/tint/buffer/storage/types/mat4x3_f32.wgsl.expected.glsl
new file mode 100644
index 0000000..a4a1a8f
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat4x3_f32.wgsl.expected.glsl

@@ -0,0 +1,19 @@
+#version 310 es
+
+layout(binding = 0, std430) buffer tint_symbol_block_ssbo {
+  mat4x3 inner;
+} tint_symbol;
+
+layout(binding = 1, std430) buffer tint_symbol_block_ssbo_1 {
+  mat4x3 inner;
+} tint_symbol_1;
+
+void tint_symbol_2() {
+  tint_symbol_1.inner = tint_symbol.inner;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  tint_symbol_2();
+  return;
+}

diff --git a/test/tint/buffer/storage/types/mat4x3_f32.wgsl.expected.msl b/test/tint/buffer/storage/types/mat4x3_f32.wgsl.expected.msl
new file mode 100644
index 0000000..7465a1a
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat4x3_f32.wgsl.expected.msl

@@ -0,0 +1,8 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void tint_symbol(device float4x3* tint_symbol_1 [[buffer(0)]], const device float4x3* tint_symbol_2 [[buffer(1)]]) {
+  *(tint_symbol_1) = *(tint_symbol_2);
+  return;
+}
+

diff --git a/test/tint/buffer/storage/types/mat4x3_f32.wgsl.expected.spvasm b/test/tint/buffer/storage/types/mat4x3_f32.wgsl.expected.spvasm
new file mode 100644
index 0000000..39fafdd
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat4x3_f32.wgsl.expected.spvasm

@@ -0,0 +1,43 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 18
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpName %in_block "in_block"
+               OpMemberName %in_block 0 "inner"
+               OpName %in "in"
+               OpName %out "out"
+               OpName %main "main"
+               OpDecorate %in_block Block
+               OpMemberDecorate %in_block 0 Offset 0
+               OpMemberDecorate %in_block 0 ColMajor
+               OpMemberDecorate %in_block 0 MatrixStride 16
+               OpDecorate %in NonWritable
+               OpDecorate %in DescriptorSet 0
+               OpDecorate %in Binding 0
+               OpDecorate %out DescriptorSet 0
+               OpDecorate %out Binding 1
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat4v3float = OpTypeMatrix %v3float 4
+   %in_block = OpTypeStruct %mat4v3float
+%_ptr_StorageBuffer_in_block = OpTypePointer StorageBuffer %in_block
+         %in = OpVariable %_ptr_StorageBuffer_in_block StorageBuffer
+        %out = OpVariable %_ptr_StorageBuffer_in_block StorageBuffer
+       %void = OpTypeVoid
+          %8 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_StorageBuffer_mat4v3float = OpTypePointer StorageBuffer %mat4v3float
+       %main = OpFunction %void None %8
+         %11 = OpLabel
+         %15 = OpAccessChain %_ptr_StorageBuffer_mat4v3float %out %uint_0
+         %16 = OpAccessChain %_ptr_StorageBuffer_mat4v3float %in %uint_0
+         %17 = OpLoad %mat4v3float %16
+               OpStore %15 %17
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/storage/types/mat4x3_f32.wgsl.expected.wgsl b/test/tint/buffer/storage/types/mat4x3_f32.wgsl.expected.wgsl
new file mode 100644
index 0000000..9bdd003
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat4x3_f32.wgsl.expected.wgsl

@@ -0,0 +1,8 @@
+@group(0) @binding(0) var<storage, read> in : mat4x3<f32>;
+
+@group(0) @binding(1) var<storage, read_write> out : mat4x3<f32>;
+
+@compute @workgroup_size(1)
+fn main() {
+  out = in;
+}

diff --git a/test/tint/buffer/storage/types/mat4x4_f16.wgsl b/test/tint/buffer/storage/types/mat4x4_f16.wgsl
new file mode 100644
index 0000000..bddf877
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat4x4_f16.wgsl

@@ -0,0 +1,12 @@
+enable f16;
+
+@group(0) @binding(0)
+var<storage, read> in : mat4x4<f16>;
+
+@group(0) @binding(1)
+var<storage, read_write> out : mat4x4<f16>;
+
+@compute @workgroup_size(1)
+fn main() {
+  out = in;
+}

diff --git a/test/tint/buffer/storage/types/mat4x4_f16.wgsl.expected.dxc.hlsl b/test/tint/buffer/storage/types/mat4x4_f16.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..3931c45
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat4x4_f16.wgsl.expected.dxc.hlsl

@@ -0,0 +1,19 @@
+ByteAddressBuffer tint_symbol : register(t0, space0);
+RWByteAddressBuffer tint_symbol_1 : register(u1, space0);
+
+void tint_symbol_2(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 4, 4> value) {
+  buffer.Store<vector<float16_t, 4> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 8u), value[1u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 16u), value[2u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 24u), value[3u]);
+}
+
+matrix<float16_t, 4, 4> tint_symbol_4(ByteAddressBuffer buffer, uint offset) {
+  return matrix<float16_t, 4, 4>(buffer.Load<vector<float16_t, 4> >((offset + 0u)), buffer.Load<vector<float16_t, 4> >((offset + 8u)), buffer.Load<vector<float16_t, 4> >((offset + 16u)), buffer.Load<vector<float16_t, 4> >((offset + 24u)));
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  tint_symbol_2(tint_symbol_1, 0u, tint_symbol_4(tint_symbol, 0u));
+  return;
+}

diff --git a/test/tint/buffer/storage/types/mat4x4_f16.wgsl.expected.fxc.hlsl b/test/tint/buffer/storage/types/mat4x4_f16.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..9139ed2
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat4x4_f16.wgsl.expected.fxc.hlsl

@@ -0,0 +1,25 @@
+SKIP: FAILED
+
+ByteAddressBuffer tint_symbol : register(t0, space0);
+RWByteAddressBuffer tint_symbol_1 : register(u1, space0);
+
+void tint_symbol_2(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 4, 4> value) {
+  buffer.Store<vector<float16_t, 4> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 8u), value[1u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 16u), value[2u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 24u), value[3u]);
+}
+
+matrix<float16_t, 4, 4> tint_symbol_4(ByteAddressBuffer buffer, uint offset) {
+  return matrix<float16_t, 4, 4>(buffer.Load<vector<float16_t, 4> >((offset + 0u)), buffer.Load<vector<float16_t, 4> >((offset + 8u)), buffer.Load<vector<float16_t, 4> >((offset + 16u)), buffer.Load<vector<float16_t, 4> >((offset + 24u)));
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  tint_symbol_2(tint_symbol_1, 0u, tint_symbol_4(tint_symbol, 0u));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000023AE8057740(4,68-76): error X3000: syntax error: unexpected token 'float16_t'
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000023AE8057740(5,3-14): error X3018: invalid subscript 'Store'
+

diff --git a/test/tint/buffer/storage/types/mat4x4_f16.wgsl.expected.glsl b/test/tint/buffer/storage/types/mat4x4_f16.wgsl.expected.glsl
new file mode 100644
index 0000000..1d549eb
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat4x4_f16.wgsl.expected.glsl

@@ -0,0 +1,20 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std430) buffer tint_symbol_block_ssbo {
+  f16mat4 inner;
+} tint_symbol;
+
+layout(binding = 1, std430) buffer tint_symbol_block_ssbo_1 {
+  f16mat4 inner;
+} tint_symbol_1;
+
+void tint_symbol_2() {
+  tint_symbol_1.inner = tint_symbol.inner;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  tint_symbol_2();
+  return;
+}

diff --git a/test/tint/buffer/storage/types/mat4x4_f16.wgsl.expected.msl b/test/tint/buffer/storage/types/mat4x4_f16.wgsl.expected.msl
new file mode 100644
index 0000000..8059f64
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat4x4_f16.wgsl.expected.msl

@@ -0,0 +1,8 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void tint_symbol(device half4x4* tint_symbol_1 [[buffer(0)]], const device half4x4* tint_symbol_2 [[buffer(1)]]) {
+  *(tint_symbol_1) = *(tint_symbol_2);
+  return;
+}
+

diff --git a/test/tint/buffer/storage/types/mat4x4_f16.wgsl.expected.spvasm b/test/tint/buffer/storage/types/mat4x4_f16.wgsl.expected.spvasm
new file mode 100644
index 0000000..9f990fb
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat4x4_f16.wgsl.expected.spvasm

@@ -0,0 +1,47 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 18
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpName %in_block "in_block"
+               OpMemberName %in_block 0 "inner"
+               OpName %in "in"
+               OpName %out "out"
+               OpName %main "main"
+               OpDecorate %in_block Block
+               OpMemberDecorate %in_block 0 Offset 0
+               OpMemberDecorate %in_block 0 ColMajor
+               OpMemberDecorate %in_block 0 MatrixStride 8
+               OpDecorate %in NonWritable
+               OpDecorate %in DescriptorSet 0
+               OpDecorate %in Binding 0
+               OpDecorate %out DescriptorSet 0
+               OpDecorate %out Binding 1
+       %half = OpTypeFloat 16
+     %v4half = OpTypeVector %half 4
+ %mat4v4half = OpTypeMatrix %v4half 4
+   %in_block = OpTypeStruct %mat4v4half
+%_ptr_StorageBuffer_in_block = OpTypePointer StorageBuffer %in_block
+         %in = OpVariable %_ptr_StorageBuffer_in_block StorageBuffer
+        %out = OpVariable %_ptr_StorageBuffer_in_block StorageBuffer
+       %void = OpTypeVoid
+          %8 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_StorageBuffer_mat4v4half = OpTypePointer StorageBuffer %mat4v4half
+       %main = OpFunction %void None %8
+         %11 = OpLabel
+         %15 = OpAccessChain %_ptr_StorageBuffer_mat4v4half %out %uint_0
+         %16 = OpAccessChain %_ptr_StorageBuffer_mat4v4half %in %uint_0
+         %17 = OpLoad %mat4v4half %16
+               OpStore %15 %17
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/storage/types/mat4x4_f16.wgsl.expected.wgsl b/test/tint/buffer/storage/types/mat4x4_f16.wgsl.expected.wgsl
new file mode 100644
index 0000000..9284cf9
--- /dev/null
+++ b/test/tint/buffer/storage/types/mat4x4_f16.wgsl.expected.wgsl

@@ -0,0 +1,10 @@
+enable f16;
+
+@group(0) @binding(0) var<storage, read> in : mat4x4<f16>;
+
+@group(0) @binding(1) var<storage, read_write> out : mat4x4<f16>;
+
+@compute @workgroup_size(1)
+fn main() {
+  out = in;
+}

diff --git a/test/tint/buffer/storage/types/mat4x4.wgsl b/test/tint/buffer/storage/types/mat4x4_f32.wgsl
similarity index 100%
rename from test/tint/buffer/storage/types/mat4x4.wgsl
rename to test/tint/buffer/storage/types/mat4x4_f32.wgsl


diff --git a/test/tint/buffer/storage/types/mat4x4.wgsl.expected.dxc.hlsl b/test/tint/buffer/storage/types/mat4x4_f32.wgsl.expected.dxc.hlsl
similarity index 100%
rename from test/tint/buffer/storage/types/mat4x4.wgsl.expected.dxc.hlsl
rename to test/tint/buffer/storage/types/mat4x4_f32.wgsl.expected.dxc.hlsl


diff --git a/test/tint/buffer/storage/types/mat4x4.wgsl.expected.fxc.hlsl b/test/tint/buffer/storage/types/mat4x4_f32.wgsl.expected.fxc.hlsl
similarity index 100%
rename from test/tint/buffer/storage/types/mat4x4.wgsl.expected.fxc.hlsl
rename to test/tint/buffer/storage/types/mat4x4_f32.wgsl.expected.fxc.hlsl


diff --git a/test/tint/buffer/storage/types/mat4x4.wgsl.expected.glsl b/test/tint/buffer/storage/types/mat4x4_f32.wgsl.expected.glsl
similarity index 100%
rename from test/tint/buffer/storage/types/mat4x4.wgsl.expected.glsl
rename to test/tint/buffer/storage/types/mat4x4_f32.wgsl.expected.glsl


diff --git a/test/tint/buffer/storage/types/mat4x4.wgsl.expected.msl b/test/tint/buffer/storage/types/mat4x4_f32.wgsl.expected.msl
similarity index 100%
rename from test/tint/buffer/storage/types/mat4x4.wgsl.expected.msl
rename to test/tint/buffer/storage/types/mat4x4_f32.wgsl.expected.msl


diff --git a/test/tint/buffer/storage/types/mat4x4.wgsl.expected.spvasm b/test/tint/buffer/storage/types/mat4x4_f32.wgsl.expected.spvasm
similarity index 100%
rename from test/tint/buffer/storage/types/mat4x4.wgsl.expected.spvasm
rename to test/tint/buffer/storage/types/mat4x4_f32.wgsl.expected.spvasm


diff --git a/test/tint/buffer/storage/types/mat4x4.wgsl.expected.wgsl b/test/tint/buffer/storage/types/mat4x4_f32.wgsl.expected.wgsl
similarity index 100%
rename from test/tint/buffer/storage/types/mat4x4.wgsl.expected.wgsl
rename to test/tint/buffer/storage/types/mat4x4_f32.wgsl.expected.wgsl


diff --git a/test/tint/buffer/storage/types/runtime_array.wgsl b/test/tint/buffer/storage/types/runtime_array.wgsl
deleted file mode 100644
index 4ecdc37..0000000
--- a/test/tint/buffer/storage/types/runtime_array.wgsl
+++ /dev/null

@@ -1,14 +0,0 @@
-struct S {
-  f : f32,
-};
-
-@group(0) @binding(0)
-var<storage, read> in : array<S>;
-
-@group(0) @binding(1)
-var<storage, read_write> out : array<S>;
-
-@compute @workgroup_size(1)
-fn main() {
-  out[0] = in[0];
-}

diff --git a/test/tint/buffer/storage/types/runtime_array.wgsl.expected.dxc.hlsl b/test/tint/buffer/storage/types/runtime_array.wgsl.expected.dxc.hlsl
deleted file mode 100644
index 0843539..0000000
--- a/test/tint/buffer/storage/types/runtime_array.wgsl.expected.dxc.hlsl
+++ /dev/null

@@ -1,21 +0,0 @@
-struct S {
-  float f;
-};
-
-ByteAddressBuffer tint_symbol : register(t0, space0);
-RWByteAddressBuffer tint_symbol_1 : register(u1, space0);
-
-void tint_symbol_2(RWByteAddressBuffer buffer, uint offset, S value) {
-  buffer.Store((offset + 0u), asuint(value.f));
-}
-
-S tint_symbol_4(ByteAddressBuffer buffer, uint offset) {
-  const S tint_symbol_6 = {asfloat(buffer.Load((offset + 0u)))};
-  return tint_symbol_6;
-}
-
-[numthreads(1, 1, 1)]
-void main() {
-  tint_symbol_2(tint_symbol_1, 0u, tint_symbol_4(tint_symbol, 0u));
-  return;
-}

diff --git a/test/tint/buffer/storage/types/runtime_array.wgsl.expected.fxc.hlsl b/test/tint/buffer/storage/types/runtime_array.wgsl.expected.fxc.hlsl
deleted file mode 100644
index 0843539..0000000
--- a/test/tint/buffer/storage/types/runtime_array.wgsl.expected.fxc.hlsl
+++ /dev/null

@@ -1,21 +0,0 @@
-struct S {
-  float f;
-};
-
-ByteAddressBuffer tint_symbol : register(t0, space0);
-RWByteAddressBuffer tint_symbol_1 : register(u1, space0);
-
-void tint_symbol_2(RWByteAddressBuffer buffer, uint offset, S value) {
-  buffer.Store((offset + 0u), asuint(value.f));
-}
-
-S tint_symbol_4(ByteAddressBuffer buffer, uint offset) {
-  const S tint_symbol_6 = {asfloat(buffer.Load((offset + 0u)))};
-  return tint_symbol_6;
-}
-
-[numthreads(1, 1, 1)]
-void main() {
-  tint_symbol_2(tint_symbol_1, 0u, tint_symbol_4(tint_symbol, 0u));
-  return;
-}

diff --git a/test/tint/buffer/storage/types/runtime_array.wgsl.expected.glsl b/test/tint/buffer/storage/types/runtime_array.wgsl.expected.glsl
deleted file mode 100644
index 40eb01b..0000000
--- a/test/tint/buffer/storage/types/runtime_array.wgsl.expected.glsl
+++ /dev/null

@@ -1,23 +0,0 @@
-#version 310 es
-
-struct S {
-  float f;
-};
-
-layout(binding = 0, std430) buffer tint_symbol_block_ssbo {
-  S inner[];
-} tint_symbol;
-
-layout(binding = 1, std430) buffer tint_symbol_block_ssbo_1 {
-  S inner[];
-} tint_symbol_1;
-
-void tint_symbol_2() {
-  tint_symbol_1.inner[0] = tint_symbol.inner[0];
-}
-
-layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
-void main() {
-  tint_symbol_2();
-  return;
-}

diff --git a/test/tint/buffer/storage/types/runtime_array.wgsl.expected.msl b/test/tint/buffer/storage/types/runtime_array.wgsl.expected.msl
deleted file mode 100644
index 836dba1..0000000
--- a/test/tint/buffer/storage/types/runtime_array.wgsl.expected.msl
+++ /dev/null

@@ -1,33 +0,0 @@
-#include <metal_stdlib>
-
-using namespace metal;
-
-template<typename T, size_t N>
-struct tint_array {
-    const constant T& operator[](size_t i) const constant { return elements[i]; }
-    device T& operator[](size_t i) device { return elements[i]; }
-    const device T& operator[](size_t i) const device { return elements[i]; }
-    thread T& operator[](size_t i) thread { return elements[i]; }
-    const thread T& operator[](size_t i) const thread { return elements[i]; }
-    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
-    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
-    T elements[N];
-};
-
-struct S {
-  /* 0x0000 */ float f;
-};
-
-struct tint_symbol_2 {
-  /* 0x0000 */ tint_array<S, 1> arr;
-};
-
-struct tint_symbol_4 {
-  /* 0x0000 */ tint_array<S, 1> arr;
-};
-
-kernel void tint_symbol(device tint_symbol_2* tint_symbol_1 [[buffer(0)]], const device tint_symbol_4* tint_symbol_3 [[buffer(1)]]) {
-  (*(tint_symbol_1)).arr[0] = (*(tint_symbol_3)).arr[0];
-  return;
-}
-

diff --git a/test/tint/buffer/storage/types/runtime_array.wgsl.expected.spvasm b/test/tint/buffer/storage/types/runtime_array.wgsl.expected.spvasm
deleted file mode 100644
index 8fbc2ca..0000000
--- a/test/tint/buffer/storage/types/runtime_array.wgsl.expected.spvasm
+++ /dev/null

@@ -1,47 +0,0 @@
-; SPIR-V
-; Version: 1.3
-; Generator: Google Tint Compiler; 0
-; Bound: 20
-; Schema: 0
-               OpCapability Shader
-               OpMemoryModel Logical GLSL450
-               OpEntryPoint GLCompute %main "main"
-               OpExecutionMode %main LocalSize 1 1 1
-               OpName %in_block "in_block"
-               OpMemberName %in_block 0 "inner"
-               OpName %S "S"
-               OpMemberName %S 0 "f"
-               OpName %in "in"
-               OpName %out "out"
-               OpName %main "main"
-               OpDecorate %in_block Block
-               OpMemberDecorate %in_block 0 Offset 0
-               OpMemberDecorate %S 0 Offset 0
-               OpDecorate %_runtimearr_S ArrayStride 4
-               OpDecorate %in NonWritable
-               OpDecorate %in DescriptorSet 0
-               OpDecorate %in Binding 0
-               OpDecorate %out DescriptorSet 0
-               OpDecorate %out Binding 1
-      %float = OpTypeFloat 32
-          %S = OpTypeStruct %float
-%_runtimearr_S = OpTypeRuntimeArray %S
-   %in_block = OpTypeStruct %_runtimearr_S
-%_ptr_StorageBuffer_in_block = OpTypePointer StorageBuffer %in_block
-         %in = OpVariable %_ptr_StorageBuffer_in_block StorageBuffer
-        %out = OpVariable %_ptr_StorageBuffer_in_block StorageBuffer
-       %void = OpTypeVoid
-          %8 = OpTypeFunction %void
-       %uint = OpTypeInt 32 0
-     %uint_0 = OpConstant %uint 0
-        %int = OpTypeInt 32 1
-         %15 = OpConstantNull %int
-%_ptr_StorageBuffer_S = OpTypePointer StorageBuffer %S
-       %main = OpFunction %void None %8
-         %11 = OpLabel
-         %17 = OpAccessChain %_ptr_StorageBuffer_S %out %uint_0 %15
-         %18 = OpAccessChain %_ptr_StorageBuffer_S %in %uint_0 %15
-         %19 = OpLoad %S %18
-               OpStore %17 %19
-               OpReturn
-               OpFunctionEnd

diff --git a/test/tint/buffer/storage/types/runtime_array.wgsl.expected.wgsl b/test/tint/buffer/storage/types/runtime_array.wgsl.expected.wgsl
deleted file mode 100644
index 3946635..0000000
--- a/test/tint/buffer/storage/types/runtime_array.wgsl.expected.wgsl
+++ /dev/null

@@ -1,12 +0,0 @@
-struct S {
-  f : f32,
-}
-
-@group(0) @binding(0) var<storage, read> in : array<S>;
-
-@group(0) @binding(1) var<storage, read_write> out : array<S>;
-
-@compute @workgroup_size(1)
-fn main() {
-  out[0] = in[0];
-}

diff --git a/test/tint/buffer/storage/types/runtime_array_f16.wgsl b/test/tint/buffer/storage/types/runtime_array_f16.wgsl
new file mode 100644
index 0000000..0df7afa
--- /dev/null
+++ b/test/tint/buffer/storage/types/runtime_array_f16.wgsl

@@ -0,0 +1,12 @@
+enable f16;
+
+@group(0) @binding(0)
+var<storage, read> in : array<f16>;
+
+@group(0) @binding(1)
+var<storage, read_write> out : array<f16>;
+
+@compute @workgroup_size(1)
+fn main() {
+  out[0] = in[0];
+}

diff --git a/test/tint/buffer/storage/types/runtime_array_f16.wgsl.expected.dxc.hlsl b/test/tint/buffer/storage/types/runtime_array_f16.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..bfdede4
--- /dev/null
+++ b/test/tint/buffer/storage/types/runtime_array_f16.wgsl.expected.dxc.hlsl

@@ -0,0 +1,8 @@
+ByteAddressBuffer tint_symbol : register(t0, space0);
+RWByteAddressBuffer tint_symbol_1 : register(u1, space0);
+
+[numthreads(1, 1, 1)]
+void main() {
+  tint_symbol_1.Store<float16_t>(0u, tint_symbol.Load<float16_t>(0u));
+  return;
+}

diff --git a/test/tint/buffer/storage/types/runtime_array_f16.wgsl.expected.fxc.hlsl b/test/tint/buffer/storage/types/runtime_array_f16.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..922c506
--- /dev/null
+++ b/test/tint/buffer/storage/types/runtime_array_f16.wgsl.expected.fxc.hlsl

@@ -0,0 +1,13 @@
+SKIP: FAILED
+
+ByteAddressBuffer tint_symbol : register(t0, space0);
+RWByteAddressBuffer tint_symbol_1 : register(u1, space0);
+
+[numthreads(1, 1, 1)]
+void main() {
+  tint_symbol_1.Store<float16_t>(0u, tint_symbol.Load<float16_t>(0u));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001A520622CB0(6,3-21): error X3018: invalid subscript 'Store'
+

diff --git a/test/tint/buffer/storage/types/runtime_array_f16.wgsl.expected.glsl b/test/tint/buffer/storage/types/runtime_array_f16.wgsl.expected.glsl
new file mode 100644
index 0000000..244510b
--- /dev/null
+++ b/test/tint/buffer/storage/types/runtime_array_f16.wgsl.expected.glsl

@@ -0,0 +1,20 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std430) buffer tint_symbol_block_ssbo {
+  float16_t inner[];
+} tint_symbol;
+
+layout(binding = 1, std430) buffer tint_symbol_block_ssbo_1 {
+  float16_t inner[];
+} tint_symbol_1;
+
+void tint_symbol_2() {
+  tint_symbol_1.inner[0] = tint_symbol.inner[0];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  tint_symbol_2();
+  return;
+}

diff --git a/test/tint/buffer/storage/types/runtime_array_f16.wgsl.expected.msl b/test/tint/buffer/storage/types/runtime_array_f16.wgsl.expected.msl
new file mode 100644
index 0000000..b6b4ed5
--- /dev/null
+++ b/test/tint/buffer/storage/types/runtime_array_f16.wgsl.expected.msl

@@ -0,0 +1,29 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct tint_symbol_2 {
+  /* 0x0000 */ tint_array<half, 1> arr;
+};
+
+struct tint_symbol_4 {
+  /* 0x0000 */ tint_array<half, 1> arr;
+};
+
+kernel void tint_symbol(device tint_symbol_2* tint_symbol_1 [[buffer(0)]], const device tint_symbol_4* tint_symbol_3 [[buffer(1)]]) {
+  (*(tint_symbol_1)).arr[0] = (*(tint_symbol_3)).arr[0];
+  return;
+}
+

diff --git a/test/tint/buffer/storage/types/runtime_array_f16.wgsl.expected.spvasm b/test/tint/buffer/storage/types/runtime_array_f16.wgsl.expected.spvasm
new file mode 100644
index 0000000..c3f1ad2
--- /dev/null
+++ b/test/tint/buffer/storage/types/runtime_array_f16.wgsl.expected.spvasm

@@ -0,0 +1,47 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 19
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpName %in_block "in_block"
+               OpMemberName %in_block 0 "inner"
+               OpName %in "in"
+               OpName %out "out"
+               OpName %main "main"
+               OpDecorate %in_block Block
+               OpMemberDecorate %in_block 0 Offset 0
+               OpDecorate %_runtimearr_half ArrayStride 2
+               OpDecorate %in NonWritable
+               OpDecorate %in DescriptorSet 0
+               OpDecorate %in Binding 0
+               OpDecorate %out DescriptorSet 0
+               OpDecorate %out Binding 1
+       %half = OpTypeFloat 16
+%_runtimearr_half = OpTypeRuntimeArray %half
+   %in_block = OpTypeStruct %_runtimearr_half
+%_ptr_StorageBuffer_in_block = OpTypePointer StorageBuffer %in_block
+         %in = OpVariable %_ptr_StorageBuffer_in_block StorageBuffer
+        %out = OpVariable %_ptr_StorageBuffer_in_block StorageBuffer
+       %void = OpTypeVoid
+          %7 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+        %int = OpTypeInt 32 1
+         %14 = OpConstantNull %int
+%_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
+       %main = OpFunction %void None %7
+         %10 = OpLabel
+         %16 = OpAccessChain %_ptr_StorageBuffer_half %out %uint_0 %14
+         %17 = OpAccessChain %_ptr_StorageBuffer_half %in %uint_0 %14
+         %18 = OpLoad %half %17
+               OpStore %16 %18
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/storage/types/runtime_array_f16.wgsl.expected.wgsl b/test/tint/buffer/storage/types/runtime_array_f16.wgsl.expected.wgsl
new file mode 100644
index 0000000..ac40351
--- /dev/null
+++ b/test/tint/buffer/storage/types/runtime_array_f16.wgsl.expected.wgsl

@@ -0,0 +1,10 @@
+enable f16;
+
+@group(0) @binding(0) var<storage, read> in : array<f16>;
+
+@group(0) @binding(1) var<storage, read_write> out : array<f16>;
+
+@compute @workgroup_size(1)
+fn main() {
+  out[0] = in[0];
+}

diff --git a/test/tint/buffer/storage/types/runtime_array_f32.wgsl b/test/tint/buffer/storage/types/runtime_array_f32.wgsl
new file mode 100644
index 0000000..e32bbc5
--- /dev/null
+++ b/test/tint/buffer/storage/types/runtime_array_f32.wgsl

@@ -0,0 +1,10 @@
+@group(0) @binding(0)
+var<storage, read> in : array<f32>;
+
+@group(0) @binding(1)
+var<storage, read_write> out : array<f32>;
+
+@compute @workgroup_size(1)
+fn main() {
+  out[0] = in[0];
+}

diff --git a/test/tint/buffer/storage/types/runtime_array_f32.wgsl.expected.dxc.hlsl b/test/tint/buffer/storage/types/runtime_array_f32.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..e6dca9e
--- /dev/null
+++ b/test/tint/buffer/storage/types/runtime_array_f32.wgsl.expected.dxc.hlsl

@@ -0,0 +1,8 @@
+ByteAddressBuffer tint_symbol : register(t0, space0);
+RWByteAddressBuffer tint_symbol_1 : register(u1, space0);
+
+[numthreads(1, 1, 1)]
+void main() {
+  tint_symbol_1.Store(0u, asuint(asfloat(tint_symbol.Load(0u))));
+  return;
+}

diff --git a/test/tint/buffer/storage/types/runtime_array_f32.wgsl.expected.fxc.hlsl b/test/tint/buffer/storage/types/runtime_array_f32.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..e6dca9e
--- /dev/null
+++ b/test/tint/buffer/storage/types/runtime_array_f32.wgsl.expected.fxc.hlsl

@@ -0,0 +1,8 @@
+ByteAddressBuffer tint_symbol : register(t0, space0);
+RWByteAddressBuffer tint_symbol_1 : register(u1, space0);
+
+[numthreads(1, 1, 1)]
+void main() {
+  tint_symbol_1.Store(0u, asuint(asfloat(tint_symbol.Load(0u))));
+  return;
+}

diff --git a/test/tint/buffer/storage/types/runtime_array_f32.wgsl.expected.glsl b/test/tint/buffer/storage/types/runtime_array_f32.wgsl.expected.glsl
new file mode 100644
index 0000000..2355b88
--- /dev/null
+++ b/test/tint/buffer/storage/types/runtime_array_f32.wgsl.expected.glsl

@@ -0,0 +1,19 @@
+#version 310 es
+
+layout(binding = 0, std430) buffer tint_symbol_block_ssbo {
+  float inner[];
+} tint_symbol;
+
+layout(binding = 1, std430) buffer tint_symbol_block_ssbo_1 {
+  float inner[];
+} tint_symbol_1;
+
+void tint_symbol_2() {
+  tint_symbol_1.inner[0] = tint_symbol.inner[0];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  tint_symbol_2();
+  return;
+}

diff --git a/test/tint/buffer/storage/types/runtime_array_f32.wgsl.expected.msl b/test/tint/buffer/storage/types/runtime_array_f32.wgsl.expected.msl
new file mode 100644
index 0000000..65ac123
--- /dev/null
+++ b/test/tint/buffer/storage/types/runtime_array_f32.wgsl.expected.msl

@@ -0,0 +1,29 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct tint_symbol_2 {
+  /* 0x0000 */ tint_array<float, 1> arr;
+};
+
+struct tint_symbol_4 {
+  /* 0x0000 */ tint_array<float, 1> arr;
+};
+
+kernel void tint_symbol(device tint_symbol_2* tint_symbol_1 [[buffer(0)]], const device tint_symbol_4* tint_symbol_3 [[buffer(1)]]) {
+  (*(tint_symbol_1)).arr[0] = (*(tint_symbol_3)).arr[0];
+  return;
+}
+

diff --git a/test/tint/buffer/storage/types/runtime_array_f32.wgsl.expected.spvasm b/test/tint/buffer/storage/types/runtime_array_f32.wgsl.expected.spvasm
new file mode 100644
index 0000000..25e734c
--- /dev/null
+++ b/test/tint/buffer/storage/types/runtime_array_f32.wgsl.expected.spvasm

@@ -0,0 +1,43 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 19
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpName %in_block "in_block"
+               OpMemberName %in_block 0 "inner"
+               OpName %in "in"
+               OpName %out "out"
+               OpName %main "main"
+               OpDecorate %in_block Block
+               OpMemberDecorate %in_block 0 Offset 0
+               OpDecorate %_runtimearr_float ArrayStride 4
+               OpDecorate %in NonWritable
+               OpDecorate %in DescriptorSet 0
+               OpDecorate %in Binding 0
+               OpDecorate %out DescriptorSet 0
+               OpDecorate %out Binding 1
+      %float = OpTypeFloat 32
+%_runtimearr_float = OpTypeRuntimeArray %float
+   %in_block = OpTypeStruct %_runtimearr_float
+%_ptr_StorageBuffer_in_block = OpTypePointer StorageBuffer %in_block
+         %in = OpVariable %_ptr_StorageBuffer_in_block StorageBuffer
+        %out = OpVariable %_ptr_StorageBuffer_in_block StorageBuffer
+       %void = OpTypeVoid
+          %7 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+        %int = OpTypeInt 32 1
+         %14 = OpConstantNull %int
+%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
+       %main = OpFunction %void None %7
+         %10 = OpLabel
+         %16 = OpAccessChain %_ptr_StorageBuffer_float %out %uint_0 %14
+         %17 = OpAccessChain %_ptr_StorageBuffer_float %in %uint_0 %14
+         %18 = OpLoad %float %17
+               OpStore %16 %18
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/storage/types/runtime_array_f32.wgsl.expected.wgsl b/test/tint/buffer/storage/types/runtime_array_f32.wgsl.expected.wgsl
new file mode 100644
index 0000000..bfa5487
--- /dev/null
+++ b/test/tint/buffer/storage/types/runtime_array_f32.wgsl.expected.wgsl

@@ -0,0 +1,8 @@
+@group(0) @binding(0) var<storage, read> in : array<f32>;
+
+@group(0) @binding(1) var<storage, read_write> out : array<f32>;
+
+@compute @workgroup_size(1)
+fn main() {
+  out[0] = in[0];
+}

diff --git a/test/tint/buffer/storage/types/struct.wgsl b/test/tint/buffer/storage/types/struct.wgsl
deleted file mode 100644
index ad75a94..0000000
--- a/test/tint/buffer/storage/types/struct.wgsl
+++ /dev/null

@@ -1,17 +0,0 @@
-struct Inner {
-  f : f32,
-};
-struct S {
-  inner : Inner,
-};
-
-@group(0) @binding(0)
-var<storage, read> in : S;
-
-@group(0) @binding(1)
-var<storage, read_write> out : S;
-
-@compute @workgroup_size(1)
-fn main() {
-  out = in;
-}

diff --git a/test/tint/buffer/storage/types/struct.wgsl.expected.dxc.hlsl b/test/tint/buffer/storage/types/struct.wgsl.expected.dxc.hlsl
deleted file mode 100644
index 05be600..0000000
--- a/test/tint/buffer/storage/types/struct.wgsl.expected.dxc.hlsl
+++ /dev/null

@@ -1,33 +0,0 @@
-struct Inner {
-  float f;
-};
-struct S {
-  Inner inner;
-};
-
-ByteAddressBuffer tint_symbol : register(t0, space0);
-RWByteAddressBuffer tint_symbol_1 : register(u1, space0);
-
-void tint_symbol_3(RWByteAddressBuffer buffer, uint offset, Inner value) {
-  buffer.Store((offset + 0u), asuint(value.f));
-}
-
-void tint_symbol_2(RWByteAddressBuffer buffer, uint offset, S value) {
-  tint_symbol_3(buffer, (offset + 0u), value.inner);
-}
-
-Inner tint_symbol_6(ByteAddressBuffer buffer, uint offset) {
-  const Inner tint_symbol_8 = {asfloat(buffer.Load((offset + 0u)))};
-  return tint_symbol_8;
-}
-
-S tint_symbol_5(ByteAddressBuffer buffer, uint offset) {
-  const S tint_symbol_9 = {tint_symbol_6(buffer, (offset + 0u))};
-  return tint_symbol_9;
-}
-
-[numthreads(1, 1, 1)]
-void main() {
-  tint_symbol_2(tint_symbol_1, 0u, tint_symbol_5(tint_symbol, 0u));
-  return;
-}

diff --git a/test/tint/buffer/storage/types/struct.wgsl.expected.fxc.hlsl b/test/tint/buffer/storage/types/struct.wgsl.expected.fxc.hlsl
deleted file mode 100644
index 05be600..0000000
--- a/test/tint/buffer/storage/types/struct.wgsl.expected.fxc.hlsl
+++ /dev/null

@@ -1,33 +0,0 @@
-struct Inner {
-  float f;
-};
-struct S {
-  Inner inner;
-};
-
-ByteAddressBuffer tint_symbol : register(t0, space0);
-RWByteAddressBuffer tint_symbol_1 : register(u1, space0);
-
-void tint_symbol_3(RWByteAddressBuffer buffer, uint offset, Inner value) {
-  buffer.Store((offset + 0u), asuint(value.f));
-}
-
-void tint_symbol_2(RWByteAddressBuffer buffer, uint offset, S value) {
-  tint_symbol_3(buffer, (offset + 0u), value.inner);
-}
-
-Inner tint_symbol_6(ByteAddressBuffer buffer, uint offset) {
-  const Inner tint_symbol_8 = {asfloat(buffer.Load((offset + 0u)))};
-  return tint_symbol_8;
-}
-
-S tint_symbol_5(ByteAddressBuffer buffer, uint offset) {
-  const S tint_symbol_9 = {tint_symbol_6(buffer, (offset + 0u))};
-  return tint_symbol_9;
-}
-
-[numthreads(1, 1, 1)]
-void main() {
-  tint_symbol_2(tint_symbol_1, 0u, tint_symbol_5(tint_symbol, 0u));
-  return;
-}

diff --git a/test/tint/buffer/storage/types/struct.wgsl.expected.msl b/test/tint/buffer/storage/types/struct.wgsl.expected.msl
deleted file mode 100644
index 7f042b2..0000000
--- a/test/tint/buffer/storage/types/struct.wgsl.expected.msl
+++ /dev/null

@@ -1,16 +0,0 @@
-#include <metal_stdlib>
-
-using namespace metal;
-struct Inner {
-  /* 0x0000 */ float f;
-};
-
-struct S {
-  /* 0x0000 */ Inner inner;
-};
-
-kernel void tint_symbol(device S* tint_symbol_1 [[buffer(0)]], const device S* tint_symbol_2 [[buffer(1)]]) {
-  *(tint_symbol_1) = *(tint_symbol_2);
-  return;
-}
-

diff --git a/test/tint/buffer/storage/types/struct.wgsl.expected.wgsl b/test/tint/buffer/storage/types/struct.wgsl.expected.wgsl
deleted file mode 100644
index ad3cde3..0000000
--- a/test/tint/buffer/storage/types/struct.wgsl.expected.wgsl
+++ /dev/null

@@ -1,16 +0,0 @@
-struct Inner {
-  f : f32,
-}
-
-struct S {
-  inner : Inner,
-}
-
-@group(0) @binding(0) var<storage, read> in : S;
-
-@group(0) @binding(1) var<storage, read_write> out : S;
-
-@compute @workgroup_size(1)
-fn main() {
-  out = in;
-}

diff --git a/test/tint/buffer/storage/types/struct_f16.wgsl b/test/tint/buffer/storage/types/struct_f16.wgsl
new file mode 100644
index 0000000..4e91562
--- /dev/null
+++ b/test/tint/buffer/storage/types/struct_f16.wgsl

@@ -0,0 +1,22 @@
+enable f16;
+
+struct Inner {
+  scalar_f16 : f16,
+  vec3_f16 : vec3<f16>,
+  mat2x4_f16 : mat2x4<f16>,
+};
+struct S {
+  inner : Inner,
+};
+
+@group(0) @binding(0)
+var<storage, read> in : S;
+
+@group(0) @binding(1)
+var<storage, read_write> out : S;
+
+@compute @workgroup_size(1)
+fn main() {
+  let t = in;
+  out = t;
+}

diff --git a/test/tint/buffer/storage/types/struct_f16.wgsl.expected.dxc.hlsl b/test/tint/buffer/storage/types/struct_f16.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..9a48940
--- /dev/null
+++ b/test/tint/buffer/storage/types/struct_f16.wgsl.expected.dxc.hlsl

@@ -0,0 +1,47 @@
+struct Inner {
+  float16_t scalar_f16;
+  vector<float16_t, 3> vec3_f16;
+  matrix<float16_t, 2, 4> mat2x4_f16;
+};
+struct S {
+  Inner inner;
+};
+
+ByteAddressBuffer tint_symbol : register(t0, space0);
+RWByteAddressBuffer tint_symbol_1 : register(u1, space0);
+
+matrix<float16_t, 2, 4> tint_symbol_6(ByteAddressBuffer buffer, uint offset) {
+  return matrix<float16_t, 2, 4>(buffer.Load<vector<float16_t, 4> >((offset + 0u)), buffer.Load<vector<float16_t, 4> >((offset + 8u)));
+}
+
+Inner tint_symbol_3(ByteAddressBuffer buffer, uint offset) {
+  const Inner tint_symbol_14 = {buffer.Load<float16_t>((offset + 0u)), buffer.Load<vector<float16_t, 3> >((offset + 8u)), tint_symbol_6(buffer, (offset + 16u))};
+  return tint_symbol_14;
+}
+
+S tint_symbol_2(ByteAddressBuffer buffer, uint offset) {
+  const S tint_symbol_15 = {tint_symbol_3(buffer, (offset + 0u))};
+  return tint_symbol_15;
+}
+
+void tint_symbol_12(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 2, 4> value) {
+  buffer.Store<vector<float16_t, 4> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 8u), value[1u]);
+}
+
+void tint_symbol_9(RWByteAddressBuffer buffer, uint offset, Inner value) {
+  buffer.Store<float16_t>((offset + 0u), value.scalar_f16);
+  buffer.Store<vector<float16_t, 3> >((offset + 8u), value.vec3_f16);
+  tint_symbol_12(buffer, (offset + 16u), value.mat2x4_f16);
+}
+
+void tint_symbol_8(RWByteAddressBuffer buffer, uint offset, S value) {
+  tint_symbol_9(buffer, (offset + 0u), value.inner);
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  const S t = tint_symbol_2(tint_symbol, 0u);
+  tint_symbol_8(tint_symbol_1, 0u, t);
+  return;
+}

diff --git a/test/tint/buffer/storage/types/struct_f16.wgsl.expected.fxc.hlsl b/test/tint/buffer/storage/types/struct_f16.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..c4b0487
--- /dev/null
+++ b/test/tint/buffer/storage/types/struct_f16.wgsl.expected.fxc.hlsl

@@ -0,0 +1,52 @@
+SKIP: FAILED
+
+struct Inner {
+  float16_t scalar_f16;
+  vector<float16_t, 3> vec3_f16;
+  matrix<float16_t, 2, 4> mat2x4_f16;
+};
+struct S {
+  Inner inner;
+};
+
+ByteAddressBuffer tint_symbol : register(t0, space0);
+RWByteAddressBuffer tint_symbol_1 : register(u1, space0);
+
+matrix<float16_t, 2, 4> tint_symbol_6(ByteAddressBuffer buffer, uint offset) {
+  return matrix<float16_t, 2, 4>(buffer.Load<vector<float16_t, 4> >((offset + 0u)), buffer.Load<vector<float16_t, 4> >((offset + 8u)));
+}
+
+Inner tint_symbol_3(ByteAddressBuffer buffer, uint offset) {
+  const Inner tint_symbol_14 = {buffer.Load<float16_t>((offset + 0u)), buffer.Load<vector<float16_t, 3> >((offset + 8u)), tint_symbol_6(buffer, (offset + 16u))};
+  return tint_symbol_14;
+}
+
+S tint_symbol_2(ByteAddressBuffer buffer, uint offset) {
+  const S tint_symbol_15 = {tint_symbol_3(buffer, (offset + 0u))};
+  return tint_symbol_15;
+}
+
+void tint_symbol_12(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 2, 4> value) {
+  buffer.Store<vector<float16_t, 4> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 8u), value[1u]);
+}
+
+void tint_symbol_9(RWByteAddressBuffer buffer, uint offset, Inner value) {
+  buffer.Store<float16_t>((offset + 0u), value.scalar_f16);
+  buffer.Store<vector<float16_t, 3> >((offset + 8u), value.vec3_f16);
+  tint_symbol_12(buffer, (offset + 16u), value.mat2x4_f16);
+}
+
+void tint_symbol_8(RWByteAddressBuffer buffer, uint offset, S value) {
+  tint_symbol_9(buffer, (offset + 0u), value.inner);
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  const S t = tint_symbol_2(tint_symbol, 0u);
+  tint_symbol_8(tint_symbol_1, 0u, t);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001A50B947690(2,3-11): error X3000: unrecognized identifier 'float16_t'
+

diff --git a/test/tint/buffer/storage/types/struct_f16.wgsl.expected.glsl b/test/tint/buffer/storage/types/struct_f16.wgsl.expected.glsl
new file mode 100644
index 0000000..341d140
--- /dev/null
+++ b/test/tint/buffer/storage/types/struct_f16.wgsl.expected.glsl

@@ -0,0 +1,32 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct Inner {
+  float16_t scalar_f16;
+  uint pad;
+  f16vec3 vec3_f16;
+  f16mat2x4 mat2x4_f16;
+};
+
+struct S {
+  Inner inner;
+};
+
+layout(binding = 0, std430) buffer tint_symbol_block_ssbo {
+  S inner;
+} tint_symbol;
+
+layout(binding = 1, std430) buffer tint_symbol_block_ssbo_1 {
+  S inner;
+} tint_symbol_1;
+
+void tint_symbol_2() {
+  S t = tint_symbol.inner;
+  tint_symbol_1.inner = t;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  tint_symbol_2();
+  return;
+}

diff --git a/test/tint/buffer/storage/types/struct_f16.wgsl.expected.msl b/test/tint/buffer/storage/types/struct_f16.wgsl.expected.msl
new file mode 100644
index 0000000..ffb7311
--- /dev/null
+++ b/test/tint/buffer/storage/types/struct_f16.wgsl.expected.msl

@@ -0,0 +1,34 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct Inner {
+  /* 0x0000 */ half scalar_f16;
+  /* 0x0002 */ tint_array<int8_t, 6> tint_pad;
+  /* 0x0008 */ packed_half3 vec3_f16;
+  /* 0x000e */ tint_array<int8_t, 2> tint_pad_1;
+  /* 0x0010 */ half2x4 mat2x4_f16;
+};
+
+struct S {
+  /* 0x0000 */ Inner inner;
+};
+
+kernel void tint_symbol(const device S* tint_symbol_1 [[buffer(1)]], device S* tint_symbol_2 [[buffer(0)]]) {
+  S const t = *(tint_symbol_1);
+  *(tint_symbol_2) = t;
+  return;
+}
+

diff --git a/test/tint/buffer/storage/types/struct_f16.wgsl.expected.spvasm b/test/tint/buffer/storage/types/struct_f16.wgsl.expected.spvasm
new file mode 100644
index 0000000..d981f5e
--- /dev/null
+++ b/test/tint/buffer/storage/types/struct_f16.wgsl.expected.spvasm

@@ -0,0 +1,60 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 21
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpName %in_block "in_block"
+               OpMemberName %in_block 0 "inner"
+               OpName %S "S"
+               OpMemberName %S 0 "inner"
+               OpName %Inner "Inner"
+               OpMemberName %Inner 0 "scalar_f16"
+               OpMemberName %Inner 1 "vec3_f16"
+               OpMemberName %Inner 2 "mat2x4_f16"
+               OpName %in "in"
+               OpName %out "out"
+               OpName %main "main"
+               OpDecorate %in_block Block
+               OpMemberDecorate %in_block 0 Offset 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %Inner 0 Offset 0
+               OpMemberDecorate %Inner 1 Offset 8
+               OpMemberDecorate %Inner 2 Offset 16
+               OpMemberDecorate %Inner 2 ColMajor
+               OpMemberDecorate %Inner 2 MatrixStride 8
+               OpDecorate %in NonWritable
+               OpDecorate %in DescriptorSet 0
+               OpDecorate %in Binding 0
+               OpDecorate %out DescriptorSet 0
+               OpDecorate %out Binding 1
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+     %v4half = OpTypeVector %half 4
+ %mat2v4half = OpTypeMatrix %v4half 2
+      %Inner = OpTypeStruct %half %v3half %mat2v4half
+          %S = OpTypeStruct %Inner
+   %in_block = OpTypeStruct %S
+%_ptr_StorageBuffer_in_block = OpTypePointer StorageBuffer %in_block
+         %in = OpVariable %_ptr_StorageBuffer_in_block StorageBuffer
+        %out = OpVariable %_ptr_StorageBuffer_in_block StorageBuffer
+       %void = OpTypeVoid
+         %11 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_StorageBuffer_S = OpTypePointer StorageBuffer %S
+       %main = OpFunction %void None %11
+         %14 = OpLabel
+         %18 = OpAccessChain %_ptr_StorageBuffer_S %in %uint_0
+         %19 = OpLoad %S %18
+         %20 = OpAccessChain %_ptr_StorageBuffer_S %out %uint_0
+               OpStore %20 %19
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/storage/types/struct_f16.wgsl.expected.wgsl b/test/tint/buffer/storage/types/struct_f16.wgsl.expected.wgsl
new file mode 100644
index 0000000..72cd847
--- /dev/null
+++ b/test/tint/buffer/storage/types/struct_f16.wgsl.expected.wgsl

@@ -0,0 +1,21 @@
+enable f16;
+
+struct Inner {
+  scalar_f16 : f16,
+  vec3_f16 : vec3<f16>,
+  mat2x4_f16 : mat2x4<f16>,
+}
+
+struct S {
+  inner : Inner,
+}
+
+@group(0) @binding(0) var<storage, read> in : S;
+
+@group(0) @binding(1) var<storage, read_write> out : S;
+
+@compute @workgroup_size(1)
+fn main() {
+  let t = in;
+  out = t;
+}

diff --git a/test/tint/buffer/storage/types/struct_f32.wgsl b/test/tint/buffer/storage/types/struct_f32.wgsl
new file mode 100644
index 0000000..2c5ba50
--- /dev/null
+++ b/test/tint/buffer/storage/types/struct_f32.wgsl

@@ -0,0 +1,20 @@
+struct Inner {
+  scalar_f32 : f32,
+  vec3_f32 : vec3<f32>,
+  mat2x4_f32 : mat2x4<f32>,
+};
+struct S {
+  inner : Inner,
+};
+
+@group(0) @binding(0)
+var<storage, read> in : S;
+
+@group(0) @binding(1)
+var<storage, read_write> out : S;
+
+@compute @workgroup_size(1)
+fn main() {
+  let t = in;
+  out = t;
+}

diff --git a/test/tint/buffer/storage/types/struct_f32.wgsl.expected.dxc.hlsl b/test/tint/buffer/storage/types/struct_f32.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..b32317f
--- /dev/null
+++ b/test/tint/buffer/storage/types/struct_f32.wgsl.expected.dxc.hlsl

@@ -0,0 +1,47 @@
+struct Inner {
+  float scalar_f32;
+  float3 vec3_f32;
+  float2x4 mat2x4_f32;
+};
+struct S {
+  Inner inner;
+};
+
+ByteAddressBuffer tint_symbol : register(t0, space0);
+RWByteAddressBuffer tint_symbol_1 : register(u1, space0);
+
+float2x4 tint_symbol_6(ByteAddressBuffer buffer, uint offset) {
+  return float2x4(asfloat(buffer.Load4((offset + 0u))), asfloat(buffer.Load4((offset + 16u))));
+}
+
+Inner tint_symbol_3(ByteAddressBuffer buffer, uint offset) {
+  const Inner tint_symbol_14 = {asfloat(buffer.Load((offset + 0u))), asfloat(buffer.Load3((offset + 16u))), tint_symbol_6(buffer, (offset + 32u))};
+  return tint_symbol_14;
+}
+
+S tint_symbol_2(ByteAddressBuffer buffer, uint offset) {
+  const S tint_symbol_15 = {tint_symbol_3(buffer, (offset + 0u))};
+  return tint_symbol_15;
+}
+
+void tint_symbol_12(RWByteAddressBuffer buffer, uint offset, float2x4 value) {
+  buffer.Store4((offset + 0u), asuint(value[0u]));
+  buffer.Store4((offset + 16u), asuint(value[1u]));
+}
+
+void tint_symbol_9(RWByteAddressBuffer buffer, uint offset, Inner value) {
+  buffer.Store((offset + 0u), asuint(value.scalar_f32));
+  buffer.Store3((offset + 16u), asuint(value.vec3_f32));
+  tint_symbol_12(buffer, (offset + 32u), value.mat2x4_f32);
+}
+
+void tint_symbol_8(RWByteAddressBuffer buffer, uint offset, S value) {
+  tint_symbol_9(buffer, (offset + 0u), value.inner);
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  const S t = tint_symbol_2(tint_symbol, 0u);
+  tint_symbol_8(tint_symbol_1, 0u, t);
+  return;
+}

diff --git a/test/tint/buffer/storage/types/struct_f32.wgsl.expected.fxc.hlsl b/test/tint/buffer/storage/types/struct_f32.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..b32317f
--- /dev/null
+++ b/test/tint/buffer/storage/types/struct_f32.wgsl.expected.fxc.hlsl

@@ -0,0 +1,47 @@
+struct Inner {
+  float scalar_f32;
+  float3 vec3_f32;
+  float2x4 mat2x4_f32;
+};
+struct S {
+  Inner inner;
+};
+
+ByteAddressBuffer tint_symbol : register(t0, space0);
+RWByteAddressBuffer tint_symbol_1 : register(u1, space0);
+
+float2x4 tint_symbol_6(ByteAddressBuffer buffer, uint offset) {
+  return float2x4(asfloat(buffer.Load4((offset + 0u))), asfloat(buffer.Load4((offset + 16u))));
+}
+
+Inner tint_symbol_3(ByteAddressBuffer buffer, uint offset) {
+  const Inner tint_symbol_14 = {asfloat(buffer.Load((offset + 0u))), asfloat(buffer.Load3((offset + 16u))), tint_symbol_6(buffer, (offset + 32u))};
+  return tint_symbol_14;
+}
+
+S tint_symbol_2(ByteAddressBuffer buffer, uint offset) {
+  const S tint_symbol_15 = {tint_symbol_3(buffer, (offset + 0u))};
+  return tint_symbol_15;
+}
+
+void tint_symbol_12(RWByteAddressBuffer buffer, uint offset, float2x4 value) {
+  buffer.Store4((offset + 0u), asuint(value[0u]));
+  buffer.Store4((offset + 16u), asuint(value[1u]));
+}
+
+void tint_symbol_9(RWByteAddressBuffer buffer, uint offset, Inner value) {
+  buffer.Store((offset + 0u), asuint(value.scalar_f32));
+  buffer.Store3((offset + 16u), asuint(value.vec3_f32));
+  tint_symbol_12(buffer, (offset + 32u), value.mat2x4_f32);
+}
+
+void tint_symbol_8(RWByteAddressBuffer buffer, uint offset, S value) {
+  tint_symbol_9(buffer, (offset + 0u), value.inner);
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  const S t = tint_symbol_2(tint_symbol, 0u);
+  tint_symbol_8(tint_symbol_1, 0u, t);
+  return;
+}

diff --git a/test/tint/buffer/storage/types/struct_f32.wgsl.expected.glsl b/test/tint/buffer/storage/types/struct_f32.wgsl.expected.glsl
new file mode 100644
index 0000000..cd535d3
--- /dev/null
+++ b/test/tint/buffer/storage/types/struct_f32.wgsl.expected.glsl

@@ -0,0 +1,34 @@
+#version 310 es
+
+struct Inner {
+  float scalar_f32;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  vec3 vec3_f32;
+  uint pad_3;
+  mat2x4 mat2x4_f32;
+};
+
+struct S {
+  Inner inner;
+};
+
+layout(binding = 0, std430) buffer tint_symbol_block_ssbo {
+  S inner;
+} tint_symbol;
+
+layout(binding = 1, std430) buffer tint_symbol_block_ssbo_1 {
+  S inner;
+} tint_symbol_1;
+
+void tint_symbol_2() {
+  S t = tint_symbol.inner;
+  tint_symbol_1.inner = t;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  tint_symbol_2();
+  return;
+}

diff --git a/test/tint/buffer/storage/types/struct_f32.wgsl.expected.msl b/test/tint/buffer/storage/types/struct_f32.wgsl.expected.msl
new file mode 100644
index 0000000..0df3bc4
--- /dev/null
+++ b/test/tint/buffer/storage/types/struct_f32.wgsl.expected.msl

@@ -0,0 +1,34 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct Inner {
+  /* 0x0000 */ float scalar_f32;
+  /* 0x0004 */ tint_array<int8_t, 12> tint_pad;
+  /* 0x0010 */ packed_float3 vec3_f32;
+  /* 0x001c */ tint_array<int8_t, 4> tint_pad_1;
+  /* 0x0020 */ float2x4 mat2x4_f32;
+};
+
+struct S {
+  /* 0x0000 */ Inner inner;
+};
+
+kernel void tint_symbol(const device S* tint_symbol_1 [[buffer(1)]], device S* tint_symbol_2 [[buffer(0)]]) {
+  S const t = *(tint_symbol_1);
+  *(tint_symbol_2) = t;
+  return;
+}
+

diff --git a/test/tint/buffer/storage/types/struct_f32.wgsl.expected.spvasm b/test/tint/buffer/storage/types/struct_f32.wgsl.expected.spvasm
new file mode 100644
index 0000000..0da5cf1
--- /dev/null
+++ b/test/tint/buffer/storage/types/struct_f32.wgsl.expected.spvasm

@@ -0,0 +1,56 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 21
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpName %in_block "in_block"
+               OpMemberName %in_block 0 "inner"
+               OpName %S "S"
+               OpMemberName %S 0 "inner"
+               OpName %Inner "Inner"
+               OpMemberName %Inner 0 "scalar_f32"
+               OpMemberName %Inner 1 "vec3_f32"
+               OpMemberName %Inner 2 "mat2x4_f32"
+               OpName %in "in"
+               OpName %out "out"
+               OpName %main "main"
+               OpDecorate %in_block Block
+               OpMemberDecorate %in_block 0 Offset 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %Inner 0 Offset 0
+               OpMemberDecorate %Inner 1 Offset 16
+               OpMemberDecorate %Inner 2 Offset 32
+               OpMemberDecorate %Inner 2 ColMajor
+               OpMemberDecorate %Inner 2 MatrixStride 16
+               OpDecorate %in NonWritable
+               OpDecorate %in DescriptorSet 0
+               OpDecorate %in Binding 0
+               OpDecorate %out DescriptorSet 0
+               OpDecorate %out Binding 1
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+    %v4float = OpTypeVector %float 4
+%mat2v4float = OpTypeMatrix %v4float 2
+      %Inner = OpTypeStruct %float %v3float %mat2v4float
+          %S = OpTypeStruct %Inner
+   %in_block = OpTypeStruct %S
+%_ptr_StorageBuffer_in_block = OpTypePointer StorageBuffer %in_block
+         %in = OpVariable %_ptr_StorageBuffer_in_block StorageBuffer
+        %out = OpVariable %_ptr_StorageBuffer_in_block StorageBuffer
+       %void = OpTypeVoid
+         %11 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_StorageBuffer_S = OpTypePointer StorageBuffer %S
+       %main = OpFunction %void None %11
+         %14 = OpLabel
+         %18 = OpAccessChain %_ptr_StorageBuffer_S %in %uint_0
+         %19 = OpLoad %S %18
+         %20 = OpAccessChain %_ptr_StorageBuffer_S %out %uint_0
+               OpStore %20 %19
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/storage/types/struct_f32.wgsl.expected.wgsl b/test/tint/buffer/storage/types/struct_f32.wgsl.expected.wgsl
new file mode 100644
index 0000000..dde254b
--- /dev/null
+++ b/test/tint/buffer/storage/types/struct_f32.wgsl.expected.wgsl

@@ -0,0 +1,19 @@
+struct Inner {
+  scalar_f32 : f32,
+  vec3_f32 : vec3<f32>,
+  mat2x4_f32 : mat2x4<f32>,
+}
+
+struct S {
+  inner : Inner,
+}
+
+@group(0) @binding(0) var<storage, read> in : S;
+
+@group(0) @binding(1) var<storage, read_write> out : S;
+
+@compute @workgroup_size(1)
+fn main() {
+  let t = in;
+  out = t;
+}

diff --git a/test/tint/buffer/storage/types/vec2_f16.wgsl b/test/tint/buffer/storage/types/vec2_f16.wgsl
new file mode 100644
index 0000000..74e5f4b
--- /dev/null
+++ b/test/tint/buffer/storage/types/vec2_f16.wgsl

@@ -0,0 +1,12 @@
+enable f16;
+
+@group(0) @binding(0)
+var<storage, read> in : vec2<f16>;
+
+@group(0) @binding(1)
+var<storage, read_write> out : vec2<f16>;
+
+@compute @workgroup_size(1)
+fn main() {
+  out = in;
+}

diff --git a/test/tint/buffer/storage/types/vec2_f16.wgsl.expected.dxc.hlsl b/test/tint/buffer/storage/types/vec2_f16.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..109befb
--- /dev/null
+++ b/test/tint/buffer/storage/types/vec2_f16.wgsl.expected.dxc.hlsl

@@ -0,0 +1,8 @@
+ByteAddressBuffer tint_symbol : register(t0, space0);
+RWByteAddressBuffer tint_symbol_1 : register(u1, space0);
+
+[numthreads(1, 1, 1)]
+void main() {
+  tint_symbol_1.Store<vector<float16_t, 2> >(0u, tint_symbol.Load<vector<float16_t, 2> >(0u));
+  return;
+}

diff --git a/test/tint/buffer/storage/types/vec2_f16.wgsl.expected.fxc.hlsl b/test/tint/buffer/storage/types/vec2_f16.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..7756784
--- /dev/null
+++ b/test/tint/buffer/storage/types/vec2_f16.wgsl.expected.fxc.hlsl

@@ -0,0 +1,13 @@
+SKIP: FAILED
+
+ByteAddressBuffer tint_symbol : register(t0, space0);
+RWByteAddressBuffer tint_symbol_1 : register(u1, space0);
+
+[numthreads(1, 1, 1)]
+void main() {
+  tint_symbol_1.Store<vector<float16_t, 2> >(0u, tint_symbol.Load<vector<float16_t, 2> >(0u));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000017510013100(6,3-21): error X3018: invalid subscript 'Store'
+

diff --git a/test/tint/buffer/storage/types/vec2_f16.wgsl.expected.glsl b/test/tint/buffer/storage/types/vec2_f16.wgsl.expected.glsl
new file mode 100644
index 0000000..1ff8c34
--- /dev/null
+++ b/test/tint/buffer/storage/types/vec2_f16.wgsl.expected.glsl

@@ -0,0 +1,20 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std430) buffer tint_symbol_block_ssbo {
+  f16vec2 inner;
+} tint_symbol;
+
+layout(binding = 1, std430) buffer tint_symbol_block_ssbo_1 {
+  f16vec2 inner;
+} tint_symbol_1;
+
+void tint_symbol_2() {
+  tint_symbol_1.inner = tint_symbol.inner;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  tint_symbol_2();
+  return;
+}

diff --git a/test/tint/buffer/storage/types/vec2_f16.wgsl.expected.msl b/test/tint/buffer/storage/types/vec2_f16.wgsl.expected.msl
new file mode 100644
index 0000000..420e73b
--- /dev/null
+++ b/test/tint/buffer/storage/types/vec2_f16.wgsl.expected.msl

@@ -0,0 +1,8 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void tint_symbol(device half2* tint_symbol_1 [[buffer(0)]], const device half2* tint_symbol_2 [[buffer(1)]]) {
+  *(tint_symbol_1) = *(tint_symbol_2);
+  return;
+}
+

diff --git a/test/tint/buffer/storage/types/vec2_f16.wgsl.expected.spvasm b/test/tint/buffer/storage/types/vec2_f16.wgsl.expected.spvasm
new file mode 100644
index 0000000..3ecc71d
--- /dev/null
+++ b/test/tint/buffer/storage/types/vec2_f16.wgsl.expected.spvasm

@@ -0,0 +1,44 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 17
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpName %in_block "in_block"
+               OpMemberName %in_block 0 "inner"
+               OpName %in "in"
+               OpName %out "out"
+               OpName %main "main"
+               OpDecorate %in_block Block
+               OpMemberDecorate %in_block 0 Offset 0
+               OpDecorate %in NonWritable
+               OpDecorate %in DescriptorSet 0
+               OpDecorate %in Binding 0
+               OpDecorate %out DescriptorSet 0
+               OpDecorate %out Binding 1
+       %half = OpTypeFloat 16
+     %v2half = OpTypeVector %half 2
+   %in_block = OpTypeStruct %v2half
+%_ptr_StorageBuffer_in_block = OpTypePointer StorageBuffer %in_block
+         %in = OpVariable %_ptr_StorageBuffer_in_block StorageBuffer
+        %out = OpVariable %_ptr_StorageBuffer_in_block StorageBuffer
+       %void = OpTypeVoid
+          %7 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_StorageBuffer_v2half = OpTypePointer StorageBuffer %v2half
+       %main = OpFunction %void None %7
+         %10 = OpLabel
+         %14 = OpAccessChain %_ptr_StorageBuffer_v2half %out %uint_0
+         %15 = OpAccessChain %_ptr_StorageBuffer_v2half %in %uint_0
+         %16 = OpLoad %v2half %15
+               OpStore %14 %16
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/storage/types/vec2_f16.wgsl.expected.wgsl b/test/tint/buffer/storage/types/vec2_f16.wgsl.expected.wgsl
new file mode 100644
index 0000000..197d6c1
--- /dev/null
+++ b/test/tint/buffer/storage/types/vec2_f16.wgsl.expected.wgsl

@@ -0,0 +1,10 @@
+enable f16;
+
+@group(0) @binding(0) var<storage, read> in : vec2<f16>;
+
+@group(0) @binding(1) var<storage, read_write> out : vec2<f16>;
+
+@compute @workgroup_size(1)
+fn main() {
+  out = in;
+}

diff --git a/test/tint/buffer/storage/types/vec2_f32.wgsl b/test/tint/buffer/storage/types/vec2_f32.wgsl
new file mode 100644
index 0000000..b274bfd
--- /dev/null
+++ b/test/tint/buffer/storage/types/vec2_f32.wgsl

@@ -0,0 +1,10 @@
+@group(0) @binding(0)
+var<storage, read> in : vec2<f32>;
+
+@group(0) @binding(1)
+var<storage, read_write> out : vec2<f32>;
+
+@compute @workgroup_size(1)
+fn main() {
+  out = in;
+}

diff --git a/test/tint/buffer/storage/types/vec2_f32.wgsl.expected.dxc.hlsl b/test/tint/buffer/storage/types/vec2_f32.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..0b35cb3
--- /dev/null
+++ b/test/tint/buffer/storage/types/vec2_f32.wgsl.expected.dxc.hlsl

@@ -0,0 +1,8 @@
+ByteAddressBuffer tint_symbol : register(t0, space0);
+RWByteAddressBuffer tint_symbol_1 : register(u1, space0);
+
+[numthreads(1, 1, 1)]
+void main() {
+  tint_symbol_1.Store2(0u, asuint(asfloat(tint_symbol.Load2(0u))));
+  return;
+}

diff --git a/test/tint/buffer/storage/types/vec2_f32.wgsl.expected.fxc.hlsl b/test/tint/buffer/storage/types/vec2_f32.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..0b35cb3
--- /dev/null
+++ b/test/tint/buffer/storage/types/vec2_f32.wgsl.expected.fxc.hlsl

@@ -0,0 +1,8 @@
+ByteAddressBuffer tint_symbol : register(t0, space0);
+RWByteAddressBuffer tint_symbol_1 : register(u1, space0);
+
+[numthreads(1, 1, 1)]
+void main() {
+  tint_symbol_1.Store2(0u, asuint(asfloat(tint_symbol.Load2(0u))));
+  return;
+}

diff --git a/test/tint/buffer/storage/types/vec2_f32.wgsl.expected.glsl b/test/tint/buffer/storage/types/vec2_f32.wgsl.expected.glsl
new file mode 100644
index 0000000..ffdfc2b
--- /dev/null
+++ b/test/tint/buffer/storage/types/vec2_f32.wgsl.expected.glsl

@@ -0,0 +1,19 @@
+#version 310 es
+
+layout(binding = 0, std430) buffer tint_symbol_block_ssbo {
+  vec2 inner;
+} tint_symbol;
+
+layout(binding = 1, std430) buffer tint_symbol_block_ssbo_1 {
+  vec2 inner;
+} tint_symbol_1;
+
+void tint_symbol_2() {
+  tint_symbol_1.inner = tint_symbol.inner;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  tint_symbol_2();
+  return;
+}

diff --git a/test/tint/buffer/storage/types/vec2_f32.wgsl.expected.msl b/test/tint/buffer/storage/types/vec2_f32.wgsl.expected.msl
new file mode 100644
index 0000000..a0396b3
--- /dev/null
+++ b/test/tint/buffer/storage/types/vec2_f32.wgsl.expected.msl

@@ -0,0 +1,8 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void tint_symbol(device float2* tint_symbol_1 [[buffer(0)]], const device float2* tint_symbol_2 [[buffer(1)]]) {
+  *(tint_symbol_1) = *(tint_symbol_2);
+  return;
+}
+

diff --git a/test/tint/buffer/storage/types/vec2_f32.wgsl.expected.spvasm b/test/tint/buffer/storage/types/vec2_f32.wgsl.expected.spvasm
new file mode 100644
index 0000000..95447ff
--- /dev/null
+++ b/test/tint/buffer/storage/types/vec2_f32.wgsl.expected.spvasm

@@ -0,0 +1,40 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 17
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpName %in_block "in_block"
+               OpMemberName %in_block 0 "inner"
+               OpName %in "in"
+               OpName %out "out"
+               OpName %main "main"
+               OpDecorate %in_block Block
+               OpMemberDecorate %in_block 0 Offset 0
+               OpDecorate %in NonWritable
+               OpDecorate %in DescriptorSet 0
+               OpDecorate %in Binding 0
+               OpDecorate %out DescriptorSet 0
+               OpDecorate %out Binding 1
+      %float = OpTypeFloat 32
+    %v2float = OpTypeVector %float 2
+   %in_block = OpTypeStruct %v2float
+%_ptr_StorageBuffer_in_block = OpTypePointer StorageBuffer %in_block
+         %in = OpVariable %_ptr_StorageBuffer_in_block StorageBuffer
+        %out = OpVariable %_ptr_StorageBuffer_in_block StorageBuffer
+       %void = OpTypeVoid
+          %7 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_StorageBuffer_v2float = OpTypePointer StorageBuffer %v2float
+       %main = OpFunction %void None %7
+         %10 = OpLabel
+         %14 = OpAccessChain %_ptr_StorageBuffer_v2float %out %uint_0
+         %15 = OpAccessChain %_ptr_StorageBuffer_v2float %in %uint_0
+         %16 = OpLoad %v2float %15
+               OpStore %14 %16
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/storage/types/vec2_f32.wgsl.expected.wgsl b/test/tint/buffer/storage/types/vec2_f32.wgsl.expected.wgsl
new file mode 100644
index 0000000..36117da
--- /dev/null
+++ b/test/tint/buffer/storage/types/vec2_f32.wgsl.expected.wgsl

@@ -0,0 +1,8 @@
+@group(0) @binding(0) var<storage, read> in : vec2<f32>;
+
+@group(0) @binding(1) var<storage, read_write> out : vec2<f32>;
+
+@compute @workgroup_size(1)
+fn main() {
+  out = in;
+}

diff --git a/test/tint/buffer/storage/types/vec2.wgsl b/test/tint/buffer/storage/types/vec2_i32.wgsl
similarity index 100%
rename from test/tint/buffer/storage/types/vec2.wgsl
rename to test/tint/buffer/storage/types/vec2_i32.wgsl


diff --git a/test/tint/buffer/storage/types/vec2.wgsl.expected.dxc.hlsl b/test/tint/buffer/storage/types/vec2_i32.wgsl.expected.dxc.hlsl
similarity index 100%
rename from test/tint/buffer/storage/types/vec2.wgsl.expected.dxc.hlsl
rename to test/tint/buffer/storage/types/vec2_i32.wgsl.expected.dxc.hlsl


diff --git a/test/tint/buffer/storage/types/vec2.wgsl.expected.fxc.hlsl b/test/tint/buffer/storage/types/vec2_i32.wgsl.expected.fxc.hlsl
similarity index 100%
rename from test/tint/buffer/storage/types/vec2.wgsl.expected.fxc.hlsl
rename to test/tint/buffer/storage/types/vec2_i32.wgsl.expected.fxc.hlsl


diff --git a/test/tint/buffer/storage/types/vec2.wgsl.expected.glsl b/test/tint/buffer/storage/types/vec2_i32.wgsl.expected.glsl
similarity index 100%
rename from test/tint/buffer/storage/types/vec2.wgsl.expected.glsl
rename to test/tint/buffer/storage/types/vec2_i32.wgsl.expected.glsl


diff --git a/test/tint/buffer/storage/types/vec2.wgsl.expected.msl b/test/tint/buffer/storage/types/vec2_i32.wgsl.expected.msl
similarity index 100%
rename from test/tint/buffer/storage/types/vec2.wgsl.expected.msl
rename to test/tint/buffer/storage/types/vec2_i32.wgsl.expected.msl


diff --git a/test/tint/buffer/storage/types/vec2.wgsl.expected.spvasm b/test/tint/buffer/storage/types/vec2_i32.wgsl.expected.spvasm
similarity index 100%
rename from test/tint/buffer/storage/types/vec2.wgsl.expected.spvasm
rename to test/tint/buffer/storage/types/vec2_i32.wgsl.expected.spvasm


diff --git a/test/tint/buffer/storage/types/vec2.wgsl.expected.wgsl b/test/tint/buffer/storage/types/vec2_i32.wgsl.expected.wgsl
similarity index 100%
rename from test/tint/buffer/storage/types/vec2.wgsl.expected.wgsl
rename to test/tint/buffer/storage/types/vec2_i32.wgsl.expected.wgsl


diff --git a/test/tint/buffer/storage/types/vec2_u32.wgsl b/test/tint/buffer/storage/types/vec2_u32.wgsl
new file mode 100644
index 0000000..345cbe7
--- /dev/null
+++ b/test/tint/buffer/storage/types/vec2_u32.wgsl

@@ -0,0 +1,10 @@
+@group(0) @binding(0)
+var<storage, read> in : vec2<u32>;
+
+@group(0) @binding(1)
+var<storage, read_write> out : vec2<u32>;
+
+@compute @workgroup_size(1)
+fn main() {
+  out = in;
+}

diff --git a/test/tint/buffer/storage/types/vec2_u32.wgsl.expected.dxc.hlsl b/test/tint/buffer/storage/types/vec2_u32.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..7acc823
--- /dev/null
+++ b/test/tint/buffer/storage/types/vec2_u32.wgsl.expected.dxc.hlsl

@@ -0,0 +1,8 @@
+ByteAddressBuffer tint_symbol : register(t0, space0);
+RWByteAddressBuffer tint_symbol_1 : register(u1, space0);
+
+[numthreads(1, 1, 1)]
+void main() {
+  tint_symbol_1.Store2(0u, asuint(tint_symbol.Load2(0u)));
+  return;
+}

diff --git a/test/tint/buffer/storage/types/vec2_u32.wgsl.expected.fxc.hlsl b/test/tint/buffer/storage/types/vec2_u32.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..7acc823
--- /dev/null
+++ b/test/tint/buffer/storage/types/vec2_u32.wgsl.expected.fxc.hlsl

@@ -0,0 +1,8 @@
+ByteAddressBuffer tint_symbol : register(t0, space0);
+RWByteAddressBuffer tint_symbol_1 : register(u1, space0);
+
+[numthreads(1, 1, 1)]
+void main() {
+  tint_symbol_1.Store2(0u, asuint(tint_symbol.Load2(0u)));
+  return;
+}

diff --git a/test/tint/buffer/storage/types/vec2_u32.wgsl.expected.glsl b/test/tint/buffer/storage/types/vec2_u32.wgsl.expected.glsl
new file mode 100644
index 0000000..269bd1f
--- /dev/null
+++ b/test/tint/buffer/storage/types/vec2_u32.wgsl.expected.glsl

@@ -0,0 +1,19 @@
+#version 310 es
+
+layout(binding = 0, std430) buffer tint_symbol_block_ssbo {
+  uvec2 inner;
+} tint_symbol;
+
+layout(binding = 1, std430) buffer tint_symbol_block_ssbo_1 {
+  uvec2 inner;
+} tint_symbol_1;
+
+void tint_symbol_2() {
+  tint_symbol_1.inner = tint_symbol.inner;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  tint_symbol_2();
+  return;
+}

diff --git a/test/tint/buffer/storage/types/vec2_u32.wgsl.expected.msl b/test/tint/buffer/storage/types/vec2_u32.wgsl.expected.msl
new file mode 100644
index 0000000..412fcd1
--- /dev/null
+++ b/test/tint/buffer/storage/types/vec2_u32.wgsl.expected.msl

@@ -0,0 +1,8 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void tint_symbol(device uint2* tint_symbol_1 [[buffer(0)]], const device uint2* tint_symbol_2 [[buffer(1)]]) {
+  *(tint_symbol_1) = *(tint_symbol_2);
+  return;
+}
+

diff --git a/test/tint/buffer/storage/types/vec2_u32.wgsl.expected.spvasm b/test/tint/buffer/storage/types/vec2_u32.wgsl.expected.spvasm
new file mode 100644
index 0000000..f79e277
--- /dev/null
+++ b/test/tint/buffer/storage/types/vec2_u32.wgsl.expected.spvasm

@@ -0,0 +1,39 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 16
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpName %in_block "in_block"
+               OpMemberName %in_block 0 "inner"
+               OpName %in "in"
+               OpName %out "out"
+               OpName %main "main"
+               OpDecorate %in_block Block
+               OpMemberDecorate %in_block 0 Offset 0
+               OpDecorate %in NonWritable
+               OpDecorate %in DescriptorSet 0
+               OpDecorate %in Binding 0
+               OpDecorate %out DescriptorSet 0
+               OpDecorate %out Binding 1
+       %uint = OpTypeInt 32 0
+     %v2uint = OpTypeVector %uint 2
+   %in_block = OpTypeStruct %v2uint
+%_ptr_StorageBuffer_in_block = OpTypePointer StorageBuffer %in_block
+         %in = OpVariable %_ptr_StorageBuffer_in_block StorageBuffer
+        %out = OpVariable %_ptr_StorageBuffer_in_block StorageBuffer
+       %void = OpTypeVoid
+          %7 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_StorageBuffer_v2uint = OpTypePointer StorageBuffer %v2uint
+       %main = OpFunction %void None %7
+         %10 = OpLabel
+         %13 = OpAccessChain %_ptr_StorageBuffer_v2uint %out %uint_0
+         %14 = OpAccessChain %_ptr_StorageBuffer_v2uint %in %uint_0
+         %15 = OpLoad %v2uint %14
+               OpStore %13 %15
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/storage/types/vec2_u32.wgsl.expected.wgsl b/test/tint/buffer/storage/types/vec2_u32.wgsl.expected.wgsl
new file mode 100644
index 0000000..cfe55fb
--- /dev/null
+++ b/test/tint/buffer/storage/types/vec2_u32.wgsl.expected.wgsl

@@ -0,0 +1,8 @@
+@group(0) @binding(0) var<storage, read> in : vec2<u32>;
+
+@group(0) @binding(1) var<storage, read_write> out : vec2<u32>;
+
+@compute @workgroup_size(1)
+fn main() {
+  out = in;
+}

diff --git a/test/tint/buffer/storage/types/vec3_f16.wgsl b/test/tint/buffer/storage/types/vec3_f16.wgsl
new file mode 100644
index 0000000..ec8914a
--- /dev/null
+++ b/test/tint/buffer/storage/types/vec3_f16.wgsl

@@ -0,0 +1,12 @@
+enable f16;
+
+@group(0) @binding(0)
+var<storage, read> in : vec3<f16>;
+
+@group(0) @binding(1)
+var<storage, read_write> out : vec3<f16>;
+
+@compute @workgroup_size(1)
+fn main() {
+  out = in;
+}

diff --git a/test/tint/buffer/storage/types/vec3_f16.wgsl.expected.dxc.hlsl b/test/tint/buffer/storage/types/vec3_f16.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..9bb54e8
--- /dev/null
+++ b/test/tint/buffer/storage/types/vec3_f16.wgsl.expected.dxc.hlsl

@@ -0,0 +1,8 @@
+ByteAddressBuffer tint_symbol : register(t0, space0);
+RWByteAddressBuffer tint_symbol_1 : register(u1, space0);
+
+[numthreads(1, 1, 1)]
+void main() {
+  tint_symbol_1.Store<vector<float16_t, 3> >(0u, tint_symbol.Load<vector<float16_t, 3> >(0u));
+  return;
+}

diff --git a/test/tint/buffer/storage/types/vec3_f16.wgsl.expected.fxc.hlsl b/test/tint/buffer/storage/types/vec3_f16.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..8354072
--- /dev/null
+++ b/test/tint/buffer/storage/types/vec3_f16.wgsl.expected.fxc.hlsl

@@ -0,0 +1,13 @@
+SKIP: FAILED
+
+ByteAddressBuffer tint_symbol : register(t0, space0);
+RWByteAddressBuffer tint_symbol_1 : register(u1, space0);
+
+[numthreads(1, 1, 1)]
+void main() {
+  tint_symbol_1.Store<vector<float16_t, 3> >(0u, tint_symbol.Load<vector<float16_t, 3> >(0u));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x00000207E7334340(6,3-21): error X3018: invalid subscript 'Store'
+

diff --git a/test/tint/buffer/storage/types/vec3_f16.wgsl.expected.glsl b/test/tint/buffer/storage/types/vec3_f16.wgsl.expected.glsl
new file mode 100644
index 0000000..ce9061f
--- /dev/null
+++ b/test/tint/buffer/storage/types/vec3_f16.wgsl.expected.glsl

@@ -0,0 +1,20 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std430) buffer tint_symbol_block_ssbo {
+  f16vec3 inner;
+} tint_symbol;
+
+layout(binding = 1, std430) buffer tint_symbol_block_ssbo_1 {
+  f16vec3 inner;
+} tint_symbol_1;
+
+void tint_symbol_2() {
+  tint_symbol_1.inner = tint_symbol.inner;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  tint_symbol_2();
+  return;
+}

diff --git a/test/tint/buffer/storage/types/vec3_f16.wgsl.expected.msl b/test/tint/buffer/storage/types/vec3_f16.wgsl.expected.msl
new file mode 100644
index 0000000..d5bfb88
--- /dev/null
+++ b/test/tint/buffer/storage/types/vec3_f16.wgsl.expected.msl

@@ -0,0 +1,8 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void tint_symbol(device half3* tint_symbol_1 [[buffer(0)]], const device half3* tint_symbol_2 [[buffer(1)]]) {
+  *(tint_symbol_1) = *(tint_symbol_2);
+  return;
+}
+

diff --git a/test/tint/buffer/storage/types/vec3_f16.wgsl.expected.spvasm b/test/tint/buffer/storage/types/vec3_f16.wgsl.expected.spvasm
new file mode 100644
index 0000000..21de92d
--- /dev/null
+++ b/test/tint/buffer/storage/types/vec3_f16.wgsl.expected.spvasm

@@ -0,0 +1,44 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 17
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpName %in_block "in_block"
+               OpMemberName %in_block 0 "inner"
+               OpName %in "in"
+               OpName %out "out"
+               OpName %main "main"
+               OpDecorate %in_block Block
+               OpMemberDecorate %in_block 0 Offset 0
+               OpDecorate %in NonWritable
+               OpDecorate %in DescriptorSet 0
+               OpDecorate %in Binding 0
+               OpDecorate %out DescriptorSet 0
+               OpDecorate %out Binding 1
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+   %in_block = OpTypeStruct %v3half
+%_ptr_StorageBuffer_in_block = OpTypePointer StorageBuffer %in_block
+         %in = OpVariable %_ptr_StorageBuffer_in_block StorageBuffer
+        %out = OpVariable %_ptr_StorageBuffer_in_block StorageBuffer
+       %void = OpTypeVoid
+          %7 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_StorageBuffer_v3half = OpTypePointer StorageBuffer %v3half
+       %main = OpFunction %void None %7
+         %10 = OpLabel
+         %14 = OpAccessChain %_ptr_StorageBuffer_v3half %out %uint_0
+         %15 = OpAccessChain %_ptr_StorageBuffer_v3half %in %uint_0
+         %16 = OpLoad %v3half %15
+               OpStore %14 %16
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/storage/types/vec3_f16.wgsl.expected.wgsl b/test/tint/buffer/storage/types/vec3_f16.wgsl.expected.wgsl
new file mode 100644
index 0000000..b49d6f3
--- /dev/null
+++ b/test/tint/buffer/storage/types/vec3_f16.wgsl.expected.wgsl

@@ -0,0 +1,10 @@
+enable f16;
+
+@group(0) @binding(0) var<storage, read> in : vec3<f16>;
+
+@group(0) @binding(1) var<storage, read_write> out : vec3<f16>;
+
+@compute @workgroup_size(1)
+fn main() {
+  out = in;
+}

diff --git a/test/tint/buffer/storage/types/vec3_f32.wgsl b/test/tint/buffer/storage/types/vec3_f32.wgsl
new file mode 100644
index 0000000..b0bd77a
--- /dev/null
+++ b/test/tint/buffer/storage/types/vec3_f32.wgsl

@@ -0,0 +1,10 @@
+@group(0) @binding(0)
+var<storage, read> in : vec3<f32>;
+
+@group(0) @binding(1)
+var<storage, read_write> out : vec3<f32>;
+
+@compute @workgroup_size(1)
+fn main() {
+  out = in;
+}

diff --git a/test/tint/buffer/storage/types/vec3_f32.wgsl.expected.dxc.hlsl b/test/tint/buffer/storage/types/vec3_f32.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..fa40436
--- /dev/null
+++ b/test/tint/buffer/storage/types/vec3_f32.wgsl.expected.dxc.hlsl

@@ -0,0 +1,8 @@
+ByteAddressBuffer tint_symbol : register(t0, space0);
+RWByteAddressBuffer tint_symbol_1 : register(u1, space0);
+
+[numthreads(1, 1, 1)]
+void main() {
+  tint_symbol_1.Store3(0u, asuint(asfloat(tint_symbol.Load3(0u))));
+  return;
+}

diff --git a/test/tint/buffer/storage/types/vec3_f32.wgsl.expected.fxc.hlsl b/test/tint/buffer/storage/types/vec3_f32.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..fa40436
--- /dev/null
+++ b/test/tint/buffer/storage/types/vec3_f32.wgsl.expected.fxc.hlsl

@@ -0,0 +1,8 @@
+ByteAddressBuffer tint_symbol : register(t0, space0);
+RWByteAddressBuffer tint_symbol_1 : register(u1, space0);
+
+[numthreads(1, 1, 1)]
+void main() {
+  tint_symbol_1.Store3(0u, asuint(asfloat(tint_symbol.Load3(0u))));
+  return;
+}

diff --git a/test/tint/buffer/storage/types/vec3_f32.wgsl.expected.glsl b/test/tint/buffer/storage/types/vec3_f32.wgsl.expected.glsl
new file mode 100644
index 0000000..fc44035
--- /dev/null
+++ b/test/tint/buffer/storage/types/vec3_f32.wgsl.expected.glsl

@@ -0,0 +1,19 @@
+#version 310 es
+
+layout(binding = 0, std430) buffer tint_symbol_block_ssbo {
+  vec3 inner;
+} tint_symbol;
+
+layout(binding = 1, std430) buffer tint_symbol_block_ssbo_1 {
+  vec3 inner;
+} tint_symbol_1;
+
+void tint_symbol_2() {
+  tint_symbol_1.inner = tint_symbol.inner;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  tint_symbol_2();
+  return;
+}

diff --git a/test/tint/buffer/storage/types/vec3_f32.wgsl.expected.msl b/test/tint/buffer/storage/types/vec3_f32.wgsl.expected.msl
new file mode 100644
index 0000000..800b609
--- /dev/null
+++ b/test/tint/buffer/storage/types/vec3_f32.wgsl.expected.msl

@@ -0,0 +1,8 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void tint_symbol(device float3* tint_symbol_1 [[buffer(0)]], const device float3* tint_symbol_2 [[buffer(1)]]) {
+  *(tint_symbol_1) = *(tint_symbol_2);
+  return;
+}
+

diff --git a/test/tint/buffer/storage/types/vec3_f32.wgsl.expected.spvasm b/test/tint/buffer/storage/types/vec3_f32.wgsl.expected.spvasm
new file mode 100644
index 0000000..f6c29a4
--- /dev/null
+++ b/test/tint/buffer/storage/types/vec3_f32.wgsl.expected.spvasm

@@ -0,0 +1,40 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 17
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpName %in_block "in_block"
+               OpMemberName %in_block 0 "inner"
+               OpName %in "in"
+               OpName %out "out"
+               OpName %main "main"
+               OpDecorate %in_block Block
+               OpMemberDecorate %in_block 0 Offset 0
+               OpDecorate %in NonWritable
+               OpDecorate %in DescriptorSet 0
+               OpDecorate %in Binding 0
+               OpDecorate %out DescriptorSet 0
+               OpDecorate %out Binding 1
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+   %in_block = OpTypeStruct %v3float
+%_ptr_StorageBuffer_in_block = OpTypePointer StorageBuffer %in_block
+         %in = OpVariable %_ptr_StorageBuffer_in_block StorageBuffer
+        %out = OpVariable %_ptr_StorageBuffer_in_block StorageBuffer
+       %void = OpTypeVoid
+          %7 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_StorageBuffer_v3float = OpTypePointer StorageBuffer %v3float
+       %main = OpFunction %void None %7
+         %10 = OpLabel
+         %14 = OpAccessChain %_ptr_StorageBuffer_v3float %out %uint_0
+         %15 = OpAccessChain %_ptr_StorageBuffer_v3float %in %uint_0
+         %16 = OpLoad %v3float %15
+               OpStore %14 %16
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/storage/types/vec3_f32.wgsl.expected.wgsl b/test/tint/buffer/storage/types/vec3_f32.wgsl.expected.wgsl
new file mode 100644
index 0000000..5e4581e
--- /dev/null
+++ b/test/tint/buffer/storage/types/vec3_f32.wgsl.expected.wgsl

@@ -0,0 +1,8 @@
+@group(0) @binding(0) var<storage, read> in : vec3<f32>;
+
+@group(0) @binding(1) var<storage, read_write> out : vec3<f32>;
+
+@compute @workgroup_size(1)
+fn main() {
+  out = in;
+}

diff --git a/test/tint/buffer/storage/types/vec3_i32.wgsl b/test/tint/buffer/storage/types/vec3_i32.wgsl
new file mode 100644
index 0000000..2d123ab
--- /dev/null
+++ b/test/tint/buffer/storage/types/vec3_i32.wgsl

@@ -0,0 +1,10 @@
+@group(0) @binding(0)
+var<storage, read> in : vec3<i32>;
+
+@group(0) @binding(1)
+var<storage, read_write> out : vec3<i32>;
+
+@compute @workgroup_size(1)
+fn main() {
+  out = in;
+}

diff --git a/test/tint/buffer/storage/types/vec3_i32.wgsl.expected.dxc.hlsl b/test/tint/buffer/storage/types/vec3_i32.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..66af38f
--- /dev/null
+++ b/test/tint/buffer/storage/types/vec3_i32.wgsl.expected.dxc.hlsl

@@ -0,0 +1,8 @@
+ByteAddressBuffer tint_symbol : register(t0, space0);
+RWByteAddressBuffer tint_symbol_1 : register(u1, space0);
+
+[numthreads(1, 1, 1)]
+void main() {
+  tint_symbol_1.Store3(0u, asuint(asint(tint_symbol.Load3(0u))));
+  return;
+}

diff --git a/test/tint/buffer/storage/types/vec3_i32.wgsl.expected.fxc.hlsl b/test/tint/buffer/storage/types/vec3_i32.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..66af38f
--- /dev/null
+++ b/test/tint/buffer/storage/types/vec3_i32.wgsl.expected.fxc.hlsl

@@ -0,0 +1,8 @@
+ByteAddressBuffer tint_symbol : register(t0, space0);
+RWByteAddressBuffer tint_symbol_1 : register(u1, space0);
+
+[numthreads(1, 1, 1)]
+void main() {
+  tint_symbol_1.Store3(0u, asuint(asint(tint_symbol.Load3(0u))));
+  return;
+}

diff --git a/test/tint/buffer/storage/types/vec3_i32.wgsl.expected.glsl b/test/tint/buffer/storage/types/vec3_i32.wgsl.expected.glsl
new file mode 100644
index 0000000..2e4649d
--- /dev/null
+++ b/test/tint/buffer/storage/types/vec3_i32.wgsl.expected.glsl

@@ -0,0 +1,19 @@
+#version 310 es
+
+layout(binding = 0, std430) buffer tint_symbol_block_ssbo {
+  ivec3 inner;
+} tint_symbol;
+
+layout(binding = 1, std430) buffer tint_symbol_block_ssbo_1 {
+  ivec3 inner;
+} tint_symbol_1;
+
+void tint_symbol_2() {
+  tint_symbol_1.inner = tint_symbol.inner;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  tint_symbol_2();
+  return;
+}

diff --git a/test/tint/buffer/storage/types/vec3_i32.wgsl.expected.msl b/test/tint/buffer/storage/types/vec3_i32.wgsl.expected.msl
new file mode 100644
index 0000000..d056869
--- /dev/null
+++ b/test/tint/buffer/storage/types/vec3_i32.wgsl.expected.msl

@@ -0,0 +1,8 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void tint_symbol(device int3* tint_symbol_1 [[buffer(0)]], const device int3* tint_symbol_2 [[buffer(1)]]) {
+  *(tint_symbol_1) = *(tint_symbol_2);
+  return;
+}
+

diff --git a/test/tint/buffer/storage/types/vec3_i32.wgsl.expected.spvasm b/test/tint/buffer/storage/types/vec3_i32.wgsl.expected.spvasm
new file mode 100644
index 0000000..aaebcf5
--- /dev/null
+++ b/test/tint/buffer/storage/types/vec3_i32.wgsl.expected.spvasm

@@ -0,0 +1,40 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 17
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpName %in_block "in_block"
+               OpMemberName %in_block 0 "inner"
+               OpName %in "in"
+               OpName %out "out"
+               OpName %main "main"
+               OpDecorate %in_block Block
+               OpMemberDecorate %in_block 0 Offset 0
+               OpDecorate %in NonWritable
+               OpDecorate %in DescriptorSet 0
+               OpDecorate %in Binding 0
+               OpDecorate %out DescriptorSet 0
+               OpDecorate %out Binding 1
+        %int = OpTypeInt 32 1
+      %v3int = OpTypeVector %int 3
+   %in_block = OpTypeStruct %v3int
+%_ptr_StorageBuffer_in_block = OpTypePointer StorageBuffer %in_block
+         %in = OpVariable %_ptr_StorageBuffer_in_block StorageBuffer
+        %out = OpVariable %_ptr_StorageBuffer_in_block StorageBuffer
+       %void = OpTypeVoid
+          %7 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_StorageBuffer_v3int = OpTypePointer StorageBuffer %v3int
+       %main = OpFunction %void None %7
+         %10 = OpLabel
+         %14 = OpAccessChain %_ptr_StorageBuffer_v3int %out %uint_0
+         %15 = OpAccessChain %_ptr_StorageBuffer_v3int %in %uint_0
+         %16 = OpLoad %v3int %15
+               OpStore %14 %16
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/storage/types/vec3_i32.wgsl.expected.wgsl b/test/tint/buffer/storage/types/vec3_i32.wgsl.expected.wgsl
new file mode 100644
index 0000000..89bfe47
--- /dev/null
+++ b/test/tint/buffer/storage/types/vec3_i32.wgsl.expected.wgsl

@@ -0,0 +1,8 @@
+@group(0) @binding(0) var<storage, read> in : vec3<i32>;
+
+@group(0) @binding(1) var<storage, read_write> out : vec3<i32>;
+
+@compute @workgroup_size(1)
+fn main() {
+  out = in;
+}

diff --git a/test/tint/buffer/storage/types/vec3.wgsl b/test/tint/buffer/storage/types/vec3_u32.wgsl
similarity index 100%
rename from test/tint/buffer/storage/types/vec3.wgsl
rename to test/tint/buffer/storage/types/vec3_u32.wgsl


diff --git a/test/tint/buffer/storage/types/vec3.wgsl.expected.dxc.hlsl b/test/tint/buffer/storage/types/vec3_u32.wgsl.expected.dxc.hlsl
similarity index 100%
rename from test/tint/buffer/storage/types/vec3.wgsl.expected.dxc.hlsl
rename to test/tint/buffer/storage/types/vec3_u32.wgsl.expected.dxc.hlsl


diff --git a/test/tint/buffer/storage/types/vec3.wgsl.expected.fxc.hlsl b/test/tint/buffer/storage/types/vec3_u32.wgsl.expected.fxc.hlsl
similarity index 100%
rename from test/tint/buffer/storage/types/vec3.wgsl.expected.fxc.hlsl
rename to test/tint/buffer/storage/types/vec3_u32.wgsl.expected.fxc.hlsl


diff --git a/test/tint/buffer/storage/types/vec3.wgsl.expected.glsl b/test/tint/buffer/storage/types/vec3_u32.wgsl.expected.glsl
similarity index 100%
rename from test/tint/buffer/storage/types/vec3.wgsl.expected.glsl
rename to test/tint/buffer/storage/types/vec3_u32.wgsl.expected.glsl


diff --git a/test/tint/buffer/storage/types/vec3.wgsl.expected.msl b/test/tint/buffer/storage/types/vec3_u32.wgsl.expected.msl
similarity index 100%
rename from test/tint/buffer/storage/types/vec3.wgsl.expected.msl
rename to test/tint/buffer/storage/types/vec3_u32.wgsl.expected.msl


diff --git a/test/tint/buffer/storage/types/vec3.wgsl.expected.spvasm b/test/tint/buffer/storage/types/vec3_u32.wgsl.expected.spvasm
similarity index 100%
rename from test/tint/buffer/storage/types/vec3.wgsl.expected.spvasm
rename to test/tint/buffer/storage/types/vec3_u32.wgsl.expected.spvasm


diff --git a/test/tint/buffer/storage/types/vec3.wgsl.expected.wgsl b/test/tint/buffer/storage/types/vec3_u32.wgsl.expected.wgsl
similarity index 100%
rename from test/tint/buffer/storage/types/vec3.wgsl.expected.wgsl
rename to test/tint/buffer/storage/types/vec3_u32.wgsl.expected.wgsl


diff --git a/test/tint/buffer/storage/types/vec4_f16.wgsl b/test/tint/buffer/storage/types/vec4_f16.wgsl
new file mode 100644
index 0000000..8afe29f
--- /dev/null
+++ b/test/tint/buffer/storage/types/vec4_f16.wgsl

@@ -0,0 +1,12 @@
+enable f16;
+
+@group(0) @binding(0)
+var<storage, read> in : vec4<f16>;
+
+@group(0) @binding(1)
+var<storage, read_write> out : vec4<f16>;
+
+@compute @workgroup_size(1)
+fn main() {
+  out = in;
+}

diff --git a/test/tint/buffer/storage/types/vec4_f16.wgsl.expected.dxc.hlsl b/test/tint/buffer/storage/types/vec4_f16.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..0fea94d
--- /dev/null
+++ b/test/tint/buffer/storage/types/vec4_f16.wgsl.expected.dxc.hlsl

@@ -0,0 +1,8 @@
+ByteAddressBuffer tint_symbol : register(t0, space0);
+RWByteAddressBuffer tint_symbol_1 : register(u1, space0);
+
+[numthreads(1, 1, 1)]
+void main() {
+  tint_symbol_1.Store<vector<float16_t, 4> >(0u, tint_symbol.Load<vector<float16_t, 4> >(0u));
+  return;
+}

diff --git a/test/tint/buffer/storage/types/vec4_f16.wgsl.expected.fxc.hlsl b/test/tint/buffer/storage/types/vec4_f16.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..5910328
--- /dev/null
+++ b/test/tint/buffer/storage/types/vec4_f16.wgsl.expected.fxc.hlsl

@@ -0,0 +1,13 @@
+SKIP: FAILED
+
+ByteAddressBuffer tint_symbol : register(t0, space0);
+RWByteAddressBuffer tint_symbol_1 : register(u1, space0);
+
+[numthreads(1, 1, 1)]
+void main() {
+  tint_symbol_1.Store<vector<float16_t, 4> >(0u, tint_symbol.Load<vector<float16_t, 4> >(0u));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001D5DD394680(6,3-21): error X3018: invalid subscript 'Store'
+

diff --git a/test/tint/buffer/storage/types/vec4_f16.wgsl.expected.glsl b/test/tint/buffer/storage/types/vec4_f16.wgsl.expected.glsl
new file mode 100644
index 0000000..e8e1dde
--- /dev/null
+++ b/test/tint/buffer/storage/types/vec4_f16.wgsl.expected.glsl

@@ -0,0 +1,20 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std430) buffer tint_symbol_block_ssbo {
+  f16vec4 inner;
+} tint_symbol;
+
+layout(binding = 1, std430) buffer tint_symbol_block_ssbo_1 {
+  f16vec4 inner;
+} tint_symbol_1;
+
+void tint_symbol_2() {
+  tint_symbol_1.inner = tint_symbol.inner;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  tint_symbol_2();
+  return;
+}

diff --git a/test/tint/buffer/storage/types/vec4_f16.wgsl.expected.msl b/test/tint/buffer/storage/types/vec4_f16.wgsl.expected.msl
new file mode 100644
index 0000000..fb77d9b
--- /dev/null
+++ b/test/tint/buffer/storage/types/vec4_f16.wgsl.expected.msl

@@ -0,0 +1,8 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void tint_symbol(device half4* tint_symbol_1 [[buffer(0)]], const device half4* tint_symbol_2 [[buffer(1)]]) {
+  *(tint_symbol_1) = *(tint_symbol_2);
+  return;
+}
+

diff --git a/test/tint/buffer/storage/types/vec4_f16.wgsl.expected.spvasm b/test/tint/buffer/storage/types/vec4_f16.wgsl.expected.spvasm
new file mode 100644
index 0000000..1054de34c
--- /dev/null
+++ b/test/tint/buffer/storage/types/vec4_f16.wgsl.expected.spvasm

@@ -0,0 +1,44 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 17
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpName %in_block "in_block"
+               OpMemberName %in_block 0 "inner"
+               OpName %in "in"
+               OpName %out "out"
+               OpName %main "main"
+               OpDecorate %in_block Block
+               OpMemberDecorate %in_block 0 Offset 0
+               OpDecorate %in NonWritable
+               OpDecorate %in DescriptorSet 0
+               OpDecorate %in Binding 0
+               OpDecorate %out DescriptorSet 0
+               OpDecorate %out Binding 1
+       %half = OpTypeFloat 16
+     %v4half = OpTypeVector %half 4
+   %in_block = OpTypeStruct %v4half
+%_ptr_StorageBuffer_in_block = OpTypePointer StorageBuffer %in_block
+         %in = OpVariable %_ptr_StorageBuffer_in_block StorageBuffer
+        %out = OpVariable %_ptr_StorageBuffer_in_block StorageBuffer
+       %void = OpTypeVoid
+          %7 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_StorageBuffer_v4half = OpTypePointer StorageBuffer %v4half
+       %main = OpFunction %void None %7
+         %10 = OpLabel
+         %14 = OpAccessChain %_ptr_StorageBuffer_v4half %out %uint_0
+         %15 = OpAccessChain %_ptr_StorageBuffer_v4half %in %uint_0
+         %16 = OpLoad %v4half %15
+               OpStore %14 %16
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/storage/types/vec4_f16.wgsl.expected.wgsl b/test/tint/buffer/storage/types/vec4_f16.wgsl.expected.wgsl
new file mode 100644
index 0000000..7c54506
--- /dev/null
+++ b/test/tint/buffer/storage/types/vec4_f16.wgsl.expected.wgsl

@@ -0,0 +1,10 @@
+enable f16;
+
+@group(0) @binding(0) var<storage, read> in : vec4<f16>;
+
+@group(0) @binding(1) var<storage, read_write> out : vec4<f16>;
+
+@compute @workgroup_size(1)
+fn main() {
+  out = in;
+}

diff --git a/test/tint/buffer/storage/types/vec4.wgsl b/test/tint/buffer/storage/types/vec4_f32.wgsl
similarity index 100%
rename from test/tint/buffer/storage/types/vec4.wgsl
rename to test/tint/buffer/storage/types/vec4_f32.wgsl


diff --git a/test/tint/buffer/storage/types/vec4.wgsl.expected.dxc.hlsl b/test/tint/buffer/storage/types/vec4_f32.wgsl.expected.dxc.hlsl
similarity index 100%
rename from test/tint/buffer/storage/types/vec4.wgsl.expected.dxc.hlsl
rename to test/tint/buffer/storage/types/vec4_f32.wgsl.expected.dxc.hlsl


diff --git a/test/tint/buffer/storage/types/vec4.wgsl.expected.fxc.hlsl b/test/tint/buffer/storage/types/vec4_f32.wgsl.expected.fxc.hlsl
similarity index 100%
rename from test/tint/buffer/storage/types/vec4.wgsl.expected.fxc.hlsl
rename to test/tint/buffer/storage/types/vec4_f32.wgsl.expected.fxc.hlsl


diff --git a/test/tint/buffer/storage/types/vec4.wgsl.expected.glsl b/test/tint/buffer/storage/types/vec4_f32.wgsl.expected.glsl
similarity index 100%
rename from test/tint/buffer/storage/types/vec4.wgsl.expected.glsl
rename to test/tint/buffer/storage/types/vec4_f32.wgsl.expected.glsl


diff --git a/test/tint/buffer/storage/types/vec4.wgsl.expected.msl b/test/tint/buffer/storage/types/vec4_f32.wgsl.expected.msl
similarity index 100%
rename from test/tint/buffer/storage/types/vec4.wgsl.expected.msl
rename to test/tint/buffer/storage/types/vec4_f32.wgsl.expected.msl


diff --git a/test/tint/buffer/storage/types/vec4.wgsl.expected.spvasm b/test/tint/buffer/storage/types/vec4_f32.wgsl.expected.spvasm
similarity index 100%
rename from test/tint/buffer/storage/types/vec4.wgsl.expected.spvasm
rename to test/tint/buffer/storage/types/vec4_f32.wgsl.expected.spvasm


diff --git a/test/tint/buffer/storage/types/vec4.wgsl.expected.wgsl b/test/tint/buffer/storage/types/vec4_f32.wgsl.expected.wgsl
similarity index 100%
rename from test/tint/buffer/storage/types/vec4.wgsl.expected.wgsl
rename to test/tint/buffer/storage/types/vec4_f32.wgsl.expected.wgsl


diff --git a/test/tint/buffer/storage/types/vec4_i32.wgsl b/test/tint/buffer/storage/types/vec4_i32.wgsl
new file mode 100644
index 0000000..8f5f825
--- /dev/null
+++ b/test/tint/buffer/storage/types/vec4_i32.wgsl

@@ -0,0 +1,10 @@
+@group(0) @binding(0)
+var<storage, read> in : vec4<i32>;
+
+@group(0) @binding(1)
+var<storage, read_write> out : vec4<i32>;
+
+@compute @workgroup_size(1)
+fn main() {
+  out = in;
+}

diff --git a/test/tint/buffer/storage/types/vec4_i32.wgsl.expected.dxc.hlsl b/test/tint/buffer/storage/types/vec4_i32.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..3e5b3f6
--- /dev/null
+++ b/test/tint/buffer/storage/types/vec4_i32.wgsl.expected.dxc.hlsl

@@ -0,0 +1,8 @@
+ByteAddressBuffer tint_symbol : register(t0, space0);
+RWByteAddressBuffer tint_symbol_1 : register(u1, space0);
+
+[numthreads(1, 1, 1)]
+void main() {
+  tint_symbol_1.Store4(0u, asuint(asint(tint_symbol.Load4(0u))));
+  return;
+}

diff --git a/test/tint/buffer/storage/types/vec4_i32.wgsl.expected.fxc.hlsl b/test/tint/buffer/storage/types/vec4_i32.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..3e5b3f6
--- /dev/null
+++ b/test/tint/buffer/storage/types/vec4_i32.wgsl.expected.fxc.hlsl

@@ -0,0 +1,8 @@
+ByteAddressBuffer tint_symbol : register(t0, space0);
+RWByteAddressBuffer tint_symbol_1 : register(u1, space0);
+
+[numthreads(1, 1, 1)]
+void main() {
+  tint_symbol_1.Store4(0u, asuint(asint(tint_symbol.Load4(0u))));
+  return;
+}

diff --git a/test/tint/buffer/storage/types/vec4_i32.wgsl.expected.glsl b/test/tint/buffer/storage/types/vec4_i32.wgsl.expected.glsl
new file mode 100644
index 0000000..5c3c067
--- /dev/null
+++ b/test/tint/buffer/storage/types/vec4_i32.wgsl.expected.glsl

@@ -0,0 +1,19 @@
+#version 310 es
+
+layout(binding = 0, std430) buffer tint_symbol_block_ssbo {
+  ivec4 inner;
+} tint_symbol;
+
+layout(binding = 1, std430) buffer tint_symbol_block_ssbo_1 {
+  ivec4 inner;
+} tint_symbol_1;
+
+void tint_symbol_2() {
+  tint_symbol_1.inner = tint_symbol.inner;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  tint_symbol_2();
+  return;
+}

diff --git a/test/tint/buffer/storage/types/vec4_i32.wgsl.expected.msl b/test/tint/buffer/storage/types/vec4_i32.wgsl.expected.msl
new file mode 100644
index 0000000..56e60b0
--- /dev/null
+++ b/test/tint/buffer/storage/types/vec4_i32.wgsl.expected.msl

@@ -0,0 +1,8 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void tint_symbol(device int4* tint_symbol_1 [[buffer(0)]], const device int4* tint_symbol_2 [[buffer(1)]]) {
+  *(tint_symbol_1) = *(tint_symbol_2);
+  return;
+}
+

diff --git a/test/tint/buffer/storage/types/vec4_i32.wgsl.expected.spvasm b/test/tint/buffer/storage/types/vec4_i32.wgsl.expected.spvasm
new file mode 100644
index 0000000..e7ec0df
--- /dev/null
+++ b/test/tint/buffer/storage/types/vec4_i32.wgsl.expected.spvasm

@@ -0,0 +1,40 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 17
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpName %in_block "in_block"
+               OpMemberName %in_block 0 "inner"
+               OpName %in "in"
+               OpName %out "out"
+               OpName %main "main"
+               OpDecorate %in_block Block
+               OpMemberDecorate %in_block 0 Offset 0
+               OpDecorate %in NonWritable
+               OpDecorate %in DescriptorSet 0
+               OpDecorate %in Binding 0
+               OpDecorate %out DescriptorSet 0
+               OpDecorate %out Binding 1
+        %int = OpTypeInt 32 1
+      %v4int = OpTypeVector %int 4
+   %in_block = OpTypeStruct %v4int
+%_ptr_StorageBuffer_in_block = OpTypePointer StorageBuffer %in_block
+         %in = OpVariable %_ptr_StorageBuffer_in_block StorageBuffer
+        %out = OpVariable %_ptr_StorageBuffer_in_block StorageBuffer
+       %void = OpTypeVoid
+          %7 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_StorageBuffer_v4int = OpTypePointer StorageBuffer %v4int
+       %main = OpFunction %void None %7
+         %10 = OpLabel
+         %14 = OpAccessChain %_ptr_StorageBuffer_v4int %out %uint_0
+         %15 = OpAccessChain %_ptr_StorageBuffer_v4int %in %uint_0
+         %16 = OpLoad %v4int %15
+               OpStore %14 %16
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/storage/types/vec4_i32.wgsl.expected.wgsl b/test/tint/buffer/storage/types/vec4_i32.wgsl.expected.wgsl
new file mode 100644
index 0000000..c9774fb
--- /dev/null
+++ b/test/tint/buffer/storage/types/vec4_i32.wgsl.expected.wgsl

@@ -0,0 +1,8 @@
+@group(0) @binding(0) var<storage, read> in : vec4<i32>;
+
+@group(0) @binding(1) var<storage, read_write> out : vec4<i32>;
+
+@compute @workgroup_size(1)
+fn main() {
+  out = in;
+}

diff --git a/test/tint/buffer/storage/types/vec4_u32.wgsl b/test/tint/buffer/storage/types/vec4_u32.wgsl
new file mode 100644
index 0000000..9542278
--- /dev/null
+++ b/test/tint/buffer/storage/types/vec4_u32.wgsl

@@ -0,0 +1,10 @@
+@group(0) @binding(0)
+var<storage, read> in : vec4<u32>;
+
+@group(0) @binding(1)
+var<storage, read_write> out : vec4<u32>;
+
+@compute @workgroup_size(1)
+fn main() {
+  out = in;
+}

diff --git a/test/tint/buffer/storage/types/vec4_u32.wgsl.expected.dxc.hlsl b/test/tint/buffer/storage/types/vec4_u32.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..59665d7
--- /dev/null
+++ b/test/tint/buffer/storage/types/vec4_u32.wgsl.expected.dxc.hlsl

@@ -0,0 +1,8 @@
+ByteAddressBuffer tint_symbol : register(t0, space0);
+RWByteAddressBuffer tint_symbol_1 : register(u1, space0);
+
+[numthreads(1, 1, 1)]
+void main() {
+  tint_symbol_1.Store4(0u, asuint(tint_symbol.Load4(0u)));
+  return;
+}

diff --git a/test/tint/buffer/storage/types/vec4_u32.wgsl.expected.fxc.hlsl b/test/tint/buffer/storage/types/vec4_u32.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..59665d7
--- /dev/null
+++ b/test/tint/buffer/storage/types/vec4_u32.wgsl.expected.fxc.hlsl

@@ -0,0 +1,8 @@
+ByteAddressBuffer tint_symbol : register(t0, space0);
+RWByteAddressBuffer tint_symbol_1 : register(u1, space0);
+
+[numthreads(1, 1, 1)]
+void main() {
+  tint_symbol_1.Store4(0u, asuint(tint_symbol.Load4(0u)));
+  return;
+}

diff --git a/test/tint/buffer/storage/types/vec4_u32.wgsl.expected.glsl b/test/tint/buffer/storage/types/vec4_u32.wgsl.expected.glsl
new file mode 100644
index 0000000..ff88441
--- /dev/null
+++ b/test/tint/buffer/storage/types/vec4_u32.wgsl.expected.glsl

@@ -0,0 +1,19 @@
+#version 310 es
+
+layout(binding = 0, std430) buffer tint_symbol_block_ssbo {
+  uvec4 inner;
+} tint_symbol;
+
+layout(binding = 1, std430) buffer tint_symbol_block_ssbo_1 {
+  uvec4 inner;
+} tint_symbol_1;
+
+void tint_symbol_2() {
+  tint_symbol_1.inner = tint_symbol.inner;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  tint_symbol_2();
+  return;
+}

diff --git a/test/tint/buffer/storage/types/vec4_u32.wgsl.expected.msl b/test/tint/buffer/storage/types/vec4_u32.wgsl.expected.msl
new file mode 100644
index 0000000..d6bce0a
--- /dev/null
+++ b/test/tint/buffer/storage/types/vec4_u32.wgsl.expected.msl

@@ -0,0 +1,8 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void tint_symbol(device uint4* tint_symbol_1 [[buffer(0)]], const device uint4* tint_symbol_2 [[buffer(1)]]) {
+  *(tint_symbol_1) = *(tint_symbol_2);
+  return;
+}
+

diff --git a/test/tint/buffer/storage/types/vec4_u32.wgsl.expected.spvasm b/test/tint/buffer/storage/types/vec4_u32.wgsl.expected.spvasm
new file mode 100644
index 0000000..c779b48
--- /dev/null
+++ b/test/tint/buffer/storage/types/vec4_u32.wgsl.expected.spvasm

@@ -0,0 +1,39 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 16
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpName %in_block "in_block"
+               OpMemberName %in_block 0 "inner"
+               OpName %in "in"
+               OpName %out "out"
+               OpName %main "main"
+               OpDecorate %in_block Block
+               OpMemberDecorate %in_block 0 Offset 0
+               OpDecorate %in NonWritable
+               OpDecorate %in DescriptorSet 0
+               OpDecorate %in Binding 0
+               OpDecorate %out DescriptorSet 0
+               OpDecorate %out Binding 1
+       %uint = OpTypeInt 32 0
+     %v4uint = OpTypeVector %uint 4
+   %in_block = OpTypeStruct %v4uint
+%_ptr_StorageBuffer_in_block = OpTypePointer StorageBuffer %in_block
+         %in = OpVariable %_ptr_StorageBuffer_in_block StorageBuffer
+        %out = OpVariable %_ptr_StorageBuffer_in_block StorageBuffer
+       %void = OpTypeVoid
+          %7 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_StorageBuffer_v4uint = OpTypePointer StorageBuffer %v4uint
+       %main = OpFunction %void None %7
+         %10 = OpLabel
+         %13 = OpAccessChain %_ptr_StorageBuffer_v4uint %out %uint_0
+         %14 = OpAccessChain %_ptr_StorageBuffer_v4uint %in %uint_0
+         %15 = OpLoad %v4uint %14
+               OpStore %13 %15
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/storage/types/vec4_u32.wgsl.expected.wgsl b/test/tint/buffer/storage/types/vec4_u32.wgsl.expected.wgsl
new file mode 100644
index 0000000..7d349db
--- /dev/null
+++ b/test/tint/buffer/storage/types/vec4_u32.wgsl.expected.wgsl

@@ -0,0 +1,8 @@
+@group(0) @binding(0) var<storage, read> in : vec4<u32>;
+
+@group(0) @binding(1) var<storage, read_write> out : vec4<u32>;
+
+@compute @workgroup_size(1)
+fn main() {
+  out = in;
+}

diff --git a/test/tint/buffer/uniform/dynamic_index/read.wgsl b/test/tint/buffer/uniform/dynamic_index/read.wgsl
index 05d1083..730ba80 100644
--- a/test/tint/buffer/uniform/dynamic_index/read.wgsl
+++ b/test/tint/buffer/uniform/dynamic_index/read.wgsl

@@ -1,34 +1,56 @@
 struct Inner {
-    a : vec3<i32>,
-    b : i32,
-    c : vec3<u32>,
-    d : u32,
-    e : vec3<f32>,
-    f : f32,
-    g : vec2<i32>,
-    h : vec2<i32>,
-    i : mat2x3<f32>,
-    @align(16) j : mat3x2<f32>,
-    @align(16) k : array<vec4<i32>, 4>,
+    scalar_f32 : f32,
+    scalar_i32 : i32,
+    scalar_u32 : u32,
+    vec2_f32 : vec2<f32>,
+    vec2_i32 : vec2<i32>,
+    vec2_u32 : vec2<u32>,
+    vec3_f32 : vec3<f32>,
+    vec3_i32 : vec3<i32>,
+    vec3_u32 : vec3<u32>,
+    vec4_f32 : vec4<f32>,
+    vec4_i32 : vec4<i32>,
+    vec4_u32 : vec4<u32>,
+    mat2x2_f32 : mat2x2<f32>,
+    mat2x3_f32 : mat2x3<f32>,
+    mat2x4_f32 : mat2x4<f32>,
+    mat3x2_f32 : mat3x2<f32>,
+    mat3x3_f32 : mat3x3<f32>,
+    mat3x4_f32 : mat3x4<f32>,
+    mat4x2_f32 : mat4x2<f32>,
+    mat4x3_f32 : mat4x3<f32>,
+    mat4x4_f32 : mat4x4<f32>,
+    @align(16) arr2_vec3_f32 : array<vec3<f32>, 2>,
 };
 
 struct S {
     arr : array<Inner, 8>,
 };
 
-@binding(0) @group(0) var<uniform> s : S;
+@binding(0) @group(0) var<uniform> ub : S;
 
 @compute @workgroup_size(1)
 fn main(@builtin(local_invocation_index) idx : u32) {
-    let a = s.arr[idx].a;
-    let b = s.arr[idx].b;
-    let c = s.arr[idx].c;
-    let d = s.arr[idx].d;
-    let e = s.arr[idx].e;
-    let f = s.arr[idx].f;
-    let g = s.arr[idx].g;
-    let h = s.arr[idx].h;
-    let i = s.arr[idx].i;
-    let j = s.arr[idx].j;
-    let k = s.arr[idx].k;
-}
+    let scalar_f32 : f32 = ub.arr[idx].scalar_f32;
+    let scalar_i32 : i32 = ub.arr[idx].scalar_i32;
+    let scalar_u32 : u32 = ub.arr[idx].scalar_u32;
+    let vec2_f32 : vec2<f32> = ub.arr[idx].vec2_f32;
+    let vec2_i32 : vec2<i32> = ub.arr[idx].vec2_i32;
+    let vec2_u32 : vec2<u32> = ub.arr[idx].vec2_u32;
+    let vec3_f32 : vec3<f32> = ub.arr[idx].vec3_f32;
+    let vec3_i32 : vec3<i32> = ub.arr[idx].vec3_i32;
+    let vec3_u32 : vec3<u32> = ub.arr[idx].vec3_u32;
+    let vec4_f32 : vec4<f32> = ub.arr[idx].vec4_f32;
+    let vec4_i32 : vec4<i32> = ub.arr[idx].vec4_i32;
+    let vec4_u32 : vec4<u32> = ub.arr[idx].vec4_u32;
+    let mat2x2_f32 : mat2x2<f32> = ub.arr[idx].mat2x2_f32;
+    let mat2x3_f32 : mat2x3<f32> = ub.arr[idx].mat2x3_f32;
+    let mat2x4_f32 : mat2x4<f32> = ub.arr[idx].mat2x4_f32;
+    let mat3x2_f32 : mat3x2<f32> = ub.arr[idx].mat3x2_f32;
+    let mat3x3_f32 : mat3x3<f32> = ub.arr[idx].mat3x3_f32;
+    let mat3x4_f32 : mat3x4<f32> = ub.arr[idx].mat3x4_f32;
+    let mat4x2_f32 : mat4x2<f32> = ub.arr[idx].mat4x2_f32;
+    let mat4x3_f32 : mat4x3<f32> = ub.arr[idx].mat4x3_f32;
+    let mat4x4_f32 : mat4x4<f32> = ub.arr[idx].mat4x4_f32;
+    let arr2_vec3_f32 : array<vec3<f32>, 2> = ub.arr[idx].arr2_vec3_f32;
+}
\ No newline at end of file

diff --git a/test/tint/buffer/uniform/dynamic_index/read.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/dynamic_index/read.wgsl.expected.dxc.hlsl
index b110fb7..4e80c66 100644
--- a/test/tint/buffer/uniform/dynamic_index/read.wgsl.expected.dxc.hlsl
+++ b/test/tint/buffer/uniform/dynamic_index/read.wgsl.expected.dxc.hlsl

@@ -1,61 +1,133 @@
-cbuffer cbuffer_s : register(b0, space0) {
-  uint4 s[96];
+cbuffer cbuffer_ub : register(b0, space0) {
+  uint4 ub[272];
 };
 
 struct tint_symbol_1 {
   uint idx : SV_GroupIndex;
 };
 
-float2x3 tint_symbol_9(uint4 buffer[96], uint offset) {
+float2x2 tint_symbol_14(uint4 buffer[272], uint offset) {
   const uint scalar_offset = ((offset + 0u)) / 4;
-  const uint scalar_offset_1 = ((offset + 16u)) / 4;
-  return float2x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz));
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  return float2x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)));
 }
 
-float3x2 tint_symbol_10(uint4 buffer[96], uint offset) {
+float2x3 tint_symbol_15(uint4 buffer[272], uint offset) {
   const uint scalar_offset_2 = ((offset + 0u)) / 4;
-  uint4 ubo_load = buffer[scalar_offset_2 / 4];
-  const uint scalar_offset_3 = ((offset + 8u)) / 4;
-  uint4 ubo_load_1 = buffer[scalar_offset_3 / 4];
-  const uint scalar_offset_4 = ((offset + 16u)) / 4;
-  uint4 ubo_load_2 = buffer[scalar_offset_4 / 4];
-  return float3x2(asfloat(((scalar_offset_2 & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_3 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_4 & 2) ? ubo_load_2.zw : ubo_load_2.xy)));
+  const uint scalar_offset_3 = ((offset + 16u)) / 4;
+  return float2x3(asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
 }
 
-typedef int4 tint_symbol_12_ret[4];
-tint_symbol_12_ret tint_symbol_12(uint4 buffer[96], uint offset) {
-  int4 arr_1[4] = (int4[4])0;
+float2x4 tint_symbol_16(uint4 buffer[272], uint offset) {
+  const uint scalar_offset_4 = ((offset + 0u)) / 4;
+  const uint scalar_offset_5 = ((offset + 16u)) / 4;
+  return float2x4(asfloat(buffer[scalar_offset_4 / 4]), asfloat(buffer[scalar_offset_5 / 4]));
+}
+
+float3x2 tint_symbol_17(uint4 buffer[272], uint offset) {
+  const uint scalar_offset_6 = ((offset + 0u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_6 / 4];
+  const uint scalar_offset_7 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_7 / 4];
+  const uint scalar_offset_8 = ((offset + 16u)) / 4;
+  uint4 ubo_load_4 = buffer[scalar_offset_8 / 4];
+  return float3x2(asfloat(((scalar_offset_6 & 2) ? ubo_load_2.zw : ubo_load_2.xy)), asfloat(((scalar_offset_7 & 2) ? ubo_load_3.zw : ubo_load_3.xy)), asfloat(((scalar_offset_8 & 2) ? ubo_load_4.zw : ubo_load_4.xy)));
+}
+
+float3x3 tint_symbol_18(uint4 buffer[272], uint offset) {
+  const uint scalar_offset_9 = ((offset + 0u)) / 4;
+  const uint scalar_offset_10 = ((offset + 16u)) / 4;
+  const uint scalar_offset_11 = ((offset + 32u)) / 4;
+  return float3x3(asfloat(buffer[scalar_offset_9 / 4].xyz), asfloat(buffer[scalar_offset_10 / 4].xyz), asfloat(buffer[scalar_offset_11 / 4].xyz));
+}
+
+float3x4 tint_symbol_19(uint4 buffer[272], uint offset) {
+  const uint scalar_offset_12 = ((offset + 0u)) / 4;
+  const uint scalar_offset_13 = ((offset + 16u)) / 4;
+  const uint scalar_offset_14 = ((offset + 32u)) / 4;
+  return float3x4(asfloat(buffer[scalar_offset_12 / 4]), asfloat(buffer[scalar_offset_13 / 4]), asfloat(buffer[scalar_offset_14 / 4]));
+}
+
+float4x2 tint_symbol_20(uint4 buffer[272], uint offset) {
+  const uint scalar_offset_15 = ((offset + 0u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_15 / 4];
+  const uint scalar_offset_16 = ((offset + 8u)) / 4;
+  uint4 ubo_load_6 = buffer[scalar_offset_16 / 4];
+  const uint scalar_offset_17 = ((offset + 16u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_17 / 4];
+  const uint scalar_offset_18 = ((offset + 24u)) / 4;
+  uint4 ubo_load_8 = buffer[scalar_offset_18 / 4];
+  return float4x2(asfloat(((scalar_offset_15 & 2) ? ubo_load_5.zw : ubo_load_5.xy)), asfloat(((scalar_offset_16 & 2) ? ubo_load_6.zw : ubo_load_6.xy)), asfloat(((scalar_offset_17 & 2) ? ubo_load_7.zw : ubo_load_7.xy)), asfloat(((scalar_offset_18 & 2) ? ubo_load_8.zw : ubo_load_8.xy)));
+}
+
+float4x3 tint_symbol_21(uint4 buffer[272], uint offset) {
+  const uint scalar_offset_19 = ((offset + 0u)) / 4;
+  const uint scalar_offset_20 = ((offset + 16u)) / 4;
+  const uint scalar_offset_21 = ((offset + 32u)) / 4;
+  const uint scalar_offset_22 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset_19 / 4].xyz), asfloat(buffer[scalar_offset_20 / 4].xyz), asfloat(buffer[scalar_offset_21 / 4].xyz), asfloat(buffer[scalar_offset_22 / 4].xyz));
+}
+
+float4x4 tint_symbol_22(uint4 buffer[272], uint offset) {
+  const uint scalar_offset_23 = ((offset + 0u)) / 4;
+  const uint scalar_offset_24 = ((offset + 16u)) / 4;
+  const uint scalar_offset_25 = ((offset + 32u)) / 4;
+  const uint scalar_offset_26 = ((offset + 48u)) / 4;
+  return float4x4(asfloat(buffer[scalar_offset_23 / 4]), asfloat(buffer[scalar_offset_24 / 4]), asfloat(buffer[scalar_offset_25 / 4]), asfloat(buffer[scalar_offset_26 / 4]));
+}
+
+typedef float3 tint_symbol_23_ret[2];
+tint_symbol_23_ret tint_symbol_23(uint4 buffer[272], uint offset) {
+  float3 arr_1[2] = (float3[2])0;
   {
-    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
-      const uint scalar_offset_5 = ((offset + (i_1 * 16u))) / 4;
-      arr_1[i_1] = asint(buffer[scalar_offset_5 / 4]);
+    for(uint i = 0u; (i < 2u); i = (i + 1u)) {
+      const uint scalar_offset_27 = ((offset + (i * 16u))) / 4;
+      arr_1[i] = asfloat(buffer[scalar_offset_27 / 4].xyz);
     }
   }
   return arr_1;
 }
 
 void main_inner(uint idx) {
-  const uint scalar_offset_6 = ((192u * idx)) / 4;
-  const int3 a = asint(s[scalar_offset_6 / 4].xyz);
-  const uint scalar_offset_7 = (((192u * idx) + 12u)) / 4;
-  const int b = asint(s[scalar_offset_7 / 4][scalar_offset_7 % 4]);
-  const uint scalar_offset_8 = (((192u * idx) + 16u)) / 4;
-  const uint3 c = s[scalar_offset_8 / 4].xyz;
-  const uint scalar_offset_9 = (((192u * idx) + 28u)) / 4;
-  const uint d = s[scalar_offset_9 / 4][scalar_offset_9 % 4];
-  const uint scalar_offset_10 = (((192u * idx) + 32u)) / 4;
-  const float3 e = asfloat(s[scalar_offset_10 / 4].xyz);
-  const uint scalar_offset_11 = (((192u * idx) + 44u)) / 4;
-  const float f = asfloat(s[scalar_offset_11 / 4][scalar_offset_11 % 4]);
-  const uint scalar_offset_12 = (((192u * idx) + 48u)) / 4;
-  uint4 ubo_load_3 = s[scalar_offset_12 / 4];
-  const int2 g = asint(((scalar_offset_12 & 2) ? ubo_load_3.zw : ubo_load_3.xy));
-  const uint scalar_offset_13 = (((192u * idx) + 56u)) / 4;
-  uint4 ubo_load_4 = s[scalar_offset_13 / 4];
-  const int2 h = asint(((scalar_offset_13 & 2) ? ubo_load_4.zw : ubo_load_4.xy));
-  const float2x3 i = tint_symbol_9(s, ((192u * idx) + 64u));
-  const float3x2 j = tint_symbol_10(s, ((192u * idx) + 96u));
-  const int4 k[4] = tint_symbol_12(s, ((192u * idx) + 128u));
+  const uint scalar_offset_28 = ((544u * idx)) / 4;
+  const float scalar_f32 = asfloat(ub[scalar_offset_28 / 4][scalar_offset_28 % 4]);
+  const uint scalar_offset_29 = (((544u * idx) + 4u)) / 4;
+  const int scalar_i32 = asint(ub[scalar_offset_29 / 4][scalar_offset_29 % 4]);
+  const uint scalar_offset_30 = (((544u * idx) + 8u)) / 4;
+  const uint scalar_u32 = ub[scalar_offset_30 / 4][scalar_offset_30 % 4];
+  const uint scalar_offset_31 = (((544u * idx) + 16u)) / 4;
+  uint4 ubo_load_9 = ub[scalar_offset_31 / 4];
+  const float2 vec2_f32 = asfloat(((scalar_offset_31 & 2) ? ubo_load_9.zw : ubo_load_9.xy));
+  const uint scalar_offset_32 = (((544u * idx) + 24u)) / 4;
+  uint4 ubo_load_10 = ub[scalar_offset_32 / 4];
+  const int2 vec2_i32 = asint(((scalar_offset_32 & 2) ? ubo_load_10.zw : ubo_load_10.xy));
+  const uint scalar_offset_33 = (((544u * idx) + 32u)) / 4;
+  uint4 ubo_load_11 = ub[scalar_offset_33 / 4];
+  const uint2 vec2_u32 = ((scalar_offset_33 & 2) ? ubo_load_11.zw : ubo_load_11.xy);
+  const uint scalar_offset_34 = (((544u * idx) + 48u)) / 4;
+  const float3 vec3_f32 = asfloat(ub[scalar_offset_34 / 4].xyz);
+  const uint scalar_offset_35 = (((544u * idx) + 64u)) / 4;
+  const int3 vec3_i32 = asint(ub[scalar_offset_35 / 4].xyz);
+  const uint scalar_offset_36 = (((544u * idx) + 80u)) / 4;
+  const uint3 vec3_u32 = ub[scalar_offset_36 / 4].xyz;
+  const uint scalar_offset_37 = (((544u * idx) + 96u)) / 4;
+  const float4 vec4_f32 = asfloat(ub[scalar_offset_37 / 4]);
+  const uint scalar_offset_38 = (((544u * idx) + 112u)) / 4;
+  const int4 vec4_i32 = asint(ub[scalar_offset_38 / 4]);
+  const uint scalar_offset_39 = (((544u * idx) + 128u)) / 4;
+  const uint4 vec4_u32 = ub[scalar_offset_39 / 4];
+  const float2x2 mat2x2_f32 = tint_symbol_14(ub, ((544u * idx) + 144u));
+  const float2x3 mat2x3_f32 = tint_symbol_15(ub, ((544u * idx) + 160u));
+  const float2x4 mat2x4_f32 = tint_symbol_16(ub, ((544u * idx) + 192u));
+  const float3x2 mat3x2_f32 = tint_symbol_17(ub, ((544u * idx) + 224u));
+  const float3x3 mat3x3_f32 = tint_symbol_18(ub, ((544u * idx) + 256u));
+  const float3x4 mat3x4_f32 = tint_symbol_19(ub, ((544u * idx) + 304u));
+  const float4x2 mat4x2_f32 = tint_symbol_20(ub, ((544u * idx) + 352u));
+  const float4x3 mat4x3_f32 = tint_symbol_21(ub, ((544u * idx) + 384u));
+  const float4x4 mat4x4_f32 = tint_symbol_22(ub, ((544u * idx) + 448u));
+  const float3 arr2_vec3_f32[2] = tint_symbol_23(ub, ((544u * idx) + 512u));
 }
 
 [numthreads(1, 1, 1)]

diff --git a/test/tint/buffer/uniform/dynamic_index/read.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/dynamic_index/read.wgsl.expected.fxc.hlsl
index b110fb7..4e80c66 100644
--- a/test/tint/buffer/uniform/dynamic_index/read.wgsl.expected.fxc.hlsl
+++ b/test/tint/buffer/uniform/dynamic_index/read.wgsl.expected.fxc.hlsl

@@ -1,61 +1,133 @@
-cbuffer cbuffer_s : register(b0, space0) {
-  uint4 s[96];
+cbuffer cbuffer_ub : register(b0, space0) {
+  uint4 ub[272];
 };
 
 struct tint_symbol_1 {
   uint idx : SV_GroupIndex;
 };
 
-float2x3 tint_symbol_9(uint4 buffer[96], uint offset) {
+float2x2 tint_symbol_14(uint4 buffer[272], uint offset) {
   const uint scalar_offset = ((offset + 0u)) / 4;
-  const uint scalar_offset_1 = ((offset + 16u)) / 4;
-  return float2x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz));
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  return float2x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)));
 }
 
-float3x2 tint_symbol_10(uint4 buffer[96], uint offset) {
+float2x3 tint_symbol_15(uint4 buffer[272], uint offset) {
   const uint scalar_offset_2 = ((offset + 0u)) / 4;
-  uint4 ubo_load = buffer[scalar_offset_2 / 4];
-  const uint scalar_offset_3 = ((offset + 8u)) / 4;
-  uint4 ubo_load_1 = buffer[scalar_offset_3 / 4];
-  const uint scalar_offset_4 = ((offset + 16u)) / 4;
-  uint4 ubo_load_2 = buffer[scalar_offset_4 / 4];
-  return float3x2(asfloat(((scalar_offset_2 & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_3 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_4 & 2) ? ubo_load_2.zw : ubo_load_2.xy)));
+  const uint scalar_offset_3 = ((offset + 16u)) / 4;
+  return float2x3(asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
 }
 
-typedef int4 tint_symbol_12_ret[4];
-tint_symbol_12_ret tint_symbol_12(uint4 buffer[96], uint offset) {
-  int4 arr_1[4] = (int4[4])0;
+float2x4 tint_symbol_16(uint4 buffer[272], uint offset) {
+  const uint scalar_offset_4 = ((offset + 0u)) / 4;
+  const uint scalar_offset_5 = ((offset + 16u)) / 4;
+  return float2x4(asfloat(buffer[scalar_offset_4 / 4]), asfloat(buffer[scalar_offset_5 / 4]));
+}
+
+float3x2 tint_symbol_17(uint4 buffer[272], uint offset) {
+  const uint scalar_offset_6 = ((offset + 0u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_6 / 4];
+  const uint scalar_offset_7 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_7 / 4];
+  const uint scalar_offset_8 = ((offset + 16u)) / 4;
+  uint4 ubo_load_4 = buffer[scalar_offset_8 / 4];
+  return float3x2(asfloat(((scalar_offset_6 & 2) ? ubo_load_2.zw : ubo_load_2.xy)), asfloat(((scalar_offset_7 & 2) ? ubo_load_3.zw : ubo_load_3.xy)), asfloat(((scalar_offset_8 & 2) ? ubo_load_4.zw : ubo_load_4.xy)));
+}
+
+float3x3 tint_symbol_18(uint4 buffer[272], uint offset) {
+  const uint scalar_offset_9 = ((offset + 0u)) / 4;
+  const uint scalar_offset_10 = ((offset + 16u)) / 4;
+  const uint scalar_offset_11 = ((offset + 32u)) / 4;
+  return float3x3(asfloat(buffer[scalar_offset_9 / 4].xyz), asfloat(buffer[scalar_offset_10 / 4].xyz), asfloat(buffer[scalar_offset_11 / 4].xyz));
+}
+
+float3x4 tint_symbol_19(uint4 buffer[272], uint offset) {
+  const uint scalar_offset_12 = ((offset + 0u)) / 4;
+  const uint scalar_offset_13 = ((offset + 16u)) / 4;
+  const uint scalar_offset_14 = ((offset + 32u)) / 4;
+  return float3x4(asfloat(buffer[scalar_offset_12 / 4]), asfloat(buffer[scalar_offset_13 / 4]), asfloat(buffer[scalar_offset_14 / 4]));
+}
+
+float4x2 tint_symbol_20(uint4 buffer[272], uint offset) {
+  const uint scalar_offset_15 = ((offset + 0u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_15 / 4];
+  const uint scalar_offset_16 = ((offset + 8u)) / 4;
+  uint4 ubo_load_6 = buffer[scalar_offset_16 / 4];
+  const uint scalar_offset_17 = ((offset + 16u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_17 / 4];
+  const uint scalar_offset_18 = ((offset + 24u)) / 4;
+  uint4 ubo_load_8 = buffer[scalar_offset_18 / 4];
+  return float4x2(asfloat(((scalar_offset_15 & 2) ? ubo_load_5.zw : ubo_load_5.xy)), asfloat(((scalar_offset_16 & 2) ? ubo_load_6.zw : ubo_load_6.xy)), asfloat(((scalar_offset_17 & 2) ? ubo_load_7.zw : ubo_load_7.xy)), asfloat(((scalar_offset_18 & 2) ? ubo_load_8.zw : ubo_load_8.xy)));
+}
+
+float4x3 tint_symbol_21(uint4 buffer[272], uint offset) {
+  const uint scalar_offset_19 = ((offset + 0u)) / 4;
+  const uint scalar_offset_20 = ((offset + 16u)) / 4;
+  const uint scalar_offset_21 = ((offset + 32u)) / 4;
+  const uint scalar_offset_22 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset_19 / 4].xyz), asfloat(buffer[scalar_offset_20 / 4].xyz), asfloat(buffer[scalar_offset_21 / 4].xyz), asfloat(buffer[scalar_offset_22 / 4].xyz));
+}
+
+float4x4 tint_symbol_22(uint4 buffer[272], uint offset) {
+  const uint scalar_offset_23 = ((offset + 0u)) / 4;
+  const uint scalar_offset_24 = ((offset + 16u)) / 4;
+  const uint scalar_offset_25 = ((offset + 32u)) / 4;
+  const uint scalar_offset_26 = ((offset + 48u)) / 4;
+  return float4x4(asfloat(buffer[scalar_offset_23 / 4]), asfloat(buffer[scalar_offset_24 / 4]), asfloat(buffer[scalar_offset_25 / 4]), asfloat(buffer[scalar_offset_26 / 4]));
+}
+
+typedef float3 tint_symbol_23_ret[2];
+tint_symbol_23_ret tint_symbol_23(uint4 buffer[272], uint offset) {
+  float3 arr_1[2] = (float3[2])0;
   {
-    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
-      const uint scalar_offset_5 = ((offset + (i_1 * 16u))) / 4;
-      arr_1[i_1] = asint(buffer[scalar_offset_5 / 4]);
+    for(uint i = 0u; (i < 2u); i = (i + 1u)) {
+      const uint scalar_offset_27 = ((offset + (i * 16u))) / 4;
+      arr_1[i] = asfloat(buffer[scalar_offset_27 / 4].xyz);
     }
   }
   return arr_1;
 }
 
 void main_inner(uint idx) {
-  const uint scalar_offset_6 = ((192u * idx)) / 4;
-  const int3 a = asint(s[scalar_offset_6 / 4].xyz);
-  const uint scalar_offset_7 = (((192u * idx) + 12u)) / 4;
-  const int b = asint(s[scalar_offset_7 / 4][scalar_offset_7 % 4]);
-  const uint scalar_offset_8 = (((192u * idx) + 16u)) / 4;
-  const uint3 c = s[scalar_offset_8 / 4].xyz;
-  const uint scalar_offset_9 = (((192u * idx) + 28u)) / 4;
-  const uint d = s[scalar_offset_9 / 4][scalar_offset_9 % 4];
-  const uint scalar_offset_10 = (((192u * idx) + 32u)) / 4;
-  const float3 e = asfloat(s[scalar_offset_10 / 4].xyz);
-  const uint scalar_offset_11 = (((192u * idx) + 44u)) / 4;
-  const float f = asfloat(s[scalar_offset_11 / 4][scalar_offset_11 % 4]);
-  const uint scalar_offset_12 = (((192u * idx) + 48u)) / 4;
-  uint4 ubo_load_3 = s[scalar_offset_12 / 4];
-  const int2 g = asint(((scalar_offset_12 & 2) ? ubo_load_3.zw : ubo_load_3.xy));
-  const uint scalar_offset_13 = (((192u * idx) + 56u)) / 4;
-  uint4 ubo_load_4 = s[scalar_offset_13 / 4];
-  const int2 h = asint(((scalar_offset_13 & 2) ? ubo_load_4.zw : ubo_load_4.xy));
-  const float2x3 i = tint_symbol_9(s, ((192u * idx) + 64u));
-  const float3x2 j = tint_symbol_10(s, ((192u * idx) + 96u));
-  const int4 k[4] = tint_symbol_12(s, ((192u * idx) + 128u));
+  const uint scalar_offset_28 = ((544u * idx)) / 4;
+  const float scalar_f32 = asfloat(ub[scalar_offset_28 / 4][scalar_offset_28 % 4]);
+  const uint scalar_offset_29 = (((544u * idx) + 4u)) / 4;
+  const int scalar_i32 = asint(ub[scalar_offset_29 / 4][scalar_offset_29 % 4]);
+  const uint scalar_offset_30 = (((544u * idx) + 8u)) / 4;
+  const uint scalar_u32 = ub[scalar_offset_30 / 4][scalar_offset_30 % 4];
+  const uint scalar_offset_31 = (((544u * idx) + 16u)) / 4;
+  uint4 ubo_load_9 = ub[scalar_offset_31 / 4];
+  const float2 vec2_f32 = asfloat(((scalar_offset_31 & 2) ? ubo_load_9.zw : ubo_load_9.xy));
+  const uint scalar_offset_32 = (((544u * idx) + 24u)) / 4;
+  uint4 ubo_load_10 = ub[scalar_offset_32 / 4];
+  const int2 vec2_i32 = asint(((scalar_offset_32 & 2) ? ubo_load_10.zw : ubo_load_10.xy));
+  const uint scalar_offset_33 = (((544u * idx) + 32u)) / 4;
+  uint4 ubo_load_11 = ub[scalar_offset_33 / 4];
+  const uint2 vec2_u32 = ((scalar_offset_33 & 2) ? ubo_load_11.zw : ubo_load_11.xy);
+  const uint scalar_offset_34 = (((544u * idx) + 48u)) / 4;
+  const float3 vec3_f32 = asfloat(ub[scalar_offset_34 / 4].xyz);
+  const uint scalar_offset_35 = (((544u * idx) + 64u)) / 4;
+  const int3 vec3_i32 = asint(ub[scalar_offset_35 / 4].xyz);
+  const uint scalar_offset_36 = (((544u * idx) + 80u)) / 4;
+  const uint3 vec3_u32 = ub[scalar_offset_36 / 4].xyz;
+  const uint scalar_offset_37 = (((544u * idx) + 96u)) / 4;
+  const float4 vec4_f32 = asfloat(ub[scalar_offset_37 / 4]);
+  const uint scalar_offset_38 = (((544u * idx) + 112u)) / 4;
+  const int4 vec4_i32 = asint(ub[scalar_offset_38 / 4]);
+  const uint scalar_offset_39 = (((544u * idx) + 128u)) / 4;
+  const uint4 vec4_u32 = ub[scalar_offset_39 / 4];
+  const float2x2 mat2x2_f32 = tint_symbol_14(ub, ((544u * idx) + 144u));
+  const float2x3 mat2x3_f32 = tint_symbol_15(ub, ((544u * idx) + 160u));
+  const float2x4 mat2x4_f32 = tint_symbol_16(ub, ((544u * idx) + 192u));
+  const float3x2 mat3x2_f32 = tint_symbol_17(ub, ((544u * idx) + 224u));
+  const float3x3 mat3x3_f32 = tint_symbol_18(ub, ((544u * idx) + 256u));
+  const float3x4 mat3x4_f32 = tint_symbol_19(ub, ((544u * idx) + 304u));
+  const float4x2 mat4x2_f32 = tint_symbol_20(ub, ((544u * idx) + 352u));
+  const float4x3 mat4x3_f32 = tint_symbol_21(ub, ((544u * idx) + 384u));
+  const float4x4 mat4x4_f32 = tint_symbol_22(ub, ((544u * idx) + 448u));
+  const float3 arr2_vec3_f32[2] = tint_symbol_23(ub, ((544u * idx) + 512u));
 }
 
 [numthreads(1, 1, 1)]

diff --git a/test/tint/buffer/uniform/dynamic_index/read.wgsl.expected.glsl b/test/tint/buffer/uniform/dynamic_index/read.wgsl.expected.glsl
index 7c900f4..521d84e 100644
--- a/test/tint/buffer/uniform/dynamic_index/read.wgsl.expected.glsl
+++ b/test/tint/buffer/uniform/dynamic_index/read.wgsl.expected.glsl

@@ -1,37 +1,75 @@
 #version 310 es
 
 struct Inner {
-  ivec3 a;
-  int b;
-  uvec3 c;
-  uint d;
-  vec3 e;
-  float f;
-  ivec2 g;
-  ivec2 h;
-  mat2x3 i;
-  mat3x2 j;
+  float scalar_f32;
+  int scalar_i32;
+  uint scalar_u32;
   uint pad;
+  vec2 vec2_f32;
+  ivec2 vec2_i32;
+  uvec2 vec2_u32;
   uint pad_1;
-  ivec4 k[4];
+  uint pad_2;
+  vec3 vec3_f32;
+  uint pad_3;
+  ivec3 vec3_i32;
+  uint pad_4;
+  uvec3 vec3_u32;
+  uint pad_5;
+  vec4 vec4_f32;
+  ivec4 vec4_i32;
+  uvec4 vec4_u32;
+  mat2 mat2x2_f32;
+  mat2x3 mat2x3_f32;
+  mat2x4 mat2x4_f32;
+  mat3x2 mat3x2_f32;
+  uint pad_6;
+  uint pad_7;
+  mat3 mat3x3_f32;
+  mat3x4 mat3x4_f32;
+  mat4x2 mat4x2_f32;
+  mat4x3 mat4x3_f32;
+  mat4 mat4x4_f32;
+  vec3 arr2_vec3_f32[2];
 };
 
 struct Inner_std140 {
-  ivec3 a;
-  int b;
-  uvec3 c;
-  uint d;
-  vec3 e;
-  float f;
-  ivec2 g;
-  ivec2 h;
-  mat2x3 i;
-  vec2 j_0;
-  vec2 j_1;
-  vec2 j_2;
+  float scalar_f32;
+  int scalar_i32;
+  uint scalar_u32;
   uint pad;
+  vec2 vec2_f32;
+  ivec2 vec2_i32;
+  uvec2 vec2_u32;
   uint pad_1;
-  ivec4 k[4];
+  uint pad_2;
+  vec3 vec3_f32;
+  uint pad_3;
+  ivec3 vec3_i32;
+  uint pad_4;
+  uvec3 vec3_u32;
+  uint pad_5;
+  vec4 vec4_f32;
+  ivec4 vec4_i32;
+  uvec4 vec4_u32;
+  vec2 mat2x2_f32_0;
+  vec2 mat2x2_f32_1;
+  mat2x3 mat2x3_f32;
+  mat2x4 mat2x4_f32;
+  vec2 mat3x2_f32_0;
+  vec2 mat3x2_f32_1;
+  vec2 mat3x2_f32_2;
+  uint pad_6;
+  uint pad_7;
+  mat3 mat3x3_f32;
+  mat3x4 mat3x4_f32;
+  vec2 mat4x2_f32_0;
+  vec2 mat4x2_f32_1;
+  vec2 mat4x2_f32_2;
+  vec2 mat4x2_f32_3;
+  mat4x3 mat4x3_f32;
+  mat4 mat4x4_f32;
+  vec3 arr2_vec3_f32[2];
 };
 
 struct S {
@@ -42,27 +80,48 @@
   Inner_std140 arr[8];
 };
 
-layout(binding = 0, std140) uniform s_block_std140_ubo {
+layout(binding = 0, std140) uniform ub_block_std140_ubo {
   S_std140 inner;
-} s;
+} ub;
 
-mat3x2 load_s_inner_arr_p0_j(uint p0) {
+mat2 load_ub_inner_arr_p0_mat2x2_f32(uint p0) {
   uint s_save = p0;
-  return mat3x2(s.inner.arr[s_save].j_0, s.inner.arr[s_save].j_1, s.inner.arr[s_save].j_2);
+  return mat2(ub.inner.arr[s_save].mat2x2_f32_0, ub.inner.arr[s_save].mat2x2_f32_1);
+}
+
+mat3x2 load_ub_inner_arr_p0_mat3x2_f32(uint p0) {
+  uint s_save_1 = p0;
+  return mat3x2(ub.inner.arr[s_save_1].mat3x2_f32_0, ub.inner.arr[s_save_1].mat3x2_f32_1, ub.inner.arr[s_save_1].mat3x2_f32_2);
+}
+
+mat4x2 load_ub_inner_arr_p0_mat4x2_f32(uint p0) {
+  uint s_save_2 = p0;
+  return mat4x2(ub.inner.arr[s_save_2].mat4x2_f32_0, ub.inner.arr[s_save_2].mat4x2_f32_1, ub.inner.arr[s_save_2].mat4x2_f32_2, ub.inner.arr[s_save_2].mat4x2_f32_3);
 }
 
 void tint_symbol(uint idx) {
-  ivec3 a = s.inner.arr[idx].a;
-  int b = s.inner.arr[idx].b;
-  uvec3 c = s.inner.arr[idx].c;
-  uint d = s.inner.arr[idx].d;
-  vec3 e = s.inner.arr[idx].e;
-  float f = s.inner.arr[idx].f;
-  ivec2 g = s.inner.arr[idx].g;
-  ivec2 h = s.inner.arr[idx].h;
-  mat2x3 i = s.inner.arr[idx].i;
-  mat3x2 j = load_s_inner_arr_p0_j(uint(idx));
-  ivec4 k[4] = s.inner.arr[idx].k;
+  float scalar_f32 = ub.inner.arr[idx].scalar_f32;
+  int scalar_i32 = ub.inner.arr[idx].scalar_i32;
+  uint scalar_u32 = ub.inner.arr[idx].scalar_u32;
+  vec2 vec2_f32 = ub.inner.arr[idx].vec2_f32;
+  ivec2 vec2_i32 = ub.inner.arr[idx].vec2_i32;
+  uvec2 vec2_u32 = ub.inner.arr[idx].vec2_u32;
+  vec3 vec3_f32 = ub.inner.arr[idx].vec3_f32;
+  ivec3 vec3_i32 = ub.inner.arr[idx].vec3_i32;
+  uvec3 vec3_u32 = ub.inner.arr[idx].vec3_u32;
+  vec4 vec4_f32 = ub.inner.arr[idx].vec4_f32;
+  ivec4 vec4_i32 = ub.inner.arr[idx].vec4_i32;
+  uvec4 vec4_u32 = ub.inner.arr[idx].vec4_u32;
+  mat2 mat2x2_f32 = load_ub_inner_arr_p0_mat2x2_f32(uint(idx));
+  mat2x3 mat2x3_f32 = ub.inner.arr[idx].mat2x3_f32;
+  mat2x4 mat2x4_f32 = ub.inner.arr[idx].mat2x4_f32;
+  mat3x2 mat3x2_f32 = load_ub_inner_arr_p0_mat3x2_f32(uint(idx));
+  mat3 mat3x3_f32 = ub.inner.arr[idx].mat3x3_f32;
+  mat3x4 mat3x4_f32 = ub.inner.arr[idx].mat3x4_f32;
+  mat4x2 mat4x2_f32 = load_ub_inner_arr_p0_mat4x2_f32(uint(idx));
+  mat4x3 mat4x3_f32 = ub.inner.arr[idx].mat4x3_f32;
+  mat4 mat4x4_f32 = ub.inner.arr[idx].mat4x4_f32;
+  vec3 arr2_vec3_f32[2] = ub.inner.arr[idx].arr2_vec3_f32;
 }
 
 layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;

diff --git a/test/tint/buffer/uniform/dynamic_index/read.wgsl.expected.msl b/test/tint/buffer/uniform/dynamic_index/read.wgsl.expected.msl
index 37817c3..f976bd1 100644
--- a/test/tint/buffer/uniform/dynamic_index/read.wgsl.expected.msl
+++ b/test/tint/buffer/uniform/dynamic_index/read.wgsl.expected.msl

@@ -15,18 +15,34 @@
 };
 
 struct Inner {
-  /* 0x0000 */ packed_int3 a;
-  /* 0x000c */ int b;
-  /* 0x0010 */ packed_uint3 c;
-  /* 0x001c */ uint d;
-  /* 0x0020 */ packed_float3 e;
-  /* 0x002c */ float f;
-  /* 0x0030 */ int2 g;
-  /* 0x0038 */ int2 h;
-  /* 0x0040 */ float2x3 i;
-  /* 0x0060 */ float3x2 j;
-  /* 0x0078 */ tint_array<int8_t, 8> tint_pad;
-  /* 0x0080 */ tint_array<int4, 4> k;
+  /* 0x0000 */ float scalar_f32;
+  /* 0x0004 */ int scalar_i32;
+  /* 0x0008 */ uint scalar_u32;
+  /* 0x000c */ tint_array<int8_t, 4> tint_pad;
+  /* 0x0010 */ float2 vec2_f32;
+  /* 0x0018 */ int2 vec2_i32;
+  /* 0x0020 */ uint2 vec2_u32;
+  /* 0x0028 */ tint_array<int8_t, 8> tint_pad_1;
+  /* 0x0030 */ packed_float3 vec3_f32;
+  /* 0x003c */ tint_array<int8_t, 4> tint_pad_2;
+  /* 0x0040 */ packed_int3 vec3_i32;
+  /* 0x004c */ tint_array<int8_t, 4> tint_pad_3;
+  /* 0x0050 */ packed_uint3 vec3_u32;
+  /* 0x005c */ tint_array<int8_t, 4> tint_pad_4;
+  /* 0x0060 */ float4 vec4_f32;
+  /* 0x0070 */ int4 vec4_i32;
+  /* 0x0080 */ uint4 vec4_u32;
+  /* 0x0090 */ float2x2 mat2x2_f32;
+  /* 0x00a0 */ float2x3 mat2x3_f32;
+  /* 0x00c0 */ float2x4 mat2x4_f32;
+  /* 0x00e0 */ float3x2 mat3x2_f32;
+  /* 0x00f8 */ tint_array<int8_t, 8> tint_pad_5;
+  /* 0x0100 */ float3x3 mat3x3_f32;
+  /* 0x0130 */ float3x4 mat3x4_f32;
+  /* 0x0160 */ float4x2 mat4x2_f32;
+  /* 0x0180 */ float4x3 mat4x3_f32;
+  /* 0x01c0 */ float4x4 mat4x4_f32;
+  /* 0x0200 */ tint_array<float3, 2> arr2_vec3_f32;
 };
 
 struct S {
@@ -34,17 +50,28 @@
 };
 
 void tint_symbol_inner(uint idx, const constant S* const tint_symbol_1) {
-  int3 const a = int3((*(tint_symbol_1)).arr[idx].a);
-  int const b = (*(tint_symbol_1)).arr[idx].b;
-  uint3 const c = uint3((*(tint_symbol_1)).arr[idx].c);
-  uint const d = (*(tint_symbol_1)).arr[idx].d;
-  float3 const e = float3((*(tint_symbol_1)).arr[idx].e);
-  float const f = (*(tint_symbol_1)).arr[idx].f;
-  int2 const g = (*(tint_symbol_1)).arr[idx].g;
-  int2 const h = (*(tint_symbol_1)).arr[idx].h;
-  float2x3 const i = (*(tint_symbol_1)).arr[idx].i;
-  float3x2 const j = (*(tint_symbol_1)).arr[idx].j;
-  tint_array<int4, 4> const k = (*(tint_symbol_1)).arr[idx].k;
+  float const scalar_f32 = (*(tint_symbol_1)).arr[idx].scalar_f32;
+  int const scalar_i32 = (*(tint_symbol_1)).arr[idx].scalar_i32;
+  uint const scalar_u32 = (*(tint_symbol_1)).arr[idx].scalar_u32;
+  float2 const vec2_f32 = (*(tint_symbol_1)).arr[idx].vec2_f32;
+  int2 const vec2_i32 = (*(tint_symbol_1)).arr[idx].vec2_i32;
+  uint2 const vec2_u32 = (*(tint_symbol_1)).arr[idx].vec2_u32;
+  float3 const vec3_f32 = float3((*(tint_symbol_1)).arr[idx].vec3_f32);
+  int3 const vec3_i32 = int3((*(tint_symbol_1)).arr[idx].vec3_i32);
+  uint3 const vec3_u32 = uint3((*(tint_symbol_1)).arr[idx].vec3_u32);
+  float4 const vec4_f32 = (*(tint_symbol_1)).arr[idx].vec4_f32;
+  int4 const vec4_i32 = (*(tint_symbol_1)).arr[idx].vec4_i32;
+  uint4 const vec4_u32 = (*(tint_symbol_1)).arr[idx].vec4_u32;
+  float2x2 const mat2x2_f32 = (*(tint_symbol_1)).arr[idx].mat2x2_f32;
+  float2x3 const mat2x3_f32 = (*(tint_symbol_1)).arr[idx].mat2x3_f32;
+  float2x4 const mat2x4_f32 = (*(tint_symbol_1)).arr[idx].mat2x4_f32;
+  float3x2 const mat3x2_f32 = (*(tint_symbol_1)).arr[idx].mat3x2_f32;
+  float3x3 const mat3x3_f32 = (*(tint_symbol_1)).arr[idx].mat3x3_f32;
+  float3x4 const mat3x4_f32 = (*(tint_symbol_1)).arr[idx].mat3x4_f32;
+  float4x2 const mat4x2_f32 = (*(tint_symbol_1)).arr[idx].mat4x2_f32;
+  float4x3 const mat4x3_f32 = (*(tint_symbol_1)).arr[idx].mat4x3_f32;
+  float4x4 const mat4x4_f32 = (*(tint_symbol_1)).arr[idx].mat4x4_f32;
+  tint_array<float3, 2> const arr2_vec3_f32 = (*(tint_symbol_1)).arr[idx].arr2_vec3_f32;
 }
 
 kernel void tint_symbol(const constant S* tint_symbol_2 [[buffer(0)]], uint idx [[thread_index_in_threadgroup]]) {

diff --git a/test/tint/buffer/uniform/dynamic_index/read.wgsl.expected.spvasm b/test/tint/buffer/uniform/dynamic_index/read.wgsl.expected.spvasm
index e8c067c..1781177 100644
--- a/test/tint/buffer/uniform/dynamic_index/read.wgsl.expected.spvasm
+++ b/test/tint/buffer/uniform/dynamic_index/read.wgsl.expected.spvasm

@@ -1,151 +1,277 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 0
-; Bound: 93
+; Bound: 181
 ; Schema: 0
                OpCapability Shader
                OpMemoryModel Logical GLSL450
                OpEntryPoint GLCompute %main "main" %idx_1
                OpExecutionMode %main LocalSize 1 1 1
                OpName %idx_1 "idx_1"
-               OpName %s_block_std140 "s_block_std140"
-               OpMemberName %s_block_std140 0 "inner"
+               OpName %ub_block_std140 "ub_block_std140"
+               OpMemberName %ub_block_std140 0 "inner"
                OpName %S_std140 "S_std140"
                OpMemberName %S_std140 0 "arr"
                OpName %Inner_std140 "Inner_std140"
-               OpMemberName %Inner_std140 0 "a"
-               OpMemberName %Inner_std140 1 "b"
-               OpMemberName %Inner_std140 2 "c"
-               OpMemberName %Inner_std140 3 "d"
-               OpMemberName %Inner_std140 4 "e"
-               OpMemberName %Inner_std140 5 "f"
-               OpMemberName %Inner_std140 6 "g"
-               OpMemberName %Inner_std140 7 "h"
-               OpMemberName %Inner_std140 8 "i"
-               OpMemberName %Inner_std140 9 "j_0"
-               OpMemberName %Inner_std140 10 "j_1"
-               OpMemberName %Inner_std140 11 "j_2"
-               OpMemberName %Inner_std140 12 "k"
-               OpName %s "s"
-               OpName %load_s_inner_arr_p0_j "load_s_inner_arr_p0_j"
+               OpMemberName %Inner_std140 0 "scalar_f32"
+               OpMemberName %Inner_std140 1 "scalar_i32"
+               OpMemberName %Inner_std140 2 "scalar_u32"
+               OpMemberName %Inner_std140 3 "vec2_f32"
+               OpMemberName %Inner_std140 4 "vec2_i32"
+               OpMemberName %Inner_std140 5 "vec2_u32"
+               OpMemberName %Inner_std140 6 "vec3_f32"
+               OpMemberName %Inner_std140 7 "vec3_i32"
+               OpMemberName %Inner_std140 8 "vec3_u32"
+               OpMemberName %Inner_std140 9 "vec4_f32"
+               OpMemberName %Inner_std140 10 "vec4_i32"
+               OpMemberName %Inner_std140 11 "vec4_u32"
+               OpMemberName %Inner_std140 12 "mat2x2_f32_0"
+               OpMemberName %Inner_std140 13 "mat2x2_f32_1"
+               OpMemberName %Inner_std140 14 "mat2x3_f32"
+               OpMemberName %Inner_std140 15 "mat2x4_f32"
+               OpMemberName %Inner_std140 16 "mat3x2_f32_0"
+               OpMemberName %Inner_std140 17 "mat3x2_f32_1"
+               OpMemberName %Inner_std140 18 "mat3x2_f32_2"
+               OpMemberName %Inner_std140 19 "mat3x3_f32"
+               OpMemberName %Inner_std140 20 "mat3x4_f32"
+               OpMemberName %Inner_std140 21 "mat4x2_f32_0"
+               OpMemberName %Inner_std140 22 "mat4x2_f32_1"
+               OpMemberName %Inner_std140 23 "mat4x2_f32_2"
+               OpMemberName %Inner_std140 24 "mat4x2_f32_3"
+               OpMemberName %Inner_std140 25 "mat4x3_f32"
+               OpMemberName %Inner_std140 26 "mat4x4_f32"
+               OpMemberName %Inner_std140 27 "arr2_vec3_f32"
+               OpName %ub "ub"
+               OpName %load_ub_inner_arr_p0_mat2x2_f32 "load_ub_inner_arr_p0_mat2x2_f32"
                OpName %p0 "p0"
+               OpName %load_ub_inner_arr_p0_mat3x2_f32 "load_ub_inner_arr_p0_mat3x2_f32"
+               OpName %p0_0 "p0"
+               OpName %load_ub_inner_arr_p0_mat4x2_f32 "load_ub_inner_arr_p0_mat4x2_f32"
+               OpName %p0_1 "p0"
                OpName %main_inner "main_inner"
                OpName %idx "idx"
                OpName %main "main"
                OpDecorate %idx_1 BuiltIn LocalInvocationIndex
-               OpDecorate %s_block_std140 Block
-               OpMemberDecorate %s_block_std140 0 Offset 0
+               OpDecorate %ub_block_std140 Block
+               OpMemberDecorate %ub_block_std140 0 Offset 0
                OpMemberDecorate %S_std140 0 Offset 0
                OpMemberDecorate %Inner_std140 0 Offset 0
-               OpMemberDecorate %Inner_std140 1 Offset 12
-               OpMemberDecorate %Inner_std140 2 Offset 16
-               OpMemberDecorate %Inner_std140 3 Offset 28
-               OpMemberDecorate %Inner_std140 4 Offset 32
-               OpMemberDecorate %Inner_std140 5 Offset 44
+               OpMemberDecorate %Inner_std140 1 Offset 4
+               OpMemberDecorate %Inner_std140 2 Offset 8
+               OpMemberDecorate %Inner_std140 3 Offset 16
+               OpMemberDecorate %Inner_std140 4 Offset 24
+               OpMemberDecorate %Inner_std140 5 Offset 32
                OpMemberDecorate %Inner_std140 6 Offset 48
-               OpMemberDecorate %Inner_std140 7 Offset 56
-               OpMemberDecorate %Inner_std140 8 Offset 64
-               OpMemberDecorate %Inner_std140 8 ColMajor
-               OpMemberDecorate %Inner_std140 8 MatrixStride 16
+               OpMemberDecorate %Inner_std140 7 Offset 64
+               OpMemberDecorate %Inner_std140 8 Offset 80
                OpMemberDecorate %Inner_std140 9 Offset 96
-               OpMemberDecorate %Inner_std140 10 Offset 104
-               OpMemberDecorate %Inner_std140 11 Offset 112
-               OpMemberDecorate %Inner_std140 12 Offset 128
-               OpDecorate %_arr_v4int_uint_4 ArrayStride 16
-               OpDecorate %_arr_Inner_std140_uint_8 ArrayStride 192
-               OpDecorate %s NonWritable
-               OpDecorate %s Binding 0
-               OpDecorate %s DescriptorSet 0
+               OpMemberDecorate %Inner_std140 10 Offset 112
+               OpMemberDecorate %Inner_std140 11 Offset 128
+               OpMemberDecorate %Inner_std140 12 Offset 144
+               OpMemberDecorate %Inner_std140 13 Offset 152
+               OpMemberDecorate %Inner_std140 14 Offset 160
+               OpMemberDecorate %Inner_std140 14 ColMajor
+               OpMemberDecorate %Inner_std140 14 MatrixStride 16
+               OpMemberDecorate %Inner_std140 15 Offset 192
+               OpMemberDecorate %Inner_std140 15 ColMajor
+               OpMemberDecorate %Inner_std140 15 MatrixStride 16
+               OpMemberDecorate %Inner_std140 16 Offset 224
+               OpMemberDecorate %Inner_std140 17 Offset 232
+               OpMemberDecorate %Inner_std140 18 Offset 240
+               OpMemberDecorate %Inner_std140 19 Offset 256
+               OpMemberDecorate %Inner_std140 19 ColMajor
+               OpMemberDecorate %Inner_std140 19 MatrixStride 16
+               OpMemberDecorate %Inner_std140 20 Offset 304
+               OpMemberDecorate %Inner_std140 20 ColMajor
+               OpMemberDecorate %Inner_std140 20 MatrixStride 16
+               OpMemberDecorate %Inner_std140 21 Offset 352
+               OpMemberDecorate %Inner_std140 22 Offset 360
+               OpMemberDecorate %Inner_std140 23 Offset 368
+               OpMemberDecorate %Inner_std140 24 Offset 376
+               OpMemberDecorate %Inner_std140 25 Offset 384
+               OpMemberDecorate %Inner_std140 25 ColMajor
+               OpMemberDecorate %Inner_std140 25 MatrixStride 16
+               OpMemberDecorate %Inner_std140 26 Offset 448
+               OpMemberDecorate %Inner_std140 26 ColMajor
+               OpMemberDecorate %Inner_std140 26 MatrixStride 16
+               OpMemberDecorate %Inner_std140 27 Offset 512
+               OpDecorate %_arr_v3float_uint_2 ArrayStride 16
+               OpDecorate %_arr_Inner_std140_uint_8 ArrayStride 544
+               OpDecorate %ub NonWritable
+               OpDecorate %ub Binding 0
+               OpDecorate %ub DescriptorSet 0
        %uint = OpTypeInt 32 0
 %_ptr_Input_uint = OpTypePointer Input %uint
       %idx_1 = OpVariable %_ptr_Input_uint Input
+      %float = OpTypeFloat 32
         %int = OpTypeInt 32 1
+    %v2float = OpTypeVector %float 2
+      %v2int = OpTypeVector %int 2
+     %v2uint = OpTypeVector %uint 2
+    %v3float = OpTypeVector %float 3
       %v3int = OpTypeVector %int 3
      %v3uint = OpTypeVector %uint 3
-      %float = OpTypeFloat 32
-    %v3float = OpTypeVector %float 3
-      %v2int = OpTypeVector %int 2
-%mat2v3float = OpTypeMatrix %v3float 2
-    %v2float = OpTypeVector %float 2
+    %v4float = OpTypeVector %float 4
       %v4int = OpTypeVector %int 4
-     %uint_4 = OpConstant %uint 4
-%_arr_v4int_uint_4 = OpTypeArray %v4int %uint_4
-%Inner_std140 = OpTypeStruct %v3int %int %v3uint %uint %v3float %float %v2int %v2int %mat2v3float %v2float %v2float %v2float %_arr_v4int_uint_4
+     %v4uint = OpTypeVector %uint 4
+%mat2v3float = OpTypeMatrix %v3float 2
+%mat2v4float = OpTypeMatrix %v4float 2
+%mat3v3float = OpTypeMatrix %v3float 3
+%mat3v4float = OpTypeMatrix %v4float 3
+%mat4v3float = OpTypeMatrix %v3float 4
+%mat4v4float = OpTypeMatrix %v4float 4
+     %uint_2 = OpConstant %uint 2
+%_arr_v3float_uint_2 = OpTypeArray %v3float %uint_2
+%Inner_std140 = OpTypeStruct %float %int %uint %v2float %v2int %v2uint %v3float %v3int %v3uint %v4float %v4int %v4uint %v2float %v2float %mat2v3float %mat2v4float %v2float %v2float %v2float %mat3v3float %mat3v4float %v2float %v2float %v2float %v2float %mat4v3float %mat4v4float %_arr_v3float_uint_2
      %uint_8 = OpConstant %uint 8
 %_arr_Inner_std140_uint_8 = OpTypeArray %Inner_std140 %uint_8
    %S_std140 = OpTypeStruct %_arr_Inner_std140_uint_8
-%s_block_std140 = OpTypeStruct %S_std140
-%_ptr_Uniform_s_block_std140 = OpTypePointer Uniform %s_block_std140
-          %s = OpVariable %_ptr_Uniform_s_block_std140 Uniform
-%mat3v2float = OpTypeMatrix %v2float 3
-         %22 = OpTypeFunction %mat3v2float %uint
+%ub_block_std140 = OpTypeStruct %S_std140
+%_ptr_Uniform_ub_block_std140 = OpTypePointer Uniform %ub_block_std140
+         %ub = OpVariable %_ptr_Uniform_ub_block_std140 Uniform
+%mat2v2float = OpTypeMatrix %v2float 2
+         %30 = OpTypeFunction %mat2v2float %uint
      %uint_0 = OpConstant %uint 0
 %_ptr_Uniform_Inner_std140 = OpTypePointer Uniform %Inner_std140
-     %uint_9 = OpConstant %uint 9
+    %uint_12 = OpConstant %uint 12
 %_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
-    %uint_10 = OpConstant %uint 10
-    %uint_11 = OpConstant %uint 11
+    %uint_13 = OpConstant %uint 13
+%mat3v2float = OpTypeMatrix %v2float 3
+         %49 = OpTypeFunction %mat3v2float %uint
+    %uint_16 = OpConstant %uint 16
+    %uint_17 = OpConstant %uint 17
+    %uint_18 = OpConstant %uint 18
+%mat4v2float = OpTypeMatrix %v2float 4
+         %69 = OpTypeFunction %mat4v2float %uint
+    %uint_21 = OpConstant %uint 21
+    %uint_22 = OpConstant %uint 22
+    %uint_23 = OpConstant %uint 23
+    %uint_24 = OpConstant %uint 24
        %void = OpTypeVoid
-         %45 = OpTypeFunction %void %uint
-%_ptr_Uniform_v3int = OpTypePointer Uniform %v3int
+         %93 = OpTypeFunction %void %uint
+%_ptr_Uniform_float = OpTypePointer Uniform %float
      %uint_1 = OpConstant %uint 1
 %_ptr_Uniform_int = OpTypePointer Uniform %int
-     %uint_2 = OpConstant %uint 2
-%_ptr_Uniform_v3uint = OpTypePointer Uniform %v3uint
-     %uint_3 = OpConstant %uint 3
 %_ptr_Uniform_uint = OpTypePointer Uniform %uint
-%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
-     %uint_5 = OpConstant %uint 5
-%_ptr_Uniform_float = OpTypePointer Uniform %float
-     %uint_6 = OpConstant %uint 6
+     %uint_3 = OpConstant %uint 3
+     %uint_4 = OpConstant %uint 4
 %_ptr_Uniform_v2int = OpTypePointer Uniform %v2int
+     %uint_5 = OpConstant %uint 5
+%_ptr_Uniform_v2uint = OpTypePointer Uniform %v2uint
+     %uint_6 = OpConstant %uint 6
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
      %uint_7 = OpConstant %uint 7
+%_ptr_Uniform_v3int = OpTypePointer Uniform %v3int
+%_ptr_Uniform_v3uint = OpTypePointer Uniform %v3uint
+     %uint_9 = OpConstant %uint 9
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+    %uint_10 = OpConstant %uint 10
+%_ptr_Uniform_v4int = OpTypePointer Uniform %v4int
+    %uint_11 = OpConstant %uint 11
+%_ptr_Uniform_v4uint = OpTypePointer Uniform %v4uint
+    %uint_14 = OpConstant %uint 14
 %_ptr_Uniform_mat2v3float = OpTypePointer Uniform %mat2v3float
-    %uint_12 = OpConstant %uint 12
-%_ptr_Uniform__arr_v4int_uint_4 = OpTypePointer Uniform %_arr_v4int_uint_4
-         %88 = OpTypeFunction %void
-%load_s_inner_arr_p0_j = OpFunction %mat3v2float None %22
+    %uint_15 = OpConstant %uint 15
+%_ptr_Uniform_mat2v4float = OpTypePointer Uniform %mat2v4float
+    %uint_19 = OpConstant %uint 19
+%_ptr_Uniform_mat3v3float = OpTypePointer Uniform %mat3v3float
+    %uint_20 = OpConstant %uint 20
+%_ptr_Uniform_mat3v4float = OpTypePointer Uniform %mat3v4float
+    %uint_25 = OpConstant %uint 25
+%_ptr_Uniform_mat4v3float = OpTypePointer Uniform %mat4v3float
+    %uint_26 = OpConstant %uint 26
+%_ptr_Uniform_mat4v4float = OpTypePointer Uniform %mat4v4float
+    %uint_27 = OpConstant %uint 27
+%_ptr_Uniform__arr_v3float_uint_2 = OpTypePointer Uniform %_arr_v3float_uint_2
+        %176 = OpTypeFunction %void
+%load_ub_inner_arr_p0_mat2x2_f32 = OpFunction %mat2v2float None %30
          %p0 = OpFunctionParameter %uint
-         %26 = OpLabel
-         %30 = OpAccessChain %_ptr_Uniform_Inner_std140 %s %uint_0 %uint_0 %p0
-         %34 = OpAccessChain %_ptr_Uniform_v2float %30 %uint_9
-         %35 = OpLoad %v2float %34
-         %38 = OpAccessChain %_ptr_Uniform_v2float %30 %uint_10
-         %39 = OpLoad %v2float %38
-         %42 = OpAccessChain %_ptr_Uniform_v2float %30 %uint_11
+         %34 = OpLabel
+         %38 = OpAccessChain %_ptr_Uniform_Inner_std140 %ub %uint_0 %uint_0 %p0
+         %42 = OpAccessChain %_ptr_Uniform_v2float %38 %uint_12
          %43 = OpLoad %v2float %42
-         %44 = OpCompositeConstruct %mat3v2float %35 %39 %43
-               OpReturnValue %44
+         %46 = OpAccessChain %_ptr_Uniform_v2float %38 %uint_13
+         %47 = OpLoad %v2float %46
+         %48 = OpCompositeConstruct %mat2v2float %43 %47
+               OpReturnValue %48
                OpFunctionEnd
- %main_inner = OpFunction %void None %45
+%load_ub_inner_arr_p0_mat3x2_f32 = OpFunction %mat3v2float None %49
+       %p0_0 = OpFunctionParameter %uint
+         %53 = OpLabel
+         %55 = OpAccessChain %_ptr_Uniform_Inner_std140 %ub %uint_0 %uint_0 %p0_0
+         %58 = OpAccessChain %_ptr_Uniform_v2float %55 %uint_16
+         %59 = OpLoad %v2float %58
+         %62 = OpAccessChain %_ptr_Uniform_v2float %55 %uint_17
+         %63 = OpLoad %v2float %62
+         %66 = OpAccessChain %_ptr_Uniform_v2float %55 %uint_18
+         %67 = OpLoad %v2float %66
+         %68 = OpCompositeConstruct %mat3v2float %59 %63 %67
+               OpReturnValue %68
+               OpFunctionEnd
+%load_ub_inner_arr_p0_mat4x2_f32 = OpFunction %mat4v2float None %69
+       %p0_1 = OpFunctionParameter %uint
+         %73 = OpLabel
+         %75 = OpAccessChain %_ptr_Uniform_Inner_std140 %ub %uint_0 %uint_0 %p0_1
+         %78 = OpAccessChain %_ptr_Uniform_v2float %75 %uint_21
+         %79 = OpLoad %v2float %78
+         %82 = OpAccessChain %_ptr_Uniform_v2float %75 %uint_22
+         %83 = OpLoad %v2float %82
+         %86 = OpAccessChain %_ptr_Uniform_v2float %75 %uint_23
+         %87 = OpLoad %v2float %86
+         %90 = OpAccessChain %_ptr_Uniform_v2float %75 %uint_24
+         %91 = OpLoad %v2float %90
+         %92 = OpCompositeConstruct %mat4v2float %79 %83 %87 %91
+               OpReturnValue %92
+               OpFunctionEnd
+ %main_inner = OpFunction %void None %93
         %idx = OpFunctionParameter %uint
-         %49 = OpLabel
-         %51 = OpAccessChain %_ptr_Uniform_v3int %s %uint_0 %uint_0 %idx %uint_0
-         %52 = OpLoad %v3int %51
-         %55 = OpAccessChain %_ptr_Uniform_int %s %uint_0 %uint_0 %idx %uint_1
-         %56 = OpLoad %int %55
-         %59 = OpAccessChain %_ptr_Uniform_v3uint %s %uint_0 %uint_0 %idx %uint_2
-         %60 = OpLoad %v3uint %59
-         %63 = OpAccessChain %_ptr_Uniform_uint %s %uint_0 %uint_0 %idx %uint_3
-         %64 = OpLoad %uint %63
-         %66 = OpAccessChain %_ptr_Uniform_v3float %s %uint_0 %uint_0 %idx %uint_4
-         %67 = OpLoad %v3float %66
-         %70 = OpAccessChain %_ptr_Uniform_float %s %uint_0 %uint_0 %idx %uint_5
-         %71 = OpLoad %float %70
-         %74 = OpAccessChain %_ptr_Uniform_v2int %s %uint_0 %uint_0 %idx %uint_6
-         %75 = OpLoad %v2int %74
-         %77 = OpAccessChain %_ptr_Uniform_v2int %s %uint_0 %uint_0 %idx %uint_7
-         %78 = OpLoad %v2int %77
-         %80 = OpAccessChain %_ptr_Uniform_mat2v3float %s %uint_0 %uint_0 %idx %uint_8
-         %81 = OpLoad %mat2v3float %80
-         %82 = OpFunctionCall %mat3v2float %load_s_inner_arr_p0_j %idx
-         %86 = OpAccessChain %_ptr_Uniform__arr_v4int_uint_4 %s %uint_0 %uint_0 %idx %uint_12
-         %87 = OpLoad %_arr_v4int_uint_4 %86
+         %97 = OpLabel
+         %99 = OpAccessChain %_ptr_Uniform_float %ub %uint_0 %uint_0 %idx %uint_0
+        %100 = OpLoad %float %99
+        %103 = OpAccessChain %_ptr_Uniform_int %ub %uint_0 %uint_0 %idx %uint_1
+        %104 = OpLoad %int %103
+        %106 = OpAccessChain %_ptr_Uniform_uint %ub %uint_0 %uint_0 %idx %uint_2
+        %107 = OpLoad %uint %106
+        %109 = OpAccessChain %_ptr_Uniform_v2float %ub %uint_0 %uint_0 %idx %uint_3
+        %110 = OpLoad %v2float %109
+        %113 = OpAccessChain %_ptr_Uniform_v2int %ub %uint_0 %uint_0 %idx %uint_4
+        %114 = OpLoad %v2int %113
+        %117 = OpAccessChain %_ptr_Uniform_v2uint %ub %uint_0 %uint_0 %idx %uint_5
+        %118 = OpLoad %v2uint %117
+        %121 = OpAccessChain %_ptr_Uniform_v3float %ub %uint_0 %uint_0 %idx %uint_6
+        %122 = OpLoad %v3float %121
+        %125 = OpAccessChain %_ptr_Uniform_v3int %ub %uint_0 %uint_0 %idx %uint_7
+        %126 = OpLoad %v3int %125
+        %128 = OpAccessChain %_ptr_Uniform_v3uint %ub %uint_0 %uint_0 %idx %uint_8
+        %129 = OpLoad %v3uint %128
+        %132 = OpAccessChain %_ptr_Uniform_v4float %ub %uint_0 %uint_0 %idx %uint_9
+        %133 = OpLoad %v4float %132
+        %136 = OpAccessChain %_ptr_Uniform_v4int %ub %uint_0 %uint_0 %idx %uint_10
+        %137 = OpLoad %v4int %136
+        %140 = OpAccessChain %_ptr_Uniform_v4uint %ub %uint_0 %uint_0 %idx %uint_11
+        %141 = OpLoad %v4uint %140
+        %142 = OpFunctionCall %mat2v2float %load_ub_inner_arr_p0_mat2x2_f32 %idx
+        %146 = OpAccessChain %_ptr_Uniform_mat2v3float %ub %uint_0 %uint_0 %idx %uint_14
+        %147 = OpLoad %mat2v3float %146
+        %150 = OpAccessChain %_ptr_Uniform_mat2v4float %ub %uint_0 %uint_0 %idx %uint_15
+        %151 = OpLoad %mat2v4float %150
+        %152 = OpFunctionCall %mat3v2float %load_ub_inner_arr_p0_mat3x2_f32 %idx
+        %156 = OpAccessChain %_ptr_Uniform_mat3v3float %ub %uint_0 %uint_0 %idx %uint_19
+        %157 = OpLoad %mat3v3float %156
+        %160 = OpAccessChain %_ptr_Uniform_mat3v4float %ub %uint_0 %uint_0 %idx %uint_20
+        %161 = OpLoad %mat3v4float %160
+        %162 = OpFunctionCall %mat4v2float %load_ub_inner_arr_p0_mat4x2_f32 %idx
+        %166 = OpAccessChain %_ptr_Uniform_mat4v3float %ub %uint_0 %uint_0 %idx %uint_25
+        %167 = OpLoad %mat4v3float %166
+        %170 = OpAccessChain %_ptr_Uniform_mat4v4float %ub %uint_0 %uint_0 %idx %uint_26
+        %171 = OpLoad %mat4v4float %170
+        %174 = OpAccessChain %_ptr_Uniform__arr_v3float_uint_2 %ub %uint_0 %uint_0 %idx %uint_27
+        %175 = OpLoad %_arr_v3float_uint_2 %174
                OpReturn
                OpFunctionEnd
-       %main = OpFunction %void None %88
-         %90 = OpLabel
-         %92 = OpLoad %uint %idx_1
-         %91 = OpFunctionCall %void %main_inner %92
+       %main = OpFunction %void None %176
+        %178 = OpLabel
+        %180 = OpLoad %uint %idx_1
+        %179 = OpFunctionCall %void %main_inner %180
                OpReturn
                OpFunctionEnd

diff --git a/test/tint/buffer/uniform/dynamic_index/read.wgsl.expected.wgsl b/test/tint/buffer/uniform/dynamic_index/read.wgsl.expected.wgsl
index 8c8e684..80d5296 100644
--- a/test/tint/buffer/uniform/dynamic_index/read.wgsl.expected.wgsl
+++ b/test/tint/buffer/uniform/dynamic_index/read.wgsl.expected.wgsl

@@ -1,36 +1,57 @@
 struct Inner {
-  a : vec3<i32>,
-  b : i32,
-  c : vec3<u32>,
-  d : u32,
-  e : vec3<f32>,
-  f : f32,
-  g : vec2<i32>,
-  h : vec2<i32>,
-  i : mat2x3<f32>,
+  scalar_f32 : f32,
+  scalar_i32 : i32,
+  scalar_u32 : u32,
+  vec2_f32 : vec2<f32>,
+  vec2_i32 : vec2<i32>,
+  vec2_u32 : vec2<u32>,
+  vec3_f32 : vec3<f32>,
+  vec3_i32 : vec3<i32>,
+  vec3_u32 : vec3<u32>,
+  vec4_f32 : vec4<f32>,
+  vec4_i32 : vec4<i32>,
+  vec4_u32 : vec4<u32>,
+  mat2x2_f32 : mat2x2<f32>,
+  mat2x3_f32 : mat2x3<f32>,
+  mat2x4_f32 : mat2x4<f32>,
+  mat3x2_f32 : mat3x2<f32>,
+  mat3x3_f32 : mat3x3<f32>,
+  mat3x4_f32 : mat3x4<f32>,
+  mat4x2_f32 : mat4x2<f32>,
+  mat4x3_f32 : mat4x3<f32>,
+  mat4x4_f32 : mat4x4<f32>,
   @align(16)
-  j : mat3x2<f32>,
-  @align(16)
-  k : array<vec4<i32>, 4>,
+  arr2_vec3_f32 : array<vec3<f32>, 2>,
 }
 
 struct S {
   arr : array<Inner, 8>,
 }
 
-@binding(0) @group(0) var<uniform> s : S;
+@binding(0) @group(0) var<uniform> ub : S;
 
 @compute @workgroup_size(1)
 fn main(@builtin(local_invocation_index) idx : u32) {
-  let a = s.arr[idx].a;
-  let b = s.arr[idx].b;
-  let c = s.arr[idx].c;
-  let d = s.arr[idx].d;
-  let e = s.arr[idx].e;
-  let f = s.arr[idx].f;
-  let g = s.arr[idx].g;
-  let h = s.arr[idx].h;
-  let i = s.arr[idx].i;
-  let j = s.arr[idx].j;
-  let k = s.arr[idx].k;
+  let scalar_f32 : f32 = ub.arr[idx].scalar_f32;
+  let scalar_i32 : i32 = ub.arr[idx].scalar_i32;
+  let scalar_u32 : u32 = ub.arr[idx].scalar_u32;
+  let vec2_f32 : vec2<f32> = ub.arr[idx].vec2_f32;
+  let vec2_i32 : vec2<i32> = ub.arr[idx].vec2_i32;
+  let vec2_u32 : vec2<u32> = ub.arr[idx].vec2_u32;
+  let vec3_f32 : vec3<f32> = ub.arr[idx].vec3_f32;
+  let vec3_i32 : vec3<i32> = ub.arr[idx].vec3_i32;
+  let vec3_u32 : vec3<u32> = ub.arr[idx].vec3_u32;
+  let vec4_f32 : vec4<f32> = ub.arr[idx].vec4_f32;
+  let vec4_i32 : vec4<i32> = ub.arr[idx].vec4_i32;
+  let vec4_u32 : vec4<u32> = ub.arr[idx].vec4_u32;
+  let mat2x2_f32 : mat2x2<f32> = ub.arr[idx].mat2x2_f32;
+  let mat2x3_f32 : mat2x3<f32> = ub.arr[idx].mat2x3_f32;
+  let mat2x4_f32 : mat2x4<f32> = ub.arr[idx].mat2x4_f32;
+  let mat3x2_f32 : mat3x2<f32> = ub.arr[idx].mat3x2_f32;
+  let mat3x3_f32 : mat3x3<f32> = ub.arr[idx].mat3x3_f32;
+  let mat3x4_f32 : mat3x4<f32> = ub.arr[idx].mat3x4_f32;
+  let mat4x2_f32 : mat4x2<f32> = ub.arr[idx].mat4x2_f32;
+  let mat4x3_f32 : mat4x3<f32> = ub.arr[idx].mat4x3_f32;
+  let mat4x4_f32 : mat4x4<f32> = ub.arr[idx].mat4x4_f32;
+  let arr2_vec3_f32 : array<vec3<f32>, 2> = ub.arr[idx].arr2_vec3_f32;
 }

diff --git a/test/tint/buffer/uniform/dynamic_index/read_f16.wgsl b/test/tint/buffer/uniform/dynamic_index/read_f16.wgsl
new file mode 100644
index 0000000..707e982
--- /dev/null
+++ b/test/tint/buffer/uniform/dynamic_index/read_f16.wgsl

@@ -0,0 +1,86 @@
+enable f16;
+
+struct Inner {
+    scalar_f32 : f32,
+    scalar_i32 : i32,
+    scalar_u32 : u32,
+    scalar_f16 : f16,
+    vec2_f32 : vec2<f32>,
+    vec2_i32 : vec2<i32>,
+    vec2_u32 : vec2<u32>,
+    vec2_f16 : vec2<f16>,
+    vec3_f32 : vec3<f32>,
+    vec3_i32 : vec3<i32>,
+    vec3_u32 : vec3<u32>,
+    vec3_f16 : vec3<f16>,
+    vec4_f32 : vec4<f32>,
+    vec4_i32 : vec4<i32>,
+    vec4_u32 : vec4<u32>,
+    vec4_f16 : vec4<f16>,
+    mat2x2_f32 : mat2x2<f32>,
+    mat2x3_f32 : mat2x3<f32>,
+    mat2x4_f32 : mat2x4<f32>,
+    mat3x2_f32 : mat3x2<f32>,
+    mat3x3_f32 : mat3x3<f32>,
+    mat3x4_f32 : mat3x4<f32>,
+    mat4x2_f32 : mat4x2<f32>,
+    mat4x3_f32 : mat4x3<f32>,
+    mat4x4_f32 : mat4x4<f32>,
+    mat2x2_f16 : mat2x2<f16>,
+    mat2x3_f16 : mat2x3<f16>,
+    mat2x4_f16 : mat2x4<f16>,
+    mat3x2_f16 : mat3x2<f16>,
+    mat3x3_f16 : mat3x3<f16>,
+    mat3x4_f16 : mat3x4<f16>,
+    mat4x2_f16 : mat4x2<f16>,
+    mat4x3_f16 : mat4x3<f16>,
+    mat4x4_f16 : mat4x4<f16>,
+    @align(16) arr2_vec3_f32 : array<vec3<f32>, 2>,
+    arr2_mat4x2_f16 : array<mat4x2<f16>, 2>,
+};
+
+struct S {
+    arr : array<Inner, 8>,
+};
+
+@binding(0) @group(0) var<uniform> ub : S;
+
+@compute @workgroup_size(1)
+fn main(@builtin(local_invocation_index) idx : u32) {
+    let scalar_f32 : f32 = ub.arr[idx].scalar_f32;
+    let scalar_i32 : i32 = ub.arr[idx].scalar_i32;
+    let scalar_u32 : u32 = ub.arr[idx].scalar_u32;
+    let scalar_f16 : f16 = ub.arr[idx].scalar_f16;
+    let vec2_f32 : vec2<f32> = ub.arr[idx].vec2_f32;
+    let vec2_i32 : vec2<i32> = ub.arr[idx].vec2_i32;
+    let vec2_u32 : vec2<u32> = ub.arr[idx].vec2_u32;
+    let vec2_f16 : vec2<f16> = ub.arr[idx].vec2_f16;
+    let vec3_f32 : vec3<f32> = ub.arr[idx].vec3_f32;
+    let vec3_i32 : vec3<i32> = ub.arr[idx].vec3_i32;
+    let vec3_u32 : vec3<u32> = ub.arr[idx].vec3_u32;
+    let vec3_f16 : vec3<f16> = ub.arr[idx].vec3_f16;
+    let vec4_f32 : vec4<f32> = ub.arr[idx].vec4_f32;
+    let vec4_i32 : vec4<i32> = ub.arr[idx].vec4_i32;
+    let vec4_u32 : vec4<u32> = ub.arr[idx].vec4_u32;
+    let vec4_f16 : vec4<f16> = ub.arr[idx].vec4_f16;
+    let mat2x2_f32 : mat2x2<f32> = ub.arr[idx].mat2x2_f32;
+    let mat2x3_f32 : mat2x3<f32> = ub.arr[idx].mat2x3_f32;
+    let mat2x4_f32 : mat2x4<f32> = ub.arr[idx].mat2x4_f32;
+    let mat3x2_f32 : mat3x2<f32> = ub.arr[idx].mat3x2_f32;
+    let mat3x3_f32 : mat3x3<f32> = ub.arr[idx].mat3x3_f32;
+    let mat3x4_f32 : mat3x4<f32> = ub.arr[idx].mat3x4_f32;
+    let mat4x2_f32 : mat4x2<f32> = ub.arr[idx].mat4x2_f32;
+    let mat4x3_f32 : mat4x3<f32> = ub.arr[idx].mat4x3_f32;
+    let mat4x4_f32 : mat4x4<f32> = ub.arr[idx].mat4x4_f32;
+    let mat2x2_f16 : mat2x2<f16> = ub.arr[idx].mat2x2_f16;
+    let mat2x3_f16 : mat2x3<f16> = ub.arr[idx].mat2x3_f16;
+    let mat2x4_f16 : mat2x4<f16> = ub.arr[idx].mat2x4_f16;
+    let mat3x2_f16 : mat3x2<f16> = ub.arr[idx].mat3x2_f16;
+    let mat3x3_f16 : mat3x3<f16> = ub.arr[idx].mat3x3_f16;
+    let mat3x4_f16 : mat3x4<f16> = ub.arr[idx].mat3x4_f16;
+    let mat4x2_f16 : mat4x2<f16> = ub.arr[idx].mat4x2_f16;
+    let mat4x3_f16 : mat4x3<f16> = ub.arr[idx].mat4x3_f16;
+    let mat4x4_f16 : mat4x4<f16> = ub.arr[idx].mat4x4_f16;
+    let arr2_vec3_f32 : array<vec3<f32>, 2> = ub.arr[idx].arr2_vec3_f32;
+    let arr2_mat4x2_f16 : array<mat4x2<f16>, 2> = ub.arr[idx].arr2_mat4x2_f16;
+}
\ No newline at end of file

diff --git a/test/tint/buffer/uniform/dynamic_index/read_f16.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/dynamic_index/read_f16.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..5ba1911
--- /dev/null
+++ b/test/tint/buffer/uniform/dynamic_index/read_f16.wgsl.expected.dxc.hlsl

@@ -0,0 +1,320 @@
+cbuffer cbuffer_ub : register(b0, space0) {
+  uint4 ub[400];
+};
+
+struct tint_symbol_1 {
+  uint idx : SV_GroupIndex;
+};
+
+float2x2 tint_symbol_18(uint4 buffer[400], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  return float2x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)));
+}
+
+float2x3 tint_symbol_19(uint4 buffer[400], uint offset) {
+  const uint scalar_offset_2 = ((offset + 0u)) / 4;
+  const uint scalar_offset_3 = ((offset + 16u)) / 4;
+  return float2x3(asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
+}
+
+float2x4 tint_symbol_20(uint4 buffer[400], uint offset) {
+  const uint scalar_offset_4 = ((offset + 0u)) / 4;
+  const uint scalar_offset_5 = ((offset + 16u)) / 4;
+  return float2x4(asfloat(buffer[scalar_offset_4 / 4]), asfloat(buffer[scalar_offset_5 / 4]));
+}
+
+float3x2 tint_symbol_21(uint4 buffer[400], uint offset) {
+  const uint scalar_offset_6 = ((offset + 0u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_6 / 4];
+  const uint scalar_offset_7 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_7 / 4];
+  const uint scalar_offset_8 = ((offset + 16u)) / 4;
+  uint4 ubo_load_4 = buffer[scalar_offset_8 / 4];
+  return float3x2(asfloat(((scalar_offset_6 & 2) ? ubo_load_2.zw : ubo_load_2.xy)), asfloat(((scalar_offset_7 & 2) ? ubo_load_3.zw : ubo_load_3.xy)), asfloat(((scalar_offset_8 & 2) ? ubo_load_4.zw : ubo_load_4.xy)));
+}
+
+float3x3 tint_symbol_22(uint4 buffer[400], uint offset) {
+  const uint scalar_offset_9 = ((offset + 0u)) / 4;
+  const uint scalar_offset_10 = ((offset + 16u)) / 4;
+  const uint scalar_offset_11 = ((offset + 32u)) / 4;
+  return float3x3(asfloat(buffer[scalar_offset_9 / 4].xyz), asfloat(buffer[scalar_offset_10 / 4].xyz), asfloat(buffer[scalar_offset_11 / 4].xyz));
+}
+
+float3x4 tint_symbol_23(uint4 buffer[400], uint offset) {
+  const uint scalar_offset_12 = ((offset + 0u)) / 4;
+  const uint scalar_offset_13 = ((offset + 16u)) / 4;
+  const uint scalar_offset_14 = ((offset + 32u)) / 4;
+  return float3x4(asfloat(buffer[scalar_offset_12 / 4]), asfloat(buffer[scalar_offset_13 / 4]), asfloat(buffer[scalar_offset_14 / 4]));
+}
+
+float4x2 tint_symbol_24(uint4 buffer[400], uint offset) {
+  const uint scalar_offset_15 = ((offset + 0u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_15 / 4];
+  const uint scalar_offset_16 = ((offset + 8u)) / 4;
+  uint4 ubo_load_6 = buffer[scalar_offset_16 / 4];
+  const uint scalar_offset_17 = ((offset + 16u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_17 / 4];
+  const uint scalar_offset_18 = ((offset + 24u)) / 4;
+  uint4 ubo_load_8 = buffer[scalar_offset_18 / 4];
+  return float4x2(asfloat(((scalar_offset_15 & 2) ? ubo_load_5.zw : ubo_load_5.xy)), asfloat(((scalar_offset_16 & 2) ? ubo_load_6.zw : ubo_load_6.xy)), asfloat(((scalar_offset_17 & 2) ? ubo_load_7.zw : ubo_load_7.xy)), asfloat(((scalar_offset_18 & 2) ? ubo_load_8.zw : ubo_load_8.xy)));
+}
+
+float4x3 tint_symbol_25(uint4 buffer[400], uint offset) {
+  const uint scalar_offset_19 = ((offset + 0u)) / 4;
+  const uint scalar_offset_20 = ((offset + 16u)) / 4;
+  const uint scalar_offset_21 = ((offset + 32u)) / 4;
+  const uint scalar_offset_22 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset_19 / 4].xyz), asfloat(buffer[scalar_offset_20 / 4].xyz), asfloat(buffer[scalar_offset_21 / 4].xyz), asfloat(buffer[scalar_offset_22 / 4].xyz));
+}
+
+float4x4 tint_symbol_26(uint4 buffer[400], uint offset) {
+  const uint scalar_offset_23 = ((offset + 0u)) / 4;
+  const uint scalar_offset_24 = ((offset + 16u)) / 4;
+  const uint scalar_offset_25 = ((offset + 32u)) / 4;
+  const uint scalar_offset_26 = ((offset + 48u)) / 4;
+  return float4x4(asfloat(buffer[scalar_offset_23 / 4]), asfloat(buffer[scalar_offset_24 / 4]), asfloat(buffer[scalar_offset_25 / 4]), asfloat(buffer[scalar_offset_26 / 4]));
+}
+
+matrix<float16_t, 2, 2> tint_symbol_27(uint4 buffer[400], uint offset) {
+  const uint scalar_offset_27 = ((offset + 0u)) / 4;
+  uint ubo_load_9 = buffer[scalar_offset_27 / 4][scalar_offset_27 % 4];
+  const uint scalar_offset_28 = ((offset + 4u)) / 4;
+  uint ubo_load_10 = buffer[scalar_offset_28 / 4][scalar_offset_28 % 4];
+  return matrix<float16_t, 2, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_9 & 0xFFFF)), float16_t(f16tof32(ubo_load_9 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_10 & 0xFFFF)), float16_t(f16tof32(ubo_load_10 >> 16))));
+}
+
+matrix<float16_t, 2, 3> tint_symbol_28(uint4 buffer[400], uint offset) {
+  const uint scalar_offset_29 = ((offset + 0u)) / 4;
+  uint4 ubo_load_12 = buffer[scalar_offset_29 / 4];
+  uint2 ubo_load_11 = ((scalar_offset_29 & 2) ? ubo_load_12.zw : ubo_load_12.xy);
+  vector<float16_t, 2> ubo_load_11_xz = vector<float16_t, 2>(f16tof32(ubo_load_11 & 0xFFFF));
+  float16_t ubo_load_11_y = f16tof32(ubo_load_11[0] >> 16);
+  const uint scalar_offset_30 = ((offset + 8u)) / 4;
+  uint4 ubo_load_14 = buffer[scalar_offset_30 / 4];
+  uint2 ubo_load_13 = ((scalar_offset_30 & 2) ? ubo_load_14.zw : ubo_load_14.xy);
+  vector<float16_t, 2> ubo_load_13_xz = vector<float16_t, 2>(f16tof32(ubo_load_13 & 0xFFFF));
+  float16_t ubo_load_13_y = f16tof32(ubo_load_13[0] >> 16);
+  return matrix<float16_t, 2, 3>(vector<float16_t, 3>(ubo_load_11_xz[0], ubo_load_11_y, ubo_load_11_xz[1]), vector<float16_t, 3>(ubo_load_13_xz[0], ubo_load_13_y, ubo_load_13_xz[1]));
+}
+
+matrix<float16_t, 2, 4> tint_symbol_29(uint4 buffer[400], uint offset) {
+  const uint scalar_offset_31 = ((offset + 0u)) / 4;
+  uint4 ubo_load_16 = buffer[scalar_offset_31 / 4];
+  uint2 ubo_load_15 = ((scalar_offset_31 & 2) ? ubo_load_16.zw : ubo_load_16.xy);
+  vector<float16_t, 2> ubo_load_15_xz = vector<float16_t, 2>(f16tof32(ubo_load_15 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_15_yw = vector<float16_t, 2>(f16tof32(ubo_load_15 >> 16));
+  const uint scalar_offset_32 = ((offset + 8u)) / 4;
+  uint4 ubo_load_18 = buffer[scalar_offset_32 / 4];
+  uint2 ubo_load_17 = ((scalar_offset_32 & 2) ? ubo_load_18.zw : ubo_load_18.xy);
+  vector<float16_t, 2> ubo_load_17_xz = vector<float16_t, 2>(f16tof32(ubo_load_17 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_17_yw = vector<float16_t, 2>(f16tof32(ubo_load_17 >> 16));
+  return matrix<float16_t, 2, 4>(vector<float16_t, 4>(ubo_load_15_xz[0], ubo_load_15_yw[0], ubo_load_15_xz[1], ubo_load_15_yw[1]), vector<float16_t, 4>(ubo_load_17_xz[0], ubo_load_17_yw[0], ubo_load_17_xz[1], ubo_load_17_yw[1]));
+}
+
+matrix<float16_t, 3, 2> tint_symbol_30(uint4 buffer[400], uint offset) {
+  const uint scalar_offset_33 = ((offset + 0u)) / 4;
+  uint ubo_load_19 = buffer[scalar_offset_33 / 4][scalar_offset_33 % 4];
+  const uint scalar_offset_34 = ((offset + 4u)) / 4;
+  uint ubo_load_20 = buffer[scalar_offset_34 / 4][scalar_offset_34 % 4];
+  const uint scalar_offset_35 = ((offset + 8u)) / 4;
+  uint ubo_load_21 = buffer[scalar_offset_35 / 4][scalar_offset_35 % 4];
+  return matrix<float16_t, 3, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_19 & 0xFFFF)), float16_t(f16tof32(ubo_load_19 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_20 & 0xFFFF)), float16_t(f16tof32(ubo_load_20 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_21 & 0xFFFF)), float16_t(f16tof32(ubo_load_21 >> 16))));
+}
+
+matrix<float16_t, 3, 3> tint_symbol_31(uint4 buffer[400], uint offset) {
+  const uint scalar_offset_36 = ((offset + 0u)) / 4;
+  uint4 ubo_load_23 = buffer[scalar_offset_36 / 4];
+  uint2 ubo_load_22 = ((scalar_offset_36 & 2) ? ubo_load_23.zw : ubo_load_23.xy);
+  vector<float16_t, 2> ubo_load_22_xz = vector<float16_t, 2>(f16tof32(ubo_load_22 & 0xFFFF));
+  float16_t ubo_load_22_y = f16tof32(ubo_load_22[0] >> 16);
+  const uint scalar_offset_37 = ((offset + 8u)) / 4;
+  uint4 ubo_load_25 = buffer[scalar_offset_37 / 4];
+  uint2 ubo_load_24 = ((scalar_offset_37 & 2) ? ubo_load_25.zw : ubo_load_25.xy);
+  vector<float16_t, 2> ubo_load_24_xz = vector<float16_t, 2>(f16tof32(ubo_load_24 & 0xFFFF));
+  float16_t ubo_load_24_y = f16tof32(ubo_load_24[0] >> 16);
+  const uint scalar_offset_38 = ((offset + 16u)) / 4;
+  uint4 ubo_load_27 = buffer[scalar_offset_38 / 4];
+  uint2 ubo_load_26 = ((scalar_offset_38 & 2) ? ubo_load_27.zw : ubo_load_27.xy);
+  vector<float16_t, 2> ubo_load_26_xz = vector<float16_t, 2>(f16tof32(ubo_load_26 & 0xFFFF));
+  float16_t ubo_load_26_y = f16tof32(ubo_load_26[0] >> 16);
+  return matrix<float16_t, 3, 3>(vector<float16_t, 3>(ubo_load_22_xz[0], ubo_load_22_y, ubo_load_22_xz[1]), vector<float16_t, 3>(ubo_load_24_xz[0], ubo_load_24_y, ubo_load_24_xz[1]), vector<float16_t, 3>(ubo_load_26_xz[0], ubo_load_26_y, ubo_load_26_xz[1]));
+}
+
+matrix<float16_t, 3, 4> tint_symbol_32(uint4 buffer[400], uint offset) {
+  const uint scalar_offset_39 = ((offset + 0u)) / 4;
+  uint4 ubo_load_29 = buffer[scalar_offset_39 / 4];
+  uint2 ubo_load_28 = ((scalar_offset_39 & 2) ? ubo_load_29.zw : ubo_load_29.xy);
+  vector<float16_t, 2> ubo_load_28_xz = vector<float16_t, 2>(f16tof32(ubo_load_28 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_28_yw = vector<float16_t, 2>(f16tof32(ubo_load_28 >> 16));
+  const uint scalar_offset_40 = ((offset + 8u)) / 4;
+  uint4 ubo_load_31 = buffer[scalar_offset_40 / 4];
+  uint2 ubo_load_30 = ((scalar_offset_40 & 2) ? ubo_load_31.zw : ubo_load_31.xy);
+  vector<float16_t, 2> ubo_load_30_xz = vector<float16_t, 2>(f16tof32(ubo_load_30 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_30_yw = vector<float16_t, 2>(f16tof32(ubo_load_30 >> 16));
+  const uint scalar_offset_41 = ((offset + 16u)) / 4;
+  uint4 ubo_load_33 = buffer[scalar_offset_41 / 4];
+  uint2 ubo_load_32 = ((scalar_offset_41 & 2) ? ubo_load_33.zw : ubo_load_33.xy);
+  vector<float16_t, 2> ubo_load_32_xz = vector<float16_t, 2>(f16tof32(ubo_load_32 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_32_yw = vector<float16_t, 2>(f16tof32(ubo_load_32 >> 16));
+  return matrix<float16_t, 3, 4>(vector<float16_t, 4>(ubo_load_28_xz[0], ubo_load_28_yw[0], ubo_load_28_xz[1], ubo_load_28_yw[1]), vector<float16_t, 4>(ubo_load_30_xz[0], ubo_load_30_yw[0], ubo_load_30_xz[1], ubo_load_30_yw[1]), vector<float16_t, 4>(ubo_load_32_xz[0], ubo_load_32_yw[0], ubo_load_32_xz[1], ubo_load_32_yw[1]));
+}
+
+matrix<float16_t, 4, 2> tint_symbol_33(uint4 buffer[400], uint offset) {
+  const uint scalar_offset_42 = ((offset + 0u)) / 4;
+  uint ubo_load_34 = buffer[scalar_offset_42 / 4][scalar_offset_42 % 4];
+  const uint scalar_offset_43 = ((offset + 4u)) / 4;
+  uint ubo_load_35 = buffer[scalar_offset_43 / 4][scalar_offset_43 % 4];
+  const uint scalar_offset_44 = ((offset + 8u)) / 4;
+  uint ubo_load_36 = buffer[scalar_offset_44 / 4][scalar_offset_44 % 4];
+  const uint scalar_offset_45 = ((offset + 12u)) / 4;
+  uint ubo_load_37 = buffer[scalar_offset_45 / 4][scalar_offset_45 % 4];
+  return matrix<float16_t, 4, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_34 & 0xFFFF)), float16_t(f16tof32(ubo_load_34 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_35 & 0xFFFF)), float16_t(f16tof32(ubo_load_35 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_36 & 0xFFFF)), float16_t(f16tof32(ubo_load_36 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_37 & 0xFFFF)), float16_t(f16tof32(ubo_load_37 >> 16))));
+}
+
+matrix<float16_t, 4, 3> tint_symbol_34(uint4 buffer[400], uint offset) {
+  const uint scalar_offset_46 = ((offset + 0u)) / 4;
+  uint4 ubo_load_39 = buffer[scalar_offset_46 / 4];
+  uint2 ubo_load_38 = ((scalar_offset_46 & 2) ? ubo_load_39.zw : ubo_load_39.xy);
+  vector<float16_t, 2> ubo_load_38_xz = vector<float16_t, 2>(f16tof32(ubo_load_38 & 0xFFFF));
+  float16_t ubo_load_38_y = f16tof32(ubo_load_38[0] >> 16);
+  const uint scalar_offset_47 = ((offset + 8u)) / 4;
+  uint4 ubo_load_41 = buffer[scalar_offset_47 / 4];
+  uint2 ubo_load_40 = ((scalar_offset_47 & 2) ? ubo_load_41.zw : ubo_load_41.xy);
+  vector<float16_t, 2> ubo_load_40_xz = vector<float16_t, 2>(f16tof32(ubo_load_40 & 0xFFFF));
+  float16_t ubo_load_40_y = f16tof32(ubo_load_40[0] >> 16);
+  const uint scalar_offset_48 = ((offset + 16u)) / 4;
+  uint4 ubo_load_43 = buffer[scalar_offset_48 / 4];
+  uint2 ubo_load_42 = ((scalar_offset_48 & 2) ? ubo_load_43.zw : ubo_load_43.xy);
+  vector<float16_t, 2> ubo_load_42_xz = vector<float16_t, 2>(f16tof32(ubo_load_42 & 0xFFFF));
+  float16_t ubo_load_42_y = f16tof32(ubo_load_42[0] >> 16);
+  const uint scalar_offset_49 = ((offset + 24u)) / 4;
+  uint4 ubo_load_45 = buffer[scalar_offset_49 / 4];
+  uint2 ubo_load_44 = ((scalar_offset_49 & 2) ? ubo_load_45.zw : ubo_load_45.xy);
+  vector<float16_t, 2> ubo_load_44_xz = vector<float16_t, 2>(f16tof32(ubo_load_44 & 0xFFFF));
+  float16_t ubo_load_44_y = f16tof32(ubo_load_44[0] >> 16);
+  return matrix<float16_t, 4, 3>(vector<float16_t, 3>(ubo_load_38_xz[0], ubo_load_38_y, ubo_load_38_xz[1]), vector<float16_t, 3>(ubo_load_40_xz[0], ubo_load_40_y, ubo_load_40_xz[1]), vector<float16_t, 3>(ubo_load_42_xz[0], ubo_load_42_y, ubo_load_42_xz[1]), vector<float16_t, 3>(ubo_load_44_xz[0], ubo_load_44_y, ubo_load_44_xz[1]));
+}
+
+matrix<float16_t, 4, 4> tint_symbol_35(uint4 buffer[400], uint offset) {
+  const uint scalar_offset_50 = ((offset + 0u)) / 4;
+  uint4 ubo_load_47 = buffer[scalar_offset_50 / 4];
+  uint2 ubo_load_46 = ((scalar_offset_50 & 2) ? ubo_load_47.zw : ubo_load_47.xy);
+  vector<float16_t, 2> ubo_load_46_xz = vector<float16_t, 2>(f16tof32(ubo_load_46 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_46_yw = vector<float16_t, 2>(f16tof32(ubo_load_46 >> 16));
+  const uint scalar_offset_51 = ((offset + 8u)) / 4;
+  uint4 ubo_load_49 = buffer[scalar_offset_51 / 4];
+  uint2 ubo_load_48 = ((scalar_offset_51 & 2) ? ubo_load_49.zw : ubo_load_49.xy);
+  vector<float16_t, 2> ubo_load_48_xz = vector<float16_t, 2>(f16tof32(ubo_load_48 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_48_yw = vector<float16_t, 2>(f16tof32(ubo_load_48 >> 16));
+  const uint scalar_offset_52 = ((offset + 16u)) / 4;
+  uint4 ubo_load_51 = buffer[scalar_offset_52 / 4];
+  uint2 ubo_load_50 = ((scalar_offset_52 & 2) ? ubo_load_51.zw : ubo_load_51.xy);
+  vector<float16_t, 2> ubo_load_50_xz = vector<float16_t, 2>(f16tof32(ubo_load_50 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_50_yw = vector<float16_t, 2>(f16tof32(ubo_load_50 >> 16));
+  const uint scalar_offset_53 = ((offset + 24u)) / 4;
+  uint4 ubo_load_53 = buffer[scalar_offset_53 / 4];
+  uint2 ubo_load_52 = ((scalar_offset_53 & 2) ? ubo_load_53.zw : ubo_load_53.xy);
+  vector<float16_t, 2> ubo_load_52_xz = vector<float16_t, 2>(f16tof32(ubo_load_52 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_52_yw = vector<float16_t, 2>(f16tof32(ubo_load_52 >> 16));
+  return matrix<float16_t, 4, 4>(vector<float16_t, 4>(ubo_load_46_xz[0], ubo_load_46_yw[0], ubo_load_46_xz[1], ubo_load_46_yw[1]), vector<float16_t, 4>(ubo_load_48_xz[0], ubo_load_48_yw[0], ubo_load_48_xz[1], ubo_load_48_yw[1]), vector<float16_t, 4>(ubo_load_50_xz[0], ubo_load_50_yw[0], ubo_load_50_xz[1], ubo_load_50_yw[1]), vector<float16_t, 4>(ubo_load_52_xz[0], ubo_load_52_yw[0], ubo_load_52_xz[1], ubo_load_52_yw[1]));
+}
+
+typedef float3 tint_symbol_36_ret[2];
+tint_symbol_36_ret tint_symbol_36(uint4 buffer[400], uint offset) {
+  float3 arr_1[2] = (float3[2])0;
+  {
+    for(uint i = 0u; (i < 2u); i = (i + 1u)) {
+      const uint scalar_offset_54 = ((offset + (i * 16u))) / 4;
+      arr_1[i] = asfloat(buffer[scalar_offset_54 / 4].xyz);
+    }
+  }
+  return arr_1;
+}
+
+typedef matrix<float16_t, 4, 2> tint_symbol_37_ret[2];
+tint_symbol_37_ret tint_symbol_37(uint4 buffer[400], uint offset) {
+  matrix<float16_t, 4, 2> arr_2[2] = (matrix<float16_t, 4, 2>[2])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 2u); i_1 = (i_1 + 1u)) {
+      arr_2[i_1] = tint_symbol_33(buffer, (offset + (i_1 * 16u)));
+    }
+  }
+  return arr_2;
+}
+
+void main_inner(uint idx) {
+  const uint scalar_offset_55 = ((800u * idx)) / 4;
+  const float scalar_f32 = asfloat(ub[scalar_offset_55 / 4][scalar_offset_55 % 4]);
+  const uint scalar_offset_56 = (((800u * idx) + 4u)) / 4;
+  const int scalar_i32 = asint(ub[scalar_offset_56 / 4][scalar_offset_56 % 4]);
+  const uint scalar_offset_57 = (((800u * idx) + 8u)) / 4;
+  const uint scalar_u32 = ub[scalar_offset_57 / 4][scalar_offset_57 % 4];
+  const uint scalar_offset_bytes = (((800u * idx) + 12u));
+  const uint scalar_offset_index = scalar_offset_bytes / 4;
+  const float16_t scalar_f16 = float16_t(f16tof32(((ub[scalar_offset_index / 4][scalar_offset_index % 4] >> (scalar_offset_bytes % 4 == 0 ? 0 : 16)) & 0xFFFF)));
+  const uint scalar_offset_58 = (((800u * idx) + 16u)) / 4;
+  uint4 ubo_load_54 = ub[scalar_offset_58 / 4];
+  const float2 vec2_f32 = asfloat(((scalar_offset_58 & 2) ? ubo_load_54.zw : ubo_load_54.xy));
+  const uint scalar_offset_59 = (((800u * idx) + 24u)) / 4;
+  uint4 ubo_load_55 = ub[scalar_offset_59 / 4];
+  const int2 vec2_i32 = asint(((scalar_offset_59 & 2) ? ubo_load_55.zw : ubo_load_55.xy));
+  const uint scalar_offset_60 = (((800u * idx) + 32u)) / 4;
+  uint4 ubo_load_56 = ub[scalar_offset_60 / 4];
+  const uint2 vec2_u32 = ((scalar_offset_60 & 2) ? ubo_load_56.zw : ubo_load_56.xy);
+  const uint scalar_offset_61 = (((800u * idx) + 40u)) / 4;
+  uint ubo_load_57 = ub[scalar_offset_61 / 4][scalar_offset_61 % 4];
+  const vector<float16_t, 2> vec2_f16 = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_57 & 0xFFFF)), float16_t(f16tof32(ubo_load_57 >> 16)));
+  const uint scalar_offset_62 = (((800u * idx) + 48u)) / 4;
+  const float3 vec3_f32 = asfloat(ub[scalar_offset_62 / 4].xyz);
+  const uint scalar_offset_63 = (((800u * idx) + 64u)) / 4;
+  const int3 vec3_i32 = asint(ub[scalar_offset_63 / 4].xyz);
+  const uint scalar_offset_64 = (((800u * idx) + 80u)) / 4;
+  const uint3 vec3_u32 = ub[scalar_offset_64 / 4].xyz;
+  const uint scalar_offset_65 = (((800u * idx) + 96u)) / 4;
+  uint4 ubo_load_59 = ub[scalar_offset_65 / 4];
+  uint2 ubo_load_58 = ((scalar_offset_65 & 2) ? ubo_load_59.zw : ubo_load_59.xy);
+  vector<float16_t, 2> ubo_load_58_xz = vector<float16_t, 2>(f16tof32(ubo_load_58 & 0xFFFF));
+  float16_t ubo_load_58_y = f16tof32(ubo_load_58[0] >> 16);
+  const vector<float16_t, 3> vec3_f16 = vector<float16_t, 3>(ubo_load_58_xz[0], ubo_load_58_y, ubo_load_58_xz[1]);
+  const uint scalar_offset_66 = (((800u * idx) + 112u)) / 4;
+  const float4 vec4_f32 = asfloat(ub[scalar_offset_66 / 4]);
+  const uint scalar_offset_67 = (((800u * idx) + 128u)) / 4;
+  const int4 vec4_i32 = asint(ub[scalar_offset_67 / 4]);
+  const uint scalar_offset_68 = (((800u * idx) + 144u)) / 4;
+  const uint4 vec4_u32 = ub[scalar_offset_68 / 4];
+  const uint scalar_offset_69 = (((800u * idx) + 160u)) / 4;
+  uint4 ubo_load_61 = ub[scalar_offset_69 / 4];
+  uint2 ubo_load_60 = ((scalar_offset_69 & 2) ? ubo_load_61.zw : ubo_load_61.xy);
+  vector<float16_t, 2> ubo_load_60_xz = vector<float16_t, 2>(f16tof32(ubo_load_60 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_60_yw = vector<float16_t, 2>(f16tof32(ubo_load_60 >> 16));
+  const vector<float16_t, 4> vec4_f16 = vector<float16_t, 4>(ubo_load_60_xz[0], ubo_load_60_yw[0], ubo_load_60_xz[1], ubo_load_60_yw[1]);
+  const float2x2 mat2x2_f32 = tint_symbol_18(ub, ((800u * idx) + 168u));
+  const float2x3 mat2x3_f32 = tint_symbol_19(ub, ((800u * idx) + 192u));
+  const float2x4 mat2x4_f32 = tint_symbol_20(ub, ((800u * idx) + 224u));
+  const float3x2 mat3x2_f32 = tint_symbol_21(ub, ((800u * idx) + 256u));
+  const float3x3 mat3x3_f32 = tint_symbol_22(ub, ((800u * idx) + 288u));
+  const float3x4 mat3x4_f32 = tint_symbol_23(ub, ((800u * idx) + 336u));
+  const float4x2 mat4x2_f32 = tint_symbol_24(ub, ((800u * idx) + 384u));
+  const float4x3 mat4x3_f32 = tint_symbol_25(ub, ((800u * idx) + 416u));
+  const float4x4 mat4x4_f32 = tint_symbol_26(ub, ((800u * idx) + 480u));
+  const matrix<float16_t, 2, 2> mat2x2_f16 = tint_symbol_27(ub, ((800u * idx) + 544u));
+  const matrix<float16_t, 2, 3> mat2x3_f16 = tint_symbol_28(ub, ((800u * idx) + 552u));
+  const matrix<float16_t, 2, 4> mat2x4_f16 = tint_symbol_29(ub, ((800u * idx) + 568u));
+  const matrix<float16_t, 3, 2> mat3x2_f16 = tint_symbol_30(ub, ((800u * idx) + 584u));
+  const matrix<float16_t, 3, 3> mat3x3_f16 = tint_symbol_31(ub, ((800u * idx) + 600u));
+  const matrix<float16_t, 3, 4> mat3x4_f16 = tint_symbol_32(ub, ((800u * idx) + 624u));
+  const matrix<float16_t, 4, 2> mat4x2_f16 = tint_symbol_33(ub, ((800u * idx) + 648u));
+  const matrix<float16_t, 4, 3> mat4x3_f16 = tint_symbol_34(ub, ((800u * idx) + 664u));
+  const matrix<float16_t, 4, 4> mat4x4_f16 = tint_symbol_35(ub, ((800u * idx) + 696u));
+  const float3 arr2_vec3_f32[2] = tint_symbol_36(ub, ((800u * idx) + 736u));
+  const matrix<float16_t, 4, 2> arr2_mat4x2_f16[2] = tint_symbol_37(ub, ((800u * idx) + 768u));
+}
+
+[numthreads(1, 1, 1)]
+void main(tint_symbol_1 tint_symbol) {
+  main_inner(tint_symbol.idx);
+  return;
+}

diff --git a/test/tint/buffer/uniform/dynamic_index/read_f16.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/dynamic_index/read_f16.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..623e140
--- /dev/null
+++ b/test/tint/buffer/uniform/dynamic_index/read_f16.wgsl.expected.fxc.hlsl

@@ -0,0 +1,325 @@
+SKIP: FAILED
+
+cbuffer cbuffer_ub : register(b0, space0) {
+  uint4 ub[400];
+};
+
+struct tint_symbol_1 {
+  uint idx : SV_GroupIndex;
+};
+
+float2x2 tint_symbol_18(uint4 buffer[400], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  return float2x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)));
+}
+
+float2x3 tint_symbol_19(uint4 buffer[400], uint offset) {
+  const uint scalar_offset_2 = ((offset + 0u)) / 4;
+  const uint scalar_offset_3 = ((offset + 16u)) / 4;
+  return float2x3(asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
+}
+
+float2x4 tint_symbol_20(uint4 buffer[400], uint offset) {
+  const uint scalar_offset_4 = ((offset + 0u)) / 4;
+  const uint scalar_offset_5 = ((offset + 16u)) / 4;
+  return float2x4(asfloat(buffer[scalar_offset_4 / 4]), asfloat(buffer[scalar_offset_5 / 4]));
+}
+
+float3x2 tint_symbol_21(uint4 buffer[400], uint offset) {
+  const uint scalar_offset_6 = ((offset + 0u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_6 / 4];
+  const uint scalar_offset_7 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_7 / 4];
+  const uint scalar_offset_8 = ((offset + 16u)) / 4;
+  uint4 ubo_load_4 = buffer[scalar_offset_8 / 4];
+  return float3x2(asfloat(((scalar_offset_6 & 2) ? ubo_load_2.zw : ubo_load_2.xy)), asfloat(((scalar_offset_7 & 2) ? ubo_load_3.zw : ubo_load_3.xy)), asfloat(((scalar_offset_8 & 2) ? ubo_load_4.zw : ubo_load_4.xy)));
+}
+
+float3x3 tint_symbol_22(uint4 buffer[400], uint offset) {
+  const uint scalar_offset_9 = ((offset + 0u)) / 4;
+  const uint scalar_offset_10 = ((offset + 16u)) / 4;
+  const uint scalar_offset_11 = ((offset + 32u)) / 4;
+  return float3x3(asfloat(buffer[scalar_offset_9 / 4].xyz), asfloat(buffer[scalar_offset_10 / 4].xyz), asfloat(buffer[scalar_offset_11 / 4].xyz));
+}
+
+float3x4 tint_symbol_23(uint4 buffer[400], uint offset) {
+  const uint scalar_offset_12 = ((offset + 0u)) / 4;
+  const uint scalar_offset_13 = ((offset + 16u)) / 4;
+  const uint scalar_offset_14 = ((offset + 32u)) / 4;
+  return float3x4(asfloat(buffer[scalar_offset_12 / 4]), asfloat(buffer[scalar_offset_13 / 4]), asfloat(buffer[scalar_offset_14 / 4]));
+}
+
+float4x2 tint_symbol_24(uint4 buffer[400], uint offset) {
+  const uint scalar_offset_15 = ((offset + 0u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_15 / 4];
+  const uint scalar_offset_16 = ((offset + 8u)) / 4;
+  uint4 ubo_load_6 = buffer[scalar_offset_16 / 4];
+  const uint scalar_offset_17 = ((offset + 16u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_17 / 4];
+  const uint scalar_offset_18 = ((offset + 24u)) / 4;
+  uint4 ubo_load_8 = buffer[scalar_offset_18 / 4];
+  return float4x2(asfloat(((scalar_offset_15 & 2) ? ubo_load_5.zw : ubo_load_5.xy)), asfloat(((scalar_offset_16 & 2) ? ubo_load_6.zw : ubo_load_6.xy)), asfloat(((scalar_offset_17 & 2) ? ubo_load_7.zw : ubo_load_7.xy)), asfloat(((scalar_offset_18 & 2) ? ubo_load_8.zw : ubo_load_8.xy)));
+}
+
+float4x3 tint_symbol_25(uint4 buffer[400], uint offset) {
+  const uint scalar_offset_19 = ((offset + 0u)) / 4;
+  const uint scalar_offset_20 = ((offset + 16u)) / 4;
+  const uint scalar_offset_21 = ((offset + 32u)) / 4;
+  const uint scalar_offset_22 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset_19 / 4].xyz), asfloat(buffer[scalar_offset_20 / 4].xyz), asfloat(buffer[scalar_offset_21 / 4].xyz), asfloat(buffer[scalar_offset_22 / 4].xyz));
+}
+
+float4x4 tint_symbol_26(uint4 buffer[400], uint offset) {
+  const uint scalar_offset_23 = ((offset + 0u)) / 4;
+  const uint scalar_offset_24 = ((offset + 16u)) / 4;
+  const uint scalar_offset_25 = ((offset + 32u)) / 4;
+  const uint scalar_offset_26 = ((offset + 48u)) / 4;
+  return float4x4(asfloat(buffer[scalar_offset_23 / 4]), asfloat(buffer[scalar_offset_24 / 4]), asfloat(buffer[scalar_offset_25 / 4]), asfloat(buffer[scalar_offset_26 / 4]));
+}
+
+matrix<float16_t, 2, 2> tint_symbol_27(uint4 buffer[400], uint offset) {
+  const uint scalar_offset_27 = ((offset + 0u)) / 4;
+  uint ubo_load_9 = buffer[scalar_offset_27 / 4][scalar_offset_27 % 4];
+  const uint scalar_offset_28 = ((offset + 4u)) / 4;
+  uint ubo_load_10 = buffer[scalar_offset_28 / 4][scalar_offset_28 % 4];
+  return matrix<float16_t, 2, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_9 & 0xFFFF)), float16_t(f16tof32(ubo_load_9 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_10 & 0xFFFF)), float16_t(f16tof32(ubo_load_10 >> 16))));
+}
+
+matrix<float16_t, 2, 3> tint_symbol_28(uint4 buffer[400], uint offset) {
+  const uint scalar_offset_29 = ((offset + 0u)) / 4;
+  uint4 ubo_load_12 = buffer[scalar_offset_29 / 4];
+  uint2 ubo_load_11 = ((scalar_offset_29 & 2) ? ubo_load_12.zw : ubo_load_12.xy);
+  vector<float16_t, 2> ubo_load_11_xz = vector<float16_t, 2>(f16tof32(ubo_load_11 & 0xFFFF));
+  float16_t ubo_load_11_y = f16tof32(ubo_load_11[0] >> 16);
+  const uint scalar_offset_30 = ((offset + 8u)) / 4;
+  uint4 ubo_load_14 = buffer[scalar_offset_30 / 4];
+  uint2 ubo_load_13 = ((scalar_offset_30 & 2) ? ubo_load_14.zw : ubo_load_14.xy);
+  vector<float16_t, 2> ubo_load_13_xz = vector<float16_t, 2>(f16tof32(ubo_load_13 & 0xFFFF));
+  float16_t ubo_load_13_y = f16tof32(ubo_load_13[0] >> 16);
+  return matrix<float16_t, 2, 3>(vector<float16_t, 3>(ubo_load_11_xz[0], ubo_load_11_y, ubo_load_11_xz[1]), vector<float16_t, 3>(ubo_load_13_xz[0], ubo_load_13_y, ubo_load_13_xz[1]));
+}
+
+matrix<float16_t, 2, 4> tint_symbol_29(uint4 buffer[400], uint offset) {
+  const uint scalar_offset_31 = ((offset + 0u)) / 4;
+  uint4 ubo_load_16 = buffer[scalar_offset_31 / 4];
+  uint2 ubo_load_15 = ((scalar_offset_31 & 2) ? ubo_load_16.zw : ubo_load_16.xy);
+  vector<float16_t, 2> ubo_load_15_xz = vector<float16_t, 2>(f16tof32(ubo_load_15 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_15_yw = vector<float16_t, 2>(f16tof32(ubo_load_15 >> 16));
+  const uint scalar_offset_32 = ((offset + 8u)) / 4;
+  uint4 ubo_load_18 = buffer[scalar_offset_32 / 4];
+  uint2 ubo_load_17 = ((scalar_offset_32 & 2) ? ubo_load_18.zw : ubo_load_18.xy);
+  vector<float16_t, 2> ubo_load_17_xz = vector<float16_t, 2>(f16tof32(ubo_load_17 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_17_yw = vector<float16_t, 2>(f16tof32(ubo_load_17 >> 16));
+  return matrix<float16_t, 2, 4>(vector<float16_t, 4>(ubo_load_15_xz[0], ubo_load_15_yw[0], ubo_load_15_xz[1], ubo_load_15_yw[1]), vector<float16_t, 4>(ubo_load_17_xz[0], ubo_load_17_yw[0], ubo_load_17_xz[1], ubo_load_17_yw[1]));
+}
+
+matrix<float16_t, 3, 2> tint_symbol_30(uint4 buffer[400], uint offset) {
+  const uint scalar_offset_33 = ((offset + 0u)) / 4;
+  uint ubo_load_19 = buffer[scalar_offset_33 / 4][scalar_offset_33 % 4];
+  const uint scalar_offset_34 = ((offset + 4u)) / 4;
+  uint ubo_load_20 = buffer[scalar_offset_34 / 4][scalar_offset_34 % 4];
+  const uint scalar_offset_35 = ((offset + 8u)) / 4;
+  uint ubo_load_21 = buffer[scalar_offset_35 / 4][scalar_offset_35 % 4];
+  return matrix<float16_t, 3, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_19 & 0xFFFF)), float16_t(f16tof32(ubo_load_19 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_20 & 0xFFFF)), float16_t(f16tof32(ubo_load_20 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_21 & 0xFFFF)), float16_t(f16tof32(ubo_load_21 >> 16))));
+}
+
+matrix<float16_t, 3, 3> tint_symbol_31(uint4 buffer[400], uint offset) {
+  const uint scalar_offset_36 = ((offset + 0u)) / 4;
+  uint4 ubo_load_23 = buffer[scalar_offset_36 / 4];
+  uint2 ubo_load_22 = ((scalar_offset_36 & 2) ? ubo_load_23.zw : ubo_load_23.xy);
+  vector<float16_t, 2> ubo_load_22_xz = vector<float16_t, 2>(f16tof32(ubo_load_22 & 0xFFFF));
+  float16_t ubo_load_22_y = f16tof32(ubo_load_22[0] >> 16);
+  const uint scalar_offset_37 = ((offset + 8u)) / 4;
+  uint4 ubo_load_25 = buffer[scalar_offset_37 / 4];
+  uint2 ubo_load_24 = ((scalar_offset_37 & 2) ? ubo_load_25.zw : ubo_load_25.xy);
+  vector<float16_t, 2> ubo_load_24_xz = vector<float16_t, 2>(f16tof32(ubo_load_24 & 0xFFFF));
+  float16_t ubo_load_24_y = f16tof32(ubo_load_24[0] >> 16);
+  const uint scalar_offset_38 = ((offset + 16u)) / 4;
+  uint4 ubo_load_27 = buffer[scalar_offset_38 / 4];
+  uint2 ubo_load_26 = ((scalar_offset_38 & 2) ? ubo_load_27.zw : ubo_load_27.xy);
+  vector<float16_t, 2> ubo_load_26_xz = vector<float16_t, 2>(f16tof32(ubo_load_26 & 0xFFFF));
+  float16_t ubo_load_26_y = f16tof32(ubo_load_26[0] >> 16);
+  return matrix<float16_t, 3, 3>(vector<float16_t, 3>(ubo_load_22_xz[0], ubo_load_22_y, ubo_load_22_xz[1]), vector<float16_t, 3>(ubo_load_24_xz[0], ubo_load_24_y, ubo_load_24_xz[1]), vector<float16_t, 3>(ubo_load_26_xz[0], ubo_load_26_y, ubo_load_26_xz[1]));
+}
+
+matrix<float16_t, 3, 4> tint_symbol_32(uint4 buffer[400], uint offset) {
+  const uint scalar_offset_39 = ((offset + 0u)) / 4;
+  uint4 ubo_load_29 = buffer[scalar_offset_39 / 4];
+  uint2 ubo_load_28 = ((scalar_offset_39 & 2) ? ubo_load_29.zw : ubo_load_29.xy);
+  vector<float16_t, 2> ubo_load_28_xz = vector<float16_t, 2>(f16tof32(ubo_load_28 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_28_yw = vector<float16_t, 2>(f16tof32(ubo_load_28 >> 16));
+  const uint scalar_offset_40 = ((offset + 8u)) / 4;
+  uint4 ubo_load_31 = buffer[scalar_offset_40 / 4];
+  uint2 ubo_load_30 = ((scalar_offset_40 & 2) ? ubo_load_31.zw : ubo_load_31.xy);
+  vector<float16_t, 2> ubo_load_30_xz = vector<float16_t, 2>(f16tof32(ubo_load_30 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_30_yw = vector<float16_t, 2>(f16tof32(ubo_load_30 >> 16));
+  const uint scalar_offset_41 = ((offset + 16u)) / 4;
+  uint4 ubo_load_33 = buffer[scalar_offset_41 / 4];
+  uint2 ubo_load_32 = ((scalar_offset_41 & 2) ? ubo_load_33.zw : ubo_load_33.xy);
+  vector<float16_t, 2> ubo_load_32_xz = vector<float16_t, 2>(f16tof32(ubo_load_32 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_32_yw = vector<float16_t, 2>(f16tof32(ubo_load_32 >> 16));
+  return matrix<float16_t, 3, 4>(vector<float16_t, 4>(ubo_load_28_xz[0], ubo_load_28_yw[0], ubo_load_28_xz[1], ubo_load_28_yw[1]), vector<float16_t, 4>(ubo_load_30_xz[0], ubo_load_30_yw[0], ubo_load_30_xz[1], ubo_load_30_yw[1]), vector<float16_t, 4>(ubo_load_32_xz[0], ubo_load_32_yw[0], ubo_load_32_xz[1], ubo_load_32_yw[1]));
+}
+
+matrix<float16_t, 4, 2> tint_symbol_33(uint4 buffer[400], uint offset) {
+  const uint scalar_offset_42 = ((offset + 0u)) / 4;
+  uint ubo_load_34 = buffer[scalar_offset_42 / 4][scalar_offset_42 % 4];
+  const uint scalar_offset_43 = ((offset + 4u)) / 4;
+  uint ubo_load_35 = buffer[scalar_offset_43 / 4][scalar_offset_43 % 4];
+  const uint scalar_offset_44 = ((offset + 8u)) / 4;
+  uint ubo_load_36 = buffer[scalar_offset_44 / 4][scalar_offset_44 % 4];
+  const uint scalar_offset_45 = ((offset + 12u)) / 4;
+  uint ubo_load_37 = buffer[scalar_offset_45 / 4][scalar_offset_45 % 4];
+  return matrix<float16_t, 4, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_34 & 0xFFFF)), float16_t(f16tof32(ubo_load_34 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_35 & 0xFFFF)), float16_t(f16tof32(ubo_load_35 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_36 & 0xFFFF)), float16_t(f16tof32(ubo_load_36 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_37 & 0xFFFF)), float16_t(f16tof32(ubo_load_37 >> 16))));
+}
+
+matrix<float16_t, 4, 3> tint_symbol_34(uint4 buffer[400], uint offset) {
+  const uint scalar_offset_46 = ((offset + 0u)) / 4;
+  uint4 ubo_load_39 = buffer[scalar_offset_46 / 4];
+  uint2 ubo_load_38 = ((scalar_offset_46 & 2) ? ubo_load_39.zw : ubo_load_39.xy);
+  vector<float16_t, 2> ubo_load_38_xz = vector<float16_t, 2>(f16tof32(ubo_load_38 & 0xFFFF));
+  float16_t ubo_load_38_y = f16tof32(ubo_load_38[0] >> 16);
+  const uint scalar_offset_47 = ((offset + 8u)) / 4;
+  uint4 ubo_load_41 = buffer[scalar_offset_47 / 4];
+  uint2 ubo_load_40 = ((scalar_offset_47 & 2) ? ubo_load_41.zw : ubo_load_41.xy);
+  vector<float16_t, 2> ubo_load_40_xz = vector<float16_t, 2>(f16tof32(ubo_load_40 & 0xFFFF));
+  float16_t ubo_load_40_y = f16tof32(ubo_load_40[0] >> 16);
+  const uint scalar_offset_48 = ((offset + 16u)) / 4;
+  uint4 ubo_load_43 = buffer[scalar_offset_48 / 4];
+  uint2 ubo_load_42 = ((scalar_offset_48 & 2) ? ubo_load_43.zw : ubo_load_43.xy);
+  vector<float16_t, 2> ubo_load_42_xz = vector<float16_t, 2>(f16tof32(ubo_load_42 & 0xFFFF));
+  float16_t ubo_load_42_y = f16tof32(ubo_load_42[0] >> 16);
+  const uint scalar_offset_49 = ((offset + 24u)) / 4;
+  uint4 ubo_load_45 = buffer[scalar_offset_49 / 4];
+  uint2 ubo_load_44 = ((scalar_offset_49 & 2) ? ubo_load_45.zw : ubo_load_45.xy);
+  vector<float16_t, 2> ubo_load_44_xz = vector<float16_t, 2>(f16tof32(ubo_load_44 & 0xFFFF));
+  float16_t ubo_load_44_y = f16tof32(ubo_load_44[0] >> 16);
+  return matrix<float16_t, 4, 3>(vector<float16_t, 3>(ubo_load_38_xz[0], ubo_load_38_y, ubo_load_38_xz[1]), vector<float16_t, 3>(ubo_load_40_xz[0], ubo_load_40_y, ubo_load_40_xz[1]), vector<float16_t, 3>(ubo_load_42_xz[0], ubo_load_42_y, ubo_load_42_xz[1]), vector<float16_t, 3>(ubo_load_44_xz[0], ubo_load_44_y, ubo_load_44_xz[1]));
+}
+
+matrix<float16_t, 4, 4> tint_symbol_35(uint4 buffer[400], uint offset) {
+  const uint scalar_offset_50 = ((offset + 0u)) / 4;
+  uint4 ubo_load_47 = buffer[scalar_offset_50 / 4];
+  uint2 ubo_load_46 = ((scalar_offset_50 & 2) ? ubo_load_47.zw : ubo_load_47.xy);
+  vector<float16_t, 2> ubo_load_46_xz = vector<float16_t, 2>(f16tof32(ubo_load_46 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_46_yw = vector<float16_t, 2>(f16tof32(ubo_load_46 >> 16));
+  const uint scalar_offset_51 = ((offset + 8u)) / 4;
+  uint4 ubo_load_49 = buffer[scalar_offset_51 / 4];
+  uint2 ubo_load_48 = ((scalar_offset_51 & 2) ? ubo_load_49.zw : ubo_load_49.xy);
+  vector<float16_t, 2> ubo_load_48_xz = vector<float16_t, 2>(f16tof32(ubo_load_48 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_48_yw = vector<float16_t, 2>(f16tof32(ubo_load_48 >> 16));
+  const uint scalar_offset_52 = ((offset + 16u)) / 4;
+  uint4 ubo_load_51 = buffer[scalar_offset_52 / 4];
+  uint2 ubo_load_50 = ((scalar_offset_52 & 2) ? ubo_load_51.zw : ubo_load_51.xy);
+  vector<float16_t, 2> ubo_load_50_xz = vector<float16_t, 2>(f16tof32(ubo_load_50 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_50_yw = vector<float16_t, 2>(f16tof32(ubo_load_50 >> 16));
+  const uint scalar_offset_53 = ((offset + 24u)) / 4;
+  uint4 ubo_load_53 = buffer[scalar_offset_53 / 4];
+  uint2 ubo_load_52 = ((scalar_offset_53 & 2) ? ubo_load_53.zw : ubo_load_53.xy);
+  vector<float16_t, 2> ubo_load_52_xz = vector<float16_t, 2>(f16tof32(ubo_load_52 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_52_yw = vector<float16_t, 2>(f16tof32(ubo_load_52 >> 16));
+  return matrix<float16_t, 4, 4>(vector<float16_t, 4>(ubo_load_46_xz[0], ubo_load_46_yw[0], ubo_load_46_xz[1], ubo_load_46_yw[1]), vector<float16_t, 4>(ubo_load_48_xz[0], ubo_load_48_yw[0], ubo_load_48_xz[1], ubo_load_48_yw[1]), vector<float16_t, 4>(ubo_load_50_xz[0], ubo_load_50_yw[0], ubo_load_50_xz[1], ubo_load_50_yw[1]), vector<float16_t, 4>(ubo_load_52_xz[0], ubo_load_52_yw[0], ubo_load_52_xz[1], ubo_load_52_yw[1]));
+}
+
+typedef float3 tint_symbol_36_ret[2];
+tint_symbol_36_ret tint_symbol_36(uint4 buffer[400], uint offset) {
+  float3 arr_1[2] = (float3[2])0;
+  {
+    for(uint i = 0u; (i < 2u); i = (i + 1u)) {
+      const uint scalar_offset_54 = ((offset + (i * 16u))) / 4;
+      arr_1[i] = asfloat(buffer[scalar_offset_54 / 4].xyz);
+    }
+  }
+  return arr_1;
+}
+
+typedef matrix<float16_t, 4, 2> tint_symbol_37_ret[2];
+tint_symbol_37_ret tint_symbol_37(uint4 buffer[400], uint offset) {
+  matrix<float16_t, 4, 2> arr_2[2] = (matrix<float16_t, 4, 2>[2])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 2u); i_1 = (i_1 + 1u)) {
+      arr_2[i_1] = tint_symbol_33(buffer, (offset + (i_1 * 16u)));
+    }
+  }
+  return arr_2;
+}
+
+void main_inner(uint idx) {
+  const uint scalar_offset_55 = ((800u * idx)) / 4;
+  const float scalar_f32 = asfloat(ub[scalar_offset_55 / 4][scalar_offset_55 % 4]);
+  const uint scalar_offset_56 = (((800u * idx) + 4u)) / 4;
+  const int scalar_i32 = asint(ub[scalar_offset_56 / 4][scalar_offset_56 % 4]);
+  const uint scalar_offset_57 = (((800u * idx) + 8u)) / 4;
+  const uint scalar_u32 = ub[scalar_offset_57 / 4][scalar_offset_57 % 4];
+  const uint scalar_offset_bytes = (((800u * idx) + 12u));
+  const uint scalar_offset_index = scalar_offset_bytes / 4;
+  const float16_t scalar_f16 = float16_t(f16tof32(((ub[scalar_offset_index / 4][scalar_offset_index % 4] >> (scalar_offset_bytes % 4 == 0 ? 0 : 16)) & 0xFFFF)));
+  const uint scalar_offset_58 = (((800u * idx) + 16u)) / 4;
+  uint4 ubo_load_54 = ub[scalar_offset_58 / 4];
+  const float2 vec2_f32 = asfloat(((scalar_offset_58 & 2) ? ubo_load_54.zw : ubo_load_54.xy));
+  const uint scalar_offset_59 = (((800u * idx) + 24u)) / 4;
+  uint4 ubo_load_55 = ub[scalar_offset_59 / 4];
+  const int2 vec2_i32 = asint(((scalar_offset_59 & 2) ? ubo_load_55.zw : ubo_load_55.xy));
+  const uint scalar_offset_60 = (((800u * idx) + 32u)) / 4;
+  uint4 ubo_load_56 = ub[scalar_offset_60 / 4];
+  const uint2 vec2_u32 = ((scalar_offset_60 & 2) ? ubo_load_56.zw : ubo_load_56.xy);
+  const uint scalar_offset_61 = (((800u * idx) + 40u)) / 4;
+  uint ubo_load_57 = ub[scalar_offset_61 / 4][scalar_offset_61 % 4];
+  const vector<float16_t, 2> vec2_f16 = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_57 & 0xFFFF)), float16_t(f16tof32(ubo_load_57 >> 16)));
+  const uint scalar_offset_62 = (((800u * idx) + 48u)) / 4;
+  const float3 vec3_f32 = asfloat(ub[scalar_offset_62 / 4].xyz);
+  const uint scalar_offset_63 = (((800u * idx) + 64u)) / 4;
+  const int3 vec3_i32 = asint(ub[scalar_offset_63 / 4].xyz);
+  const uint scalar_offset_64 = (((800u * idx) + 80u)) / 4;
+  const uint3 vec3_u32 = ub[scalar_offset_64 / 4].xyz;
+  const uint scalar_offset_65 = (((800u * idx) + 96u)) / 4;
+  uint4 ubo_load_59 = ub[scalar_offset_65 / 4];
+  uint2 ubo_load_58 = ((scalar_offset_65 & 2) ? ubo_load_59.zw : ubo_load_59.xy);
+  vector<float16_t, 2> ubo_load_58_xz = vector<float16_t, 2>(f16tof32(ubo_load_58 & 0xFFFF));
+  float16_t ubo_load_58_y = f16tof32(ubo_load_58[0] >> 16);
+  const vector<float16_t, 3> vec3_f16 = vector<float16_t, 3>(ubo_load_58_xz[0], ubo_load_58_y, ubo_load_58_xz[1]);
+  const uint scalar_offset_66 = (((800u * idx) + 112u)) / 4;
+  const float4 vec4_f32 = asfloat(ub[scalar_offset_66 / 4]);
+  const uint scalar_offset_67 = (((800u * idx) + 128u)) / 4;
+  const int4 vec4_i32 = asint(ub[scalar_offset_67 / 4]);
+  const uint scalar_offset_68 = (((800u * idx) + 144u)) / 4;
+  const uint4 vec4_u32 = ub[scalar_offset_68 / 4];
+  const uint scalar_offset_69 = (((800u * idx) + 160u)) / 4;
+  uint4 ubo_load_61 = ub[scalar_offset_69 / 4];
+  uint2 ubo_load_60 = ((scalar_offset_69 & 2) ? ubo_load_61.zw : ubo_load_61.xy);
+  vector<float16_t, 2> ubo_load_60_xz = vector<float16_t, 2>(f16tof32(ubo_load_60 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_60_yw = vector<float16_t, 2>(f16tof32(ubo_load_60 >> 16));
+  const vector<float16_t, 4> vec4_f16 = vector<float16_t, 4>(ubo_load_60_xz[0], ubo_load_60_yw[0], ubo_load_60_xz[1], ubo_load_60_yw[1]);
+  const float2x2 mat2x2_f32 = tint_symbol_18(ub, ((800u * idx) + 168u));
+  const float2x3 mat2x3_f32 = tint_symbol_19(ub, ((800u * idx) + 192u));
+  const float2x4 mat2x4_f32 = tint_symbol_20(ub, ((800u * idx) + 224u));
+  const float3x2 mat3x2_f32 = tint_symbol_21(ub, ((800u * idx) + 256u));
+  const float3x3 mat3x3_f32 = tint_symbol_22(ub, ((800u * idx) + 288u));
+  const float3x4 mat3x4_f32 = tint_symbol_23(ub, ((800u * idx) + 336u));
+  const float4x2 mat4x2_f32 = tint_symbol_24(ub, ((800u * idx) + 384u));
+  const float4x3 mat4x3_f32 = tint_symbol_25(ub, ((800u * idx) + 416u));
+  const float4x4 mat4x4_f32 = tint_symbol_26(ub, ((800u * idx) + 480u));
+  const matrix<float16_t, 2, 2> mat2x2_f16 = tint_symbol_27(ub, ((800u * idx) + 544u));
+  const matrix<float16_t, 2, 3> mat2x3_f16 = tint_symbol_28(ub, ((800u * idx) + 552u));
+  const matrix<float16_t, 2, 4> mat2x4_f16 = tint_symbol_29(ub, ((800u * idx) + 568u));
+  const matrix<float16_t, 3, 2> mat3x2_f16 = tint_symbol_30(ub, ((800u * idx) + 584u));
+  const matrix<float16_t, 3, 3> mat3x3_f16 = tint_symbol_31(ub, ((800u * idx) + 600u));
+  const matrix<float16_t, 3, 4> mat3x4_f16 = tint_symbol_32(ub, ((800u * idx) + 624u));
+  const matrix<float16_t, 4, 2> mat4x2_f16 = tint_symbol_33(ub, ((800u * idx) + 648u));
+  const matrix<float16_t, 4, 3> mat4x3_f16 = tint_symbol_34(ub, ((800u * idx) + 664u));
+  const matrix<float16_t, 4, 4> mat4x4_f16 = tint_symbol_35(ub, ((800u * idx) + 696u));
+  const float3 arr2_vec3_f32[2] = tint_symbol_36(ub, ((800u * idx) + 736u));
+  const matrix<float16_t, 4, 2> arr2_mat4x2_f16[2] = tint_symbol_37(ub, ((800u * idx) + 768u));
+}
+
+[numthreads(1, 1, 1)]
+void main(tint_symbol_1 tint_symbol) {
+  main_inner(tint_symbol.idx);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x00000183446E4210(81,8-16): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/dynamic_index/read_f16.wgsl.expected.glsl b/test/tint/buffer/uniform/dynamic_index/read_f16.wgsl.expected.glsl
new file mode 100644
index 0000000..a03e972
--- /dev/null
+++ b/test/tint/buffer/uniform/dynamic_index/read_f16.wgsl.expected.glsl

@@ -0,0 +1,268 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct mat4x2_f16_4 {
+  f16vec2 col0;
+  f16vec2 col1;
+  f16vec2 col2;
+  f16vec2 col3;
+};
+
+struct Inner {
+  float scalar_f32;
+  int scalar_i32;
+  uint scalar_u32;
+  float16_t scalar_f16;
+  vec2 vec2_f32;
+  ivec2 vec2_i32;
+  uvec2 vec2_u32;
+  f16vec2 vec2_f16;
+  uint pad;
+  vec3 vec3_f32;
+  uint pad_1;
+  ivec3 vec3_i32;
+  uint pad_2;
+  uvec3 vec3_u32;
+  uint pad_3;
+  f16vec3 vec3_f16;
+  uint pad_4;
+  uint pad_5;
+  vec4 vec4_f32;
+  ivec4 vec4_i32;
+  uvec4 vec4_u32;
+  f16vec4 vec4_f16;
+  mat2 mat2x2_f32;
+  uint pad_6;
+  uint pad_7;
+  mat2x3 mat2x3_f32;
+  mat2x4 mat2x4_f32;
+  mat3x2 mat3x2_f32;
+  uint pad_8;
+  uint pad_9;
+  mat3 mat3x3_f32;
+  mat3x4 mat3x4_f32;
+  mat4x2 mat4x2_f32;
+  mat4x3 mat4x3_f32;
+  mat4 mat4x4_f32;
+  f16mat2 mat2x2_f16;
+  f16mat2x3 mat2x3_f16;
+  f16mat2x4 mat2x4_f16;
+  f16mat3x2 mat3x2_f16;
+  uint pad_10;
+  f16mat3 mat3x3_f16;
+  f16mat3x4 mat3x4_f16;
+  f16mat4x2 mat4x2_f16;
+  f16mat4x3 mat4x3_f16;
+  f16mat4 mat4x4_f16;
+  uint pad_11;
+  uint pad_12;
+  vec3 arr2_vec3_f32[2];
+  f16mat4x2 arr2_mat4x2_f16[2];
+};
+
+struct Inner_std140 {
+  float scalar_f32;
+  int scalar_i32;
+  uint scalar_u32;
+  float16_t scalar_f16;
+  vec2 vec2_f32;
+  ivec2 vec2_i32;
+  uvec2 vec2_u32;
+  f16vec2 vec2_f16;
+  uint pad;
+  vec3 vec3_f32;
+  uint pad_1;
+  ivec3 vec3_i32;
+  uint pad_2;
+  uvec3 vec3_u32;
+  uint pad_3;
+  f16vec3 vec3_f16;
+  uint pad_4;
+  uint pad_5;
+  vec4 vec4_f32;
+  ivec4 vec4_i32;
+  uvec4 vec4_u32;
+  f16vec4 vec4_f16;
+  vec2 mat2x2_f32_0;
+  vec2 mat2x2_f32_1;
+  uint pad_6;
+  uint pad_7;
+  mat2x3 mat2x3_f32;
+  mat2x4 mat2x4_f32;
+  vec2 mat3x2_f32_0;
+  vec2 mat3x2_f32_1;
+  vec2 mat3x2_f32_2;
+  uint pad_8;
+  uint pad_9;
+  mat3 mat3x3_f32;
+  mat3x4 mat3x4_f32;
+  vec2 mat4x2_f32_0;
+  vec2 mat4x2_f32_1;
+  vec2 mat4x2_f32_2;
+  vec2 mat4x2_f32_3;
+  mat4x3 mat4x3_f32;
+  mat4 mat4x4_f32;
+  f16vec2 mat2x2_f16_0;
+  f16vec2 mat2x2_f16_1;
+  f16vec3 mat2x3_f16_0;
+  f16vec3 mat2x3_f16_1;
+  f16vec4 mat2x4_f16_0;
+  f16vec4 mat2x4_f16_1;
+  f16vec2 mat3x2_f16_0;
+  f16vec2 mat3x2_f16_1;
+  f16vec2 mat3x2_f16_2;
+  uint pad_10;
+  f16vec3 mat3x3_f16_0;
+  f16vec3 mat3x3_f16_1;
+  f16vec3 mat3x3_f16_2;
+  f16vec4 mat3x4_f16_0;
+  f16vec4 mat3x4_f16_1;
+  f16vec4 mat3x4_f16_2;
+  f16vec2 mat4x2_f16_0;
+  f16vec2 mat4x2_f16_1;
+  f16vec2 mat4x2_f16_2;
+  f16vec2 mat4x2_f16_3;
+  f16vec3 mat4x3_f16_0;
+  f16vec3 mat4x3_f16_1;
+  f16vec3 mat4x3_f16_2;
+  f16vec3 mat4x3_f16_3;
+  f16vec4 mat4x4_f16_0;
+  f16vec4 mat4x4_f16_1;
+  f16vec4 mat4x4_f16_2;
+  f16vec4 mat4x4_f16_3;
+  uint pad_11;
+  uint pad_12;
+  vec3 arr2_vec3_f32[2];
+  mat4x2_f16_4 arr2_mat4x2_f16[2];
+};
+
+struct S {
+  Inner arr[8];
+};
+
+struct S_std140 {
+  Inner_std140 arr[8];
+};
+
+layout(binding = 0, std140) uniform ub_block_std140_ubo {
+  S_std140 inner;
+} ub;
+
+mat2 load_ub_inner_arr_p0_mat2x2_f32(uint p0) {
+  uint s_save = p0;
+  return mat2(ub.inner.arr[s_save].mat2x2_f32_0, ub.inner.arr[s_save].mat2x2_f32_1);
+}
+
+mat3x2 load_ub_inner_arr_p0_mat3x2_f32(uint p0) {
+  uint s_save_1 = p0;
+  return mat3x2(ub.inner.arr[s_save_1].mat3x2_f32_0, ub.inner.arr[s_save_1].mat3x2_f32_1, ub.inner.arr[s_save_1].mat3x2_f32_2);
+}
+
+mat4x2 load_ub_inner_arr_p0_mat4x2_f32(uint p0) {
+  uint s_save_2 = p0;
+  return mat4x2(ub.inner.arr[s_save_2].mat4x2_f32_0, ub.inner.arr[s_save_2].mat4x2_f32_1, ub.inner.arr[s_save_2].mat4x2_f32_2, ub.inner.arr[s_save_2].mat4x2_f32_3);
+}
+
+f16mat2 load_ub_inner_arr_p0_mat2x2_f16(uint p0) {
+  uint s_save_3 = p0;
+  return f16mat2(ub.inner.arr[s_save_3].mat2x2_f16_0, ub.inner.arr[s_save_3].mat2x2_f16_1);
+}
+
+f16mat2x3 load_ub_inner_arr_p0_mat2x3_f16(uint p0) {
+  uint s_save_4 = p0;
+  return f16mat2x3(ub.inner.arr[s_save_4].mat2x3_f16_0, ub.inner.arr[s_save_4].mat2x3_f16_1);
+}
+
+f16mat2x4 load_ub_inner_arr_p0_mat2x4_f16(uint p0) {
+  uint s_save_5 = p0;
+  return f16mat2x4(ub.inner.arr[s_save_5].mat2x4_f16_0, ub.inner.arr[s_save_5].mat2x4_f16_1);
+}
+
+f16mat3x2 load_ub_inner_arr_p0_mat3x2_f16(uint p0) {
+  uint s_save_6 = p0;
+  return f16mat3x2(ub.inner.arr[s_save_6].mat3x2_f16_0, ub.inner.arr[s_save_6].mat3x2_f16_1, ub.inner.arr[s_save_6].mat3x2_f16_2);
+}
+
+f16mat3 load_ub_inner_arr_p0_mat3x3_f16(uint p0) {
+  uint s_save_7 = p0;
+  return f16mat3(ub.inner.arr[s_save_7].mat3x3_f16_0, ub.inner.arr[s_save_7].mat3x3_f16_1, ub.inner.arr[s_save_7].mat3x3_f16_2);
+}
+
+f16mat3x4 load_ub_inner_arr_p0_mat3x4_f16(uint p0) {
+  uint s_save_8 = p0;
+  return f16mat3x4(ub.inner.arr[s_save_8].mat3x4_f16_0, ub.inner.arr[s_save_8].mat3x4_f16_1, ub.inner.arr[s_save_8].mat3x4_f16_2);
+}
+
+f16mat4x2 load_ub_inner_arr_p0_mat4x2_f16(uint p0) {
+  uint s_save_9 = p0;
+  return f16mat4x2(ub.inner.arr[s_save_9].mat4x2_f16_0, ub.inner.arr[s_save_9].mat4x2_f16_1, ub.inner.arr[s_save_9].mat4x2_f16_2, ub.inner.arr[s_save_9].mat4x2_f16_3);
+}
+
+f16mat4x3 load_ub_inner_arr_p0_mat4x3_f16(uint p0) {
+  uint s_save_10 = p0;
+  return f16mat4x3(ub.inner.arr[s_save_10].mat4x3_f16_0, ub.inner.arr[s_save_10].mat4x3_f16_1, ub.inner.arr[s_save_10].mat4x3_f16_2, ub.inner.arr[s_save_10].mat4x3_f16_3);
+}
+
+f16mat4 load_ub_inner_arr_p0_mat4x4_f16(uint p0) {
+  uint s_save_11 = p0;
+  return f16mat4(ub.inner.arr[s_save_11].mat4x4_f16_0, ub.inner.arr[s_save_11].mat4x4_f16_1, ub.inner.arr[s_save_11].mat4x4_f16_2, ub.inner.arr[s_save_11].mat4x4_f16_3);
+}
+
+f16mat4x2 conv_mat4x2_f16(mat4x2_f16_4 val) {
+  return f16mat4x2(val.col0, val.col1, val.col2, val.col3);
+}
+
+f16mat4x2[2] conv_arr2_mat4x2_f16(mat4x2_f16_4 val[2]) {
+  f16mat4x2 arr[2] = f16mat4x2[2](f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf));
+  {
+    for(uint i = 0u; (i < 2u); i = (i + 1u)) {
+      arr[i] = conv_mat4x2_f16(val[i]);
+    }
+  }
+  return arr;
+}
+
+void tint_symbol(uint idx) {
+  float scalar_f32 = ub.inner.arr[idx].scalar_f32;
+  int scalar_i32 = ub.inner.arr[idx].scalar_i32;
+  uint scalar_u32 = ub.inner.arr[idx].scalar_u32;
+  float16_t scalar_f16 = ub.inner.arr[idx].scalar_f16;
+  vec2 vec2_f32 = ub.inner.arr[idx].vec2_f32;
+  ivec2 vec2_i32 = ub.inner.arr[idx].vec2_i32;
+  uvec2 vec2_u32 = ub.inner.arr[idx].vec2_u32;
+  f16vec2 vec2_f16 = ub.inner.arr[idx].vec2_f16;
+  vec3 vec3_f32 = ub.inner.arr[idx].vec3_f32;
+  ivec3 vec3_i32 = ub.inner.arr[idx].vec3_i32;
+  uvec3 vec3_u32 = ub.inner.arr[idx].vec3_u32;
+  f16vec3 vec3_f16 = ub.inner.arr[idx].vec3_f16;
+  vec4 vec4_f32 = ub.inner.arr[idx].vec4_f32;
+  ivec4 vec4_i32 = ub.inner.arr[idx].vec4_i32;
+  uvec4 vec4_u32 = ub.inner.arr[idx].vec4_u32;
+  f16vec4 vec4_f16 = ub.inner.arr[idx].vec4_f16;
+  mat2 mat2x2_f32 = load_ub_inner_arr_p0_mat2x2_f32(uint(idx));
+  mat2x3 mat2x3_f32 = ub.inner.arr[idx].mat2x3_f32;
+  mat2x4 mat2x4_f32 = ub.inner.arr[idx].mat2x4_f32;
+  mat3x2 mat3x2_f32 = load_ub_inner_arr_p0_mat3x2_f32(uint(idx));
+  mat3 mat3x3_f32 = ub.inner.arr[idx].mat3x3_f32;
+  mat3x4 mat3x4_f32 = ub.inner.arr[idx].mat3x4_f32;
+  mat4x2 mat4x2_f32 = load_ub_inner_arr_p0_mat4x2_f32(uint(idx));
+  mat4x3 mat4x3_f32 = ub.inner.arr[idx].mat4x3_f32;
+  mat4 mat4x4_f32 = ub.inner.arr[idx].mat4x4_f32;
+  f16mat2 mat2x2_f16 = load_ub_inner_arr_p0_mat2x2_f16(uint(idx));
+  f16mat2x3 mat2x3_f16 = load_ub_inner_arr_p0_mat2x3_f16(uint(idx));
+  f16mat2x4 mat2x4_f16 = load_ub_inner_arr_p0_mat2x4_f16(uint(idx));
+  f16mat3x2 mat3x2_f16 = load_ub_inner_arr_p0_mat3x2_f16(uint(idx));
+  f16mat3 mat3x3_f16 = load_ub_inner_arr_p0_mat3x3_f16(uint(idx));
+  f16mat3x4 mat3x4_f16 = load_ub_inner_arr_p0_mat3x4_f16(uint(idx));
+  f16mat4x2 mat4x2_f16 = load_ub_inner_arr_p0_mat4x2_f16(uint(idx));
+  f16mat4x3 mat4x3_f16 = load_ub_inner_arr_p0_mat4x3_f16(uint(idx));
+  f16mat4 mat4x4_f16 = load_ub_inner_arr_p0_mat4x4_f16(uint(idx));
+  vec3 arr2_vec3_f32[2] = ub.inner.arr[idx].arr2_vec3_f32;
+  f16mat4x2 arr2_mat4x2_f16[2] = conv_arr2_mat4x2_f16(ub.inner.arr[idx].arr2_mat4x2_f16);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  tint_symbol(gl_LocalInvocationIndex);
+  return;
+}

diff --git a/test/tint/buffer/uniform/dynamic_index/read_f16.wgsl.expected.msl b/test/tint/buffer/uniform/dynamic_index/read_f16.wgsl.expected.msl
new file mode 100644
index 0000000..f6b667e
--- /dev/null
+++ b/test/tint/buffer/uniform/dynamic_index/read_f16.wgsl.expected.msl

@@ -0,0 +1,113 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct Inner {
+  /* 0x0000 */ float scalar_f32;
+  /* 0x0004 */ int scalar_i32;
+  /* 0x0008 */ uint scalar_u32;
+  /* 0x000c */ half scalar_f16;
+  /* 0x000e */ tint_array<int8_t, 2> tint_pad;
+  /* 0x0010 */ float2 vec2_f32;
+  /* 0x0018 */ int2 vec2_i32;
+  /* 0x0020 */ uint2 vec2_u32;
+  /* 0x0028 */ half2 vec2_f16;
+  /* 0x002c */ tint_array<int8_t, 4> tint_pad_1;
+  /* 0x0030 */ packed_float3 vec3_f32;
+  /* 0x003c */ tint_array<int8_t, 4> tint_pad_2;
+  /* 0x0040 */ packed_int3 vec3_i32;
+  /* 0x004c */ tint_array<int8_t, 4> tint_pad_3;
+  /* 0x0050 */ packed_uint3 vec3_u32;
+  /* 0x005c */ tint_array<int8_t, 4> tint_pad_4;
+  /* 0x0060 */ packed_half3 vec3_f16;
+  /* 0x0066 */ tint_array<int8_t, 10> tint_pad_5;
+  /* 0x0070 */ float4 vec4_f32;
+  /* 0x0080 */ int4 vec4_i32;
+  /* 0x0090 */ uint4 vec4_u32;
+  /* 0x00a0 */ half4 vec4_f16;
+  /* 0x00a8 */ float2x2 mat2x2_f32;
+  /* 0x00b8 */ tint_array<int8_t, 8> tint_pad_6;
+  /* 0x00c0 */ float2x3 mat2x3_f32;
+  /* 0x00e0 */ float2x4 mat2x4_f32;
+  /* 0x0100 */ float3x2 mat3x2_f32;
+  /* 0x0118 */ tint_array<int8_t, 8> tint_pad_7;
+  /* 0x0120 */ float3x3 mat3x3_f32;
+  /* 0x0150 */ float3x4 mat3x4_f32;
+  /* 0x0180 */ float4x2 mat4x2_f32;
+  /* 0x01a0 */ float4x3 mat4x3_f32;
+  /* 0x01e0 */ float4x4 mat4x4_f32;
+  /* 0x0220 */ half2x2 mat2x2_f16;
+  /* 0x0228 */ half2x3 mat2x3_f16;
+  /* 0x0238 */ half2x4 mat2x4_f16;
+  /* 0x0248 */ half3x2 mat3x2_f16;
+  /* 0x0254 */ tint_array<int8_t, 4> tint_pad_8;
+  /* 0x0258 */ half3x3 mat3x3_f16;
+  /* 0x0270 */ half3x4 mat3x4_f16;
+  /* 0x0288 */ half4x2 mat4x2_f16;
+  /* 0x0298 */ half4x3 mat4x3_f16;
+  /* 0x02b8 */ half4x4 mat4x4_f16;
+  /* 0x02d8 */ tint_array<int8_t, 8> tint_pad_9;
+  /* 0x02e0 */ tint_array<float3, 2> arr2_vec3_f32;
+  /* 0x0300 */ tint_array<half4x2, 2> arr2_mat4x2_f16;
+};
+
+struct S {
+  /* 0x0000 */ tint_array<Inner, 8> arr;
+};
+
+void tint_symbol_inner(uint idx, const constant S* const tint_symbol_1) {
+  float const scalar_f32 = (*(tint_symbol_1)).arr[idx].scalar_f32;
+  int const scalar_i32 = (*(tint_symbol_1)).arr[idx].scalar_i32;
+  uint const scalar_u32 = (*(tint_symbol_1)).arr[idx].scalar_u32;
+  half const scalar_f16 = (*(tint_symbol_1)).arr[idx].scalar_f16;
+  float2 const vec2_f32 = (*(tint_symbol_1)).arr[idx].vec2_f32;
+  int2 const vec2_i32 = (*(tint_symbol_1)).arr[idx].vec2_i32;
+  uint2 const vec2_u32 = (*(tint_symbol_1)).arr[idx].vec2_u32;
+  half2 const vec2_f16 = (*(tint_symbol_1)).arr[idx].vec2_f16;
+  float3 const vec3_f32 = float3((*(tint_symbol_1)).arr[idx].vec3_f32);
+  int3 const vec3_i32 = int3((*(tint_symbol_1)).arr[idx].vec3_i32);
+  uint3 const vec3_u32 = uint3((*(tint_symbol_1)).arr[idx].vec3_u32);
+  half3 const vec3_f16 = half3((*(tint_symbol_1)).arr[idx].vec3_f16);
+  float4 const vec4_f32 = (*(tint_symbol_1)).arr[idx].vec4_f32;
+  int4 const vec4_i32 = (*(tint_symbol_1)).arr[idx].vec4_i32;
+  uint4 const vec4_u32 = (*(tint_symbol_1)).arr[idx].vec4_u32;
+  half4 const vec4_f16 = (*(tint_symbol_1)).arr[idx].vec4_f16;
+  float2x2 const mat2x2_f32 = (*(tint_symbol_1)).arr[idx].mat2x2_f32;
+  float2x3 const mat2x3_f32 = (*(tint_symbol_1)).arr[idx].mat2x3_f32;
+  float2x4 const mat2x4_f32 = (*(tint_symbol_1)).arr[idx].mat2x4_f32;
+  float3x2 const mat3x2_f32 = (*(tint_symbol_1)).arr[idx].mat3x2_f32;
+  float3x3 const mat3x3_f32 = (*(tint_symbol_1)).arr[idx].mat3x3_f32;
+  float3x4 const mat3x4_f32 = (*(tint_symbol_1)).arr[idx].mat3x4_f32;
+  float4x2 const mat4x2_f32 = (*(tint_symbol_1)).arr[idx].mat4x2_f32;
+  float4x3 const mat4x3_f32 = (*(tint_symbol_1)).arr[idx].mat4x3_f32;
+  float4x4 const mat4x4_f32 = (*(tint_symbol_1)).arr[idx].mat4x4_f32;
+  half2x2 const mat2x2_f16 = (*(tint_symbol_1)).arr[idx].mat2x2_f16;
+  half2x3 const mat2x3_f16 = (*(tint_symbol_1)).arr[idx].mat2x3_f16;
+  half2x4 const mat2x4_f16 = (*(tint_symbol_1)).arr[idx].mat2x4_f16;
+  half3x2 const mat3x2_f16 = (*(tint_symbol_1)).arr[idx].mat3x2_f16;
+  half3x3 const mat3x3_f16 = (*(tint_symbol_1)).arr[idx].mat3x3_f16;
+  half3x4 const mat3x4_f16 = (*(tint_symbol_1)).arr[idx].mat3x4_f16;
+  half4x2 const mat4x2_f16 = (*(tint_symbol_1)).arr[idx].mat4x2_f16;
+  half4x3 const mat4x3_f16 = (*(tint_symbol_1)).arr[idx].mat4x3_f16;
+  half4x4 const mat4x4_f16 = (*(tint_symbol_1)).arr[idx].mat4x4_f16;
+  tint_array<float3, 2> const arr2_vec3_f32 = (*(tint_symbol_1)).arr[idx].arr2_vec3_f32;
+  tint_array<half4x2, 2> const arr2_mat4x2_f16 = (*(tint_symbol_1)).arr[idx].arr2_mat4x2_f16;
+}
+
+kernel void tint_symbol(const constant S* tint_symbol_2 [[buffer(0)]], uint idx [[thread_index_in_threadgroup]]) {
+  tint_symbol_inner(idx, tint_symbol_2);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/dynamic_index/read_f16.wgsl.expected.spvasm b/test/tint/buffer/uniform/dynamic_index/read_f16.wgsl.expected.spvasm
new file mode 100644
index 0000000..e504459
--- /dev/null
+++ b/test/tint/buffer/uniform/dynamic_index/read_f16.wgsl.expected.spvasm

@@ -0,0 +1,638 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 450
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main" %idx_1
+               OpExecutionMode %main LocalSize 1 1 1
+               OpName %idx_1 "idx_1"
+               OpName %ub_block_std140 "ub_block_std140"
+               OpMemberName %ub_block_std140 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "arr"
+               OpName %Inner_std140 "Inner_std140"
+               OpMemberName %Inner_std140 0 "scalar_f32"
+               OpMemberName %Inner_std140 1 "scalar_i32"
+               OpMemberName %Inner_std140 2 "scalar_u32"
+               OpMemberName %Inner_std140 3 "scalar_f16"
+               OpMemberName %Inner_std140 4 "vec2_f32"
+               OpMemberName %Inner_std140 5 "vec2_i32"
+               OpMemberName %Inner_std140 6 "vec2_u32"
+               OpMemberName %Inner_std140 7 "vec2_f16"
+               OpMemberName %Inner_std140 8 "vec3_f32"
+               OpMemberName %Inner_std140 9 "vec3_i32"
+               OpMemberName %Inner_std140 10 "vec3_u32"
+               OpMemberName %Inner_std140 11 "vec3_f16"
+               OpMemberName %Inner_std140 12 "vec4_f32"
+               OpMemberName %Inner_std140 13 "vec4_i32"
+               OpMemberName %Inner_std140 14 "vec4_u32"
+               OpMemberName %Inner_std140 15 "vec4_f16"
+               OpMemberName %Inner_std140 16 "mat2x2_f32_0"
+               OpMemberName %Inner_std140 17 "mat2x2_f32_1"
+               OpMemberName %Inner_std140 18 "mat2x3_f32"
+               OpMemberName %Inner_std140 19 "mat2x4_f32"
+               OpMemberName %Inner_std140 20 "mat3x2_f32_0"
+               OpMemberName %Inner_std140 21 "mat3x2_f32_1"
+               OpMemberName %Inner_std140 22 "mat3x2_f32_2"
+               OpMemberName %Inner_std140 23 "mat3x3_f32"
+               OpMemberName %Inner_std140 24 "mat3x4_f32"
+               OpMemberName %Inner_std140 25 "mat4x2_f32_0"
+               OpMemberName %Inner_std140 26 "mat4x2_f32_1"
+               OpMemberName %Inner_std140 27 "mat4x2_f32_2"
+               OpMemberName %Inner_std140 28 "mat4x2_f32_3"
+               OpMemberName %Inner_std140 29 "mat4x3_f32"
+               OpMemberName %Inner_std140 30 "mat4x4_f32"
+               OpMemberName %Inner_std140 31 "mat2x2_f16_0"
+               OpMemberName %Inner_std140 32 "mat2x2_f16_1"
+               OpMemberName %Inner_std140 33 "mat2x3_f16_0"
+               OpMemberName %Inner_std140 34 "mat2x3_f16_1"
+               OpMemberName %Inner_std140 35 "mat2x4_f16_0"
+               OpMemberName %Inner_std140 36 "mat2x4_f16_1"
+               OpMemberName %Inner_std140 37 "mat3x2_f16_0"
+               OpMemberName %Inner_std140 38 "mat3x2_f16_1"
+               OpMemberName %Inner_std140 39 "mat3x2_f16_2"
+               OpMemberName %Inner_std140 40 "mat3x3_f16_0"
+               OpMemberName %Inner_std140 41 "mat3x3_f16_1"
+               OpMemberName %Inner_std140 42 "mat3x3_f16_2"
+               OpMemberName %Inner_std140 43 "mat3x4_f16_0"
+               OpMemberName %Inner_std140 44 "mat3x4_f16_1"
+               OpMemberName %Inner_std140 45 "mat3x4_f16_2"
+               OpMemberName %Inner_std140 46 "mat4x2_f16_0"
+               OpMemberName %Inner_std140 47 "mat4x2_f16_1"
+               OpMemberName %Inner_std140 48 "mat4x2_f16_2"
+               OpMemberName %Inner_std140 49 "mat4x2_f16_3"
+               OpMemberName %Inner_std140 50 "mat4x3_f16_0"
+               OpMemberName %Inner_std140 51 "mat4x3_f16_1"
+               OpMemberName %Inner_std140 52 "mat4x3_f16_2"
+               OpMemberName %Inner_std140 53 "mat4x3_f16_3"
+               OpMemberName %Inner_std140 54 "mat4x4_f16_0"
+               OpMemberName %Inner_std140 55 "mat4x4_f16_1"
+               OpMemberName %Inner_std140 56 "mat4x4_f16_2"
+               OpMemberName %Inner_std140 57 "mat4x4_f16_3"
+               OpMemberName %Inner_std140 58 "arr2_vec3_f32"
+               OpMemberName %Inner_std140 59 "arr2_mat4x2_f16"
+               OpName %mat4x2_f16_4 "mat4x2_f16_4"
+               OpMemberName %mat4x2_f16_4 0 "col0"
+               OpMemberName %mat4x2_f16_4 1 "col1"
+               OpMemberName %mat4x2_f16_4 2 "col2"
+               OpMemberName %mat4x2_f16_4 3 "col3"
+               OpName %ub "ub"
+               OpName %load_ub_inner_arr_p0_mat2x2_f32 "load_ub_inner_arr_p0_mat2x2_f32"
+               OpName %p0 "p0"
+               OpName %load_ub_inner_arr_p0_mat3x2_f32 "load_ub_inner_arr_p0_mat3x2_f32"
+               OpName %p0_0 "p0"
+               OpName %load_ub_inner_arr_p0_mat4x2_f32 "load_ub_inner_arr_p0_mat4x2_f32"
+               OpName %p0_1 "p0"
+               OpName %load_ub_inner_arr_p0_mat2x2_f16 "load_ub_inner_arr_p0_mat2x2_f16"
+               OpName %p0_2 "p0"
+               OpName %load_ub_inner_arr_p0_mat2x3_f16 "load_ub_inner_arr_p0_mat2x3_f16"
+               OpName %p0_3 "p0"
+               OpName %load_ub_inner_arr_p0_mat2x4_f16 "load_ub_inner_arr_p0_mat2x4_f16"
+               OpName %p0_4 "p0"
+               OpName %load_ub_inner_arr_p0_mat3x2_f16 "load_ub_inner_arr_p0_mat3x2_f16"
+               OpName %p0_5 "p0"
+               OpName %load_ub_inner_arr_p0_mat3x3_f16 "load_ub_inner_arr_p0_mat3x3_f16"
+               OpName %p0_6 "p0"
+               OpName %load_ub_inner_arr_p0_mat3x4_f16 "load_ub_inner_arr_p0_mat3x4_f16"
+               OpName %p0_7 "p0"
+               OpName %load_ub_inner_arr_p0_mat4x2_f16 "load_ub_inner_arr_p0_mat4x2_f16"
+               OpName %p0_8 "p0"
+               OpName %load_ub_inner_arr_p0_mat4x3_f16 "load_ub_inner_arr_p0_mat4x3_f16"
+               OpName %p0_9 "p0"
+               OpName %load_ub_inner_arr_p0_mat4x4_f16 "load_ub_inner_arr_p0_mat4x4_f16"
+               OpName %p0_10 "p0"
+               OpName %conv_mat4x2_f16 "conv_mat4x2_f16"
+               OpName %val "val"
+               OpName %conv_arr2_mat4x2_f16 "conv_arr2_mat4x2_f16"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %main_inner "main_inner"
+               OpName %idx "idx"
+               OpName %main "main"
+               OpDecorate %idx_1 BuiltIn LocalInvocationIndex
+               OpDecorate %ub_block_std140 Block
+               OpMemberDecorate %ub_block_std140 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %Inner_std140 0 Offset 0
+               OpMemberDecorate %Inner_std140 1 Offset 4
+               OpMemberDecorate %Inner_std140 2 Offset 8
+               OpMemberDecorate %Inner_std140 3 Offset 12
+               OpMemberDecorate %Inner_std140 4 Offset 16
+               OpMemberDecorate %Inner_std140 5 Offset 24
+               OpMemberDecorate %Inner_std140 6 Offset 32
+               OpMemberDecorate %Inner_std140 7 Offset 40
+               OpMemberDecorate %Inner_std140 8 Offset 48
+               OpMemberDecorate %Inner_std140 9 Offset 64
+               OpMemberDecorate %Inner_std140 10 Offset 80
+               OpMemberDecorate %Inner_std140 11 Offset 96
+               OpMemberDecorate %Inner_std140 12 Offset 112
+               OpMemberDecorate %Inner_std140 13 Offset 128
+               OpMemberDecorate %Inner_std140 14 Offset 144
+               OpMemberDecorate %Inner_std140 15 Offset 160
+               OpMemberDecorate %Inner_std140 16 Offset 168
+               OpMemberDecorate %Inner_std140 17 Offset 176
+               OpMemberDecorate %Inner_std140 18 Offset 192
+               OpMemberDecorate %Inner_std140 18 ColMajor
+               OpMemberDecorate %Inner_std140 18 MatrixStride 16
+               OpMemberDecorate %Inner_std140 19 Offset 224
+               OpMemberDecorate %Inner_std140 19 ColMajor
+               OpMemberDecorate %Inner_std140 19 MatrixStride 16
+               OpMemberDecorate %Inner_std140 20 Offset 256
+               OpMemberDecorate %Inner_std140 21 Offset 264
+               OpMemberDecorate %Inner_std140 22 Offset 272
+               OpMemberDecorate %Inner_std140 23 Offset 288
+               OpMemberDecorate %Inner_std140 23 ColMajor
+               OpMemberDecorate %Inner_std140 23 MatrixStride 16
+               OpMemberDecorate %Inner_std140 24 Offset 336
+               OpMemberDecorate %Inner_std140 24 ColMajor
+               OpMemberDecorate %Inner_std140 24 MatrixStride 16
+               OpMemberDecorate %Inner_std140 25 Offset 384
+               OpMemberDecorate %Inner_std140 26 Offset 392
+               OpMemberDecorate %Inner_std140 27 Offset 400
+               OpMemberDecorate %Inner_std140 28 Offset 408
+               OpMemberDecorate %Inner_std140 29 Offset 416
+               OpMemberDecorate %Inner_std140 29 ColMajor
+               OpMemberDecorate %Inner_std140 29 MatrixStride 16
+               OpMemberDecorate %Inner_std140 30 Offset 480
+               OpMemberDecorate %Inner_std140 30 ColMajor
+               OpMemberDecorate %Inner_std140 30 MatrixStride 16
+               OpMemberDecorate %Inner_std140 31 Offset 544
+               OpMemberDecorate %Inner_std140 32 Offset 548
+               OpMemberDecorate %Inner_std140 33 Offset 552
+               OpMemberDecorate %Inner_std140 34 Offset 560
+               OpMemberDecorate %Inner_std140 35 Offset 568
+               OpMemberDecorate %Inner_std140 36 Offset 576
+               OpMemberDecorate %Inner_std140 37 Offset 584
+               OpMemberDecorate %Inner_std140 38 Offset 588
+               OpMemberDecorate %Inner_std140 39 Offset 592
+               OpMemberDecorate %Inner_std140 40 Offset 600
+               OpMemberDecorate %Inner_std140 41 Offset 608
+               OpMemberDecorate %Inner_std140 42 Offset 616
+               OpMemberDecorate %Inner_std140 43 Offset 624
+               OpMemberDecorate %Inner_std140 44 Offset 632
+               OpMemberDecorate %Inner_std140 45 Offset 640
+               OpMemberDecorate %Inner_std140 46 Offset 648
+               OpMemberDecorate %Inner_std140 47 Offset 652
+               OpMemberDecorate %Inner_std140 48 Offset 656
+               OpMemberDecorate %Inner_std140 49 Offset 660
+               OpMemberDecorate %Inner_std140 50 Offset 664
+               OpMemberDecorate %Inner_std140 51 Offset 672
+               OpMemberDecorate %Inner_std140 52 Offset 680
+               OpMemberDecorate %Inner_std140 53 Offset 688
+               OpMemberDecorate %Inner_std140 54 Offset 696
+               OpMemberDecorate %Inner_std140 55 Offset 704
+               OpMemberDecorate %Inner_std140 56 Offset 712
+               OpMemberDecorate %Inner_std140 57 Offset 720
+               OpMemberDecorate %Inner_std140 58 Offset 736
+               OpDecorate %_arr_v3float_uint_2 ArrayStride 16
+               OpMemberDecorate %Inner_std140 59 Offset 768
+               OpMemberDecorate %mat4x2_f16_4 0 Offset 0
+               OpMemberDecorate %mat4x2_f16_4 1 Offset 4
+               OpMemberDecorate %mat4x2_f16_4 2 Offset 8
+               OpMemberDecorate %mat4x2_f16_4 3 Offset 12
+               OpDecorate %_arr_mat4x2_f16_4_uint_2 ArrayStride 16
+               OpDecorate %_arr_Inner_std140_uint_8 ArrayStride 800
+               OpDecorate %ub NonWritable
+               OpDecorate %ub Binding 0
+               OpDecorate %ub DescriptorSet 0
+               OpDecorate %_arr_mat4v2half_uint_2 ArrayStride 16
+       %uint = OpTypeInt 32 0
+%_ptr_Input_uint = OpTypePointer Input %uint
+      %idx_1 = OpVariable %_ptr_Input_uint Input
+      %float = OpTypeFloat 32
+        %int = OpTypeInt 32 1
+       %half = OpTypeFloat 16
+    %v2float = OpTypeVector %float 2
+      %v2int = OpTypeVector %int 2
+     %v2uint = OpTypeVector %uint 2
+     %v2half = OpTypeVector %half 2
+    %v3float = OpTypeVector %float 3
+      %v3int = OpTypeVector %int 3
+     %v3uint = OpTypeVector %uint 3
+     %v3half = OpTypeVector %half 3
+    %v4float = OpTypeVector %float 4
+      %v4int = OpTypeVector %int 4
+     %v4uint = OpTypeVector %uint 4
+     %v4half = OpTypeVector %half 4
+%mat2v3float = OpTypeMatrix %v3float 2
+%mat2v4float = OpTypeMatrix %v4float 2
+%mat3v3float = OpTypeMatrix %v3float 3
+%mat3v4float = OpTypeMatrix %v4float 3
+%mat4v3float = OpTypeMatrix %v3float 4
+%mat4v4float = OpTypeMatrix %v4float 4
+     %uint_2 = OpConstant %uint 2
+%_arr_v3float_uint_2 = OpTypeArray %v3float %uint_2
+%mat4x2_f16_4 = OpTypeStruct %v2half %v2half %v2half %v2half
+%_arr_mat4x2_f16_4_uint_2 = OpTypeArray %mat4x2_f16_4 %uint_2
+%Inner_std140 = OpTypeStruct %float %int %uint %half %v2float %v2int %v2uint %v2half %v3float %v3int %v3uint %v3half %v4float %v4int %v4uint %v4half %v2float %v2float %mat2v3float %mat2v4float %v2float %v2float %v2float %mat3v3float %mat3v4float %v2float %v2float %v2float %v2float %mat4v3float %mat4v4float %v2half %v2half %v3half %v3half %v4half %v4half %v2half %v2half %v2half %v3half %v3half %v3half %v4half %v4half %v4half %v2half %v2half %v2half %v2half %v3half %v3half %v3half %v3half %v4half %v4half %v4half %v4half %_arr_v3float_uint_2 %_arr_mat4x2_f16_4_uint_2
+     %uint_8 = OpConstant %uint 8
+%_arr_Inner_std140_uint_8 = OpTypeArray %Inner_std140 %uint_8
+   %S_std140 = OpTypeStruct %_arr_Inner_std140_uint_8
+%ub_block_std140 = OpTypeStruct %S_std140
+%_ptr_Uniform_ub_block_std140 = OpTypePointer Uniform %ub_block_std140
+         %ub = OpVariable %_ptr_Uniform_ub_block_std140 Uniform
+%mat2v2float = OpTypeMatrix %v2float 2
+         %36 = OpTypeFunction %mat2v2float %uint
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_Inner_std140 = OpTypePointer Uniform %Inner_std140
+    %uint_16 = OpConstant %uint 16
+%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
+    %uint_17 = OpConstant %uint 17
+%mat3v2float = OpTypeMatrix %v2float 3
+         %55 = OpTypeFunction %mat3v2float %uint
+    %uint_20 = OpConstant %uint 20
+    %uint_21 = OpConstant %uint 21
+    %uint_22 = OpConstant %uint 22
+%mat4v2float = OpTypeMatrix %v2float 4
+         %75 = OpTypeFunction %mat4v2float %uint
+    %uint_25 = OpConstant %uint 25
+    %uint_26 = OpConstant %uint 26
+    %uint_27 = OpConstant %uint 27
+    %uint_28 = OpConstant %uint 28
+ %mat2v2half = OpTypeMatrix %v2half 2
+         %99 = OpTypeFunction %mat2v2half %uint
+    %uint_31 = OpConstant %uint 31
+%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half
+    %uint_32 = OpConstant %uint 32
+ %mat2v3half = OpTypeMatrix %v3half 2
+        %116 = OpTypeFunction %mat2v3half %uint
+    %uint_33 = OpConstant %uint 33
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+    %uint_34 = OpConstant %uint 34
+ %mat2v4half = OpTypeMatrix %v4half 2
+        %133 = OpTypeFunction %mat2v4half %uint
+    %uint_35 = OpConstant %uint 35
+%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half
+    %uint_36 = OpConstant %uint 36
+ %mat3v2half = OpTypeMatrix %v2half 3
+        %150 = OpTypeFunction %mat3v2half %uint
+    %uint_37 = OpConstant %uint 37
+    %uint_38 = OpConstant %uint 38
+    %uint_39 = OpConstant %uint 39
+ %mat3v3half = OpTypeMatrix %v3half 3
+        %170 = OpTypeFunction %mat3v3half %uint
+    %uint_40 = OpConstant %uint 40
+    %uint_41 = OpConstant %uint 41
+    %uint_42 = OpConstant %uint 42
+ %mat3v4half = OpTypeMatrix %v4half 3
+        %190 = OpTypeFunction %mat3v4half %uint
+    %uint_43 = OpConstant %uint 43
+    %uint_44 = OpConstant %uint 44
+    %uint_45 = OpConstant %uint 45
+ %mat4v2half = OpTypeMatrix %v2half 4
+        %210 = OpTypeFunction %mat4v2half %uint
+    %uint_46 = OpConstant %uint 46
+    %uint_47 = OpConstant %uint 47
+    %uint_48 = OpConstant %uint 48
+    %uint_49 = OpConstant %uint 49
+ %mat4v3half = OpTypeMatrix %v3half 4
+        %234 = OpTypeFunction %mat4v3half %uint
+    %uint_50 = OpConstant %uint 50
+    %uint_51 = OpConstant %uint 51
+    %uint_52 = OpConstant %uint 52
+    %uint_53 = OpConstant %uint 53
+ %mat4v4half = OpTypeMatrix %v4half 4
+        %258 = OpTypeFunction %mat4v4half %uint
+    %uint_54 = OpConstant %uint 54
+    %uint_55 = OpConstant %uint 55
+    %uint_56 = OpConstant %uint 56
+    %uint_57 = OpConstant %uint 57
+        %282 = OpTypeFunction %mat4v2half %mat4x2_f16_4
+%_arr_mat4v2half_uint_2 = OpTypeArray %mat4v2half %uint_2
+        %291 = OpTypeFunction %_arr_mat4v2half_uint_2 %_arr_mat4x2_f16_4_uint_2
+%_ptr_Function__arr_mat4v2half_uint_2 = OpTypePointer Function %_arr_mat4v2half_uint_2
+        %298 = OpConstantNull %_arr_mat4v2half_uint_2
+%_ptr_Function_uint = OpTypePointer Function %uint
+        %301 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_mat4x2_f16_4_uint_2 = OpTypePointer Function %_arr_mat4x2_f16_4_uint_2
+        %314 = OpConstantNull %_arr_mat4x2_f16_4_uint_2
+%_ptr_Function_mat4v2half = OpTypePointer Function %mat4v2half
+%_ptr_Function_mat4x2_f16_4 = OpTypePointer Function %mat4x2_f16_4
+     %uint_1 = OpConstant %uint 1
+       %void = OpTypeVoid
+        %327 = OpTypeFunction %void %uint
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+%_ptr_Uniform_int = OpTypePointer Uniform %int
+%_ptr_Uniform_uint = OpTypePointer Uniform %uint
+     %uint_3 = OpConstant %uint 3
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+     %uint_4 = OpConstant %uint 4
+     %uint_5 = OpConstant %uint 5
+%_ptr_Uniform_v2int = OpTypePointer Uniform %v2int
+     %uint_6 = OpConstant %uint 6
+%_ptr_Uniform_v2uint = OpTypePointer Uniform %v2uint
+     %uint_7 = OpConstant %uint 7
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+     %uint_9 = OpConstant %uint 9
+%_ptr_Uniform_v3int = OpTypePointer Uniform %v3int
+    %uint_10 = OpConstant %uint 10
+%_ptr_Uniform_v3uint = OpTypePointer Uniform %v3uint
+    %uint_11 = OpConstant %uint 11
+    %uint_12 = OpConstant %uint 12
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+    %uint_13 = OpConstant %uint 13
+%_ptr_Uniform_v4int = OpTypePointer Uniform %v4int
+    %uint_14 = OpConstant %uint 14
+%_ptr_Uniform_v4uint = OpTypePointer Uniform %v4uint
+    %uint_15 = OpConstant %uint 15
+    %uint_18 = OpConstant %uint 18
+%_ptr_Uniform_mat2v3float = OpTypePointer Uniform %mat2v3float
+    %uint_19 = OpConstant %uint 19
+%_ptr_Uniform_mat2v4float = OpTypePointer Uniform %mat2v4float
+    %uint_23 = OpConstant %uint 23
+%_ptr_Uniform_mat3v3float = OpTypePointer Uniform %mat3v3float
+    %uint_24 = OpConstant %uint 24
+%_ptr_Uniform_mat3v4float = OpTypePointer Uniform %mat3v4float
+    %uint_29 = OpConstant %uint 29
+%_ptr_Uniform_mat4v3float = OpTypePointer Uniform %mat4v3float
+    %uint_30 = OpConstant %uint 30
+%_ptr_Uniform_mat4v4float = OpTypePointer Uniform %mat4v4float
+    %uint_58 = OpConstant %uint 58
+%_ptr_Uniform__arr_v3float_uint_2 = OpTypePointer Uniform %_arr_v3float_uint_2
+    %uint_59 = OpConstant %uint 59
+%_ptr_Uniform__arr_mat4x2_f16_4_uint_2 = OpTypePointer Uniform %_arr_mat4x2_f16_4_uint_2
+        %445 = OpTypeFunction %void
+%load_ub_inner_arr_p0_mat2x2_f32 = OpFunction %mat2v2float None %36
+         %p0 = OpFunctionParameter %uint
+         %40 = OpLabel
+         %44 = OpAccessChain %_ptr_Uniform_Inner_std140 %ub %uint_0 %uint_0 %p0
+         %48 = OpAccessChain %_ptr_Uniform_v2float %44 %uint_16
+         %49 = OpLoad %v2float %48
+         %52 = OpAccessChain %_ptr_Uniform_v2float %44 %uint_17
+         %53 = OpLoad %v2float %52
+         %54 = OpCompositeConstruct %mat2v2float %49 %53
+               OpReturnValue %54
+               OpFunctionEnd
+%load_ub_inner_arr_p0_mat3x2_f32 = OpFunction %mat3v2float None %55
+       %p0_0 = OpFunctionParameter %uint
+         %59 = OpLabel
+         %61 = OpAccessChain %_ptr_Uniform_Inner_std140 %ub %uint_0 %uint_0 %p0_0
+         %64 = OpAccessChain %_ptr_Uniform_v2float %61 %uint_20
+         %65 = OpLoad %v2float %64
+         %68 = OpAccessChain %_ptr_Uniform_v2float %61 %uint_21
+         %69 = OpLoad %v2float %68
+         %72 = OpAccessChain %_ptr_Uniform_v2float %61 %uint_22
+         %73 = OpLoad %v2float %72
+         %74 = OpCompositeConstruct %mat3v2float %65 %69 %73
+               OpReturnValue %74
+               OpFunctionEnd
+%load_ub_inner_arr_p0_mat4x2_f32 = OpFunction %mat4v2float None %75
+       %p0_1 = OpFunctionParameter %uint
+         %79 = OpLabel
+         %81 = OpAccessChain %_ptr_Uniform_Inner_std140 %ub %uint_0 %uint_0 %p0_1
+         %84 = OpAccessChain %_ptr_Uniform_v2float %81 %uint_25
+         %85 = OpLoad %v2float %84
+         %88 = OpAccessChain %_ptr_Uniform_v2float %81 %uint_26
+         %89 = OpLoad %v2float %88
+         %92 = OpAccessChain %_ptr_Uniform_v2float %81 %uint_27
+         %93 = OpLoad %v2float %92
+         %96 = OpAccessChain %_ptr_Uniform_v2float %81 %uint_28
+         %97 = OpLoad %v2float %96
+         %98 = OpCompositeConstruct %mat4v2float %85 %89 %93 %97
+               OpReturnValue %98
+               OpFunctionEnd
+%load_ub_inner_arr_p0_mat2x2_f16 = OpFunction %mat2v2half None %99
+       %p0_2 = OpFunctionParameter %uint
+        %103 = OpLabel
+        %105 = OpAccessChain %_ptr_Uniform_Inner_std140 %ub %uint_0 %uint_0 %p0_2
+        %109 = OpAccessChain %_ptr_Uniform_v2half %105 %uint_31
+        %110 = OpLoad %v2half %109
+        %113 = OpAccessChain %_ptr_Uniform_v2half %105 %uint_32
+        %114 = OpLoad %v2half %113
+        %115 = OpCompositeConstruct %mat2v2half %110 %114
+               OpReturnValue %115
+               OpFunctionEnd
+%load_ub_inner_arr_p0_mat2x3_f16 = OpFunction %mat2v3half None %116
+       %p0_3 = OpFunctionParameter %uint
+        %120 = OpLabel
+        %122 = OpAccessChain %_ptr_Uniform_Inner_std140 %ub %uint_0 %uint_0 %p0_3
+        %126 = OpAccessChain %_ptr_Uniform_v3half %122 %uint_33
+        %127 = OpLoad %v3half %126
+        %130 = OpAccessChain %_ptr_Uniform_v3half %122 %uint_34
+        %131 = OpLoad %v3half %130
+        %132 = OpCompositeConstruct %mat2v3half %127 %131
+               OpReturnValue %132
+               OpFunctionEnd
+%load_ub_inner_arr_p0_mat2x4_f16 = OpFunction %mat2v4half None %133
+       %p0_4 = OpFunctionParameter %uint
+        %137 = OpLabel
+        %139 = OpAccessChain %_ptr_Uniform_Inner_std140 %ub %uint_0 %uint_0 %p0_4
+        %143 = OpAccessChain %_ptr_Uniform_v4half %139 %uint_35
+        %144 = OpLoad %v4half %143
+        %147 = OpAccessChain %_ptr_Uniform_v4half %139 %uint_36
+        %148 = OpLoad %v4half %147
+        %149 = OpCompositeConstruct %mat2v4half %144 %148
+               OpReturnValue %149
+               OpFunctionEnd
+%load_ub_inner_arr_p0_mat3x2_f16 = OpFunction %mat3v2half None %150
+       %p0_5 = OpFunctionParameter %uint
+        %154 = OpLabel
+        %156 = OpAccessChain %_ptr_Uniform_Inner_std140 %ub %uint_0 %uint_0 %p0_5
+        %159 = OpAccessChain %_ptr_Uniform_v2half %156 %uint_37
+        %160 = OpLoad %v2half %159
+        %163 = OpAccessChain %_ptr_Uniform_v2half %156 %uint_38
+        %164 = OpLoad %v2half %163
+        %167 = OpAccessChain %_ptr_Uniform_v2half %156 %uint_39
+        %168 = OpLoad %v2half %167
+        %169 = OpCompositeConstruct %mat3v2half %160 %164 %168
+               OpReturnValue %169
+               OpFunctionEnd
+%load_ub_inner_arr_p0_mat3x3_f16 = OpFunction %mat3v3half None %170
+       %p0_6 = OpFunctionParameter %uint
+        %174 = OpLabel
+        %176 = OpAccessChain %_ptr_Uniform_Inner_std140 %ub %uint_0 %uint_0 %p0_6
+        %179 = OpAccessChain %_ptr_Uniform_v3half %176 %uint_40
+        %180 = OpLoad %v3half %179
+        %183 = OpAccessChain %_ptr_Uniform_v3half %176 %uint_41
+        %184 = OpLoad %v3half %183
+        %187 = OpAccessChain %_ptr_Uniform_v3half %176 %uint_42
+        %188 = OpLoad %v3half %187
+        %189 = OpCompositeConstruct %mat3v3half %180 %184 %188
+               OpReturnValue %189
+               OpFunctionEnd
+%load_ub_inner_arr_p0_mat3x4_f16 = OpFunction %mat3v4half None %190
+       %p0_7 = OpFunctionParameter %uint
+        %194 = OpLabel
+        %196 = OpAccessChain %_ptr_Uniform_Inner_std140 %ub %uint_0 %uint_0 %p0_7
+        %199 = OpAccessChain %_ptr_Uniform_v4half %196 %uint_43
+        %200 = OpLoad %v4half %199
+        %203 = OpAccessChain %_ptr_Uniform_v4half %196 %uint_44
+        %204 = OpLoad %v4half %203
+        %207 = OpAccessChain %_ptr_Uniform_v4half %196 %uint_45
+        %208 = OpLoad %v4half %207
+        %209 = OpCompositeConstruct %mat3v4half %200 %204 %208
+               OpReturnValue %209
+               OpFunctionEnd
+%load_ub_inner_arr_p0_mat4x2_f16 = OpFunction %mat4v2half None %210
+       %p0_8 = OpFunctionParameter %uint
+        %214 = OpLabel
+        %216 = OpAccessChain %_ptr_Uniform_Inner_std140 %ub %uint_0 %uint_0 %p0_8
+        %219 = OpAccessChain %_ptr_Uniform_v2half %216 %uint_46
+        %220 = OpLoad %v2half %219
+        %223 = OpAccessChain %_ptr_Uniform_v2half %216 %uint_47
+        %224 = OpLoad %v2half %223
+        %227 = OpAccessChain %_ptr_Uniform_v2half %216 %uint_48
+        %228 = OpLoad %v2half %227
+        %231 = OpAccessChain %_ptr_Uniform_v2half %216 %uint_49
+        %232 = OpLoad %v2half %231
+        %233 = OpCompositeConstruct %mat4v2half %220 %224 %228 %232
+               OpReturnValue %233
+               OpFunctionEnd
+%load_ub_inner_arr_p0_mat4x3_f16 = OpFunction %mat4v3half None %234
+       %p0_9 = OpFunctionParameter %uint
+        %238 = OpLabel
+        %240 = OpAccessChain %_ptr_Uniform_Inner_std140 %ub %uint_0 %uint_0 %p0_9
+        %243 = OpAccessChain %_ptr_Uniform_v3half %240 %uint_50
+        %244 = OpLoad %v3half %243
+        %247 = OpAccessChain %_ptr_Uniform_v3half %240 %uint_51
+        %248 = OpLoad %v3half %247
+        %251 = OpAccessChain %_ptr_Uniform_v3half %240 %uint_52
+        %252 = OpLoad %v3half %251
+        %255 = OpAccessChain %_ptr_Uniform_v3half %240 %uint_53
+        %256 = OpLoad %v3half %255
+        %257 = OpCompositeConstruct %mat4v3half %244 %248 %252 %256
+               OpReturnValue %257
+               OpFunctionEnd
+%load_ub_inner_arr_p0_mat4x4_f16 = OpFunction %mat4v4half None %258
+      %p0_10 = OpFunctionParameter %uint
+        %262 = OpLabel
+        %264 = OpAccessChain %_ptr_Uniform_Inner_std140 %ub %uint_0 %uint_0 %p0_10
+        %267 = OpAccessChain %_ptr_Uniform_v4half %264 %uint_54
+        %268 = OpLoad %v4half %267
+        %271 = OpAccessChain %_ptr_Uniform_v4half %264 %uint_55
+        %272 = OpLoad %v4half %271
+        %275 = OpAccessChain %_ptr_Uniform_v4half %264 %uint_56
+        %276 = OpLoad %v4half %275
+        %279 = OpAccessChain %_ptr_Uniform_v4half %264 %uint_57
+        %280 = OpLoad %v4half %279
+        %281 = OpCompositeConstruct %mat4v4half %268 %272 %276 %280
+               OpReturnValue %281
+               OpFunctionEnd
+%conv_mat4x2_f16 = OpFunction %mat4v2half None %282
+        %val = OpFunctionParameter %mat4x2_f16_4
+        %285 = OpLabel
+        %286 = OpCompositeExtract %v2half %val 0
+        %287 = OpCompositeExtract %v2half %val 1
+        %288 = OpCompositeExtract %v2half %val 2
+        %289 = OpCompositeExtract %v2half %val 3
+        %290 = OpCompositeConstruct %mat4v2half %286 %287 %288 %289
+               OpReturnValue %290
+               OpFunctionEnd
+%conv_arr2_mat4x2_f16 = OpFunction %_arr_mat4v2half_uint_2 None %291
+      %val_0 = OpFunctionParameter %_arr_mat4x2_f16_4_uint_2
+        %295 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_mat4v2half_uint_2 Function %298
+          %i = OpVariable %_ptr_Function_uint Function %301
+%var_for_index = OpVariable %_ptr_Function__arr_mat4x2_f16_4_uint_2 Function %314
+               OpBranch %302
+        %302 = OpLabel
+               OpLoopMerge %303 %304 None
+               OpBranch %305
+        %305 = OpLabel
+        %307 = OpLoad %uint %i
+        %308 = OpULessThan %bool %307 %uint_2
+        %306 = OpLogicalNot %bool %308
+               OpSelectionMerge %310 None
+               OpBranchConditional %306 %311 %310
+        %311 = OpLabel
+               OpBranch %303
+        %310 = OpLabel
+               OpStore %var_for_index %val_0
+        %315 = OpLoad %uint %i
+        %317 = OpAccessChain %_ptr_Function_mat4v2half %arr %315
+        %319 = OpLoad %uint %i
+        %321 = OpAccessChain %_ptr_Function_mat4x2_f16_4 %var_for_index %319
+        %322 = OpLoad %mat4x2_f16_4 %321
+        %318 = OpFunctionCall %mat4v2half %conv_mat4x2_f16 %322
+               OpStore %317 %318
+               OpBranch %304
+        %304 = OpLabel
+        %323 = OpLoad %uint %i
+        %325 = OpIAdd %uint %323 %uint_1
+               OpStore %i %325
+               OpBranch %302
+        %303 = OpLabel
+        %326 = OpLoad %_arr_mat4v2half_uint_2 %arr
+               OpReturnValue %326
+               OpFunctionEnd
+ %main_inner = OpFunction %void None %327
+        %idx = OpFunctionParameter %uint
+        %331 = OpLabel
+        %333 = OpAccessChain %_ptr_Uniform_float %ub %uint_0 %uint_0 %idx %uint_0
+        %334 = OpLoad %float %333
+        %336 = OpAccessChain %_ptr_Uniform_int %ub %uint_0 %uint_0 %idx %uint_1
+        %337 = OpLoad %int %336
+        %339 = OpAccessChain %_ptr_Uniform_uint %ub %uint_0 %uint_0 %idx %uint_2
+        %340 = OpLoad %uint %339
+        %343 = OpAccessChain %_ptr_Uniform_half %ub %uint_0 %uint_0 %idx %uint_3
+        %344 = OpLoad %half %343
+        %346 = OpAccessChain %_ptr_Uniform_v2float %ub %uint_0 %uint_0 %idx %uint_4
+        %347 = OpLoad %v2float %346
+        %350 = OpAccessChain %_ptr_Uniform_v2int %ub %uint_0 %uint_0 %idx %uint_5
+        %351 = OpLoad %v2int %350
+        %354 = OpAccessChain %_ptr_Uniform_v2uint %ub %uint_0 %uint_0 %idx %uint_6
+        %355 = OpLoad %v2uint %354
+        %357 = OpAccessChain %_ptr_Uniform_v2half %ub %uint_0 %uint_0 %idx %uint_7
+        %358 = OpLoad %v2half %357
+        %360 = OpAccessChain %_ptr_Uniform_v3float %ub %uint_0 %uint_0 %idx %uint_8
+        %361 = OpLoad %v3float %360
+        %364 = OpAccessChain %_ptr_Uniform_v3int %ub %uint_0 %uint_0 %idx %uint_9
+        %365 = OpLoad %v3int %364
+        %368 = OpAccessChain %_ptr_Uniform_v3uint %ub %uint_0 %uint_0 %idx %uint_10
+        %369 = OpLoad %v3uint %368
+        %371 = OpAccessChain %_ptr_Uniform_v3half %ub %uint_0 %uint_0 %idx %uint_11
+        %372 = OpLoad %v3half %371
+        %375 = OpAccessChain %_ptr_Uniform_v4float %ub %uint_0 %uint_0 %idx %uint_12
+        %376 = OpLoad %v4float %375
+        %379 = OpAccessChain %_ptr_Uniform_v4int %ub %uint_0 %uint_0 %idx %uint_13
+        %380 = OpLoad %v4int %379
+        %383 = OpAccessChain %_ptr_Uniform_v4uint %ub %uint_0 %uint_0 %idx %uint_14
+        %384 = OpLoad %v4uint %383
+        %386 = OpAccessChain %_ptr_Uniform_v4half %ub %uint_0 %uint_0 %idx %uint_15
+        %387 = OpLoad %v4half %386
+        %388 = OpFunctionCall %mat2v2float %load_ub_inner_arr_p0_mat2x2_f32 %idx
+        %392 = OpAccessChain %_ptr_Uniform_mat2v3float %ub %uint_0 %uint_0 %idx %uint_18
+        %393 = OpLoad %mat2v3float %392
+        %396 = OpAccessChain %_ptr_Uniform_mat2v4float %ub %uint_0 %uint_0 %idx %uint_19
+        %397 = OpLoad %mat2v4float %396
+        %398 = OpFunctionCall %mat3v2float %load_ub_inner_arr_p0_mat3x2_f32 %idx
+        %402 = OpAccessChain %_ptr_Uniform_mat3v3float %ub %uint_0 %uint_0 %idx %uint_23
+        %403 = OpLoad %mat3v3float %402
+        %406 = OpAccessChain %_ptr_Uniform_mat3v4float %ub %uint_0 %uint_0 %idx %uint_24
+        %407 = OpLoad %mat3v4float %406
+        %408 = OpFunctionCall %mat4v2float %load_ub_inner_arr_p0_mat4x2_f32 %idx
+        %412 = OpAccessChain %_ptr_Uniform_mat4v3float %ub %uint_0 %uint_0 %idx %uint_29
+        %413 = OpLoad %mat4v3float %412
+        %416 = OpAccessChain %_ptr_Uniform_mat4v4float %ub %uint_0 %uint_0 %idx %uint_30
+        %417 = OpLoad %mat4v4float %416
+        %418 = OpFunctionCall %mat2v2half %load_ub_inner_arr_p0_mat2x2_f16 %idx
+        %420 = OpFunctionCall %mat2v3half %load_ub_inner_arr_p0_mat2x3_f16 %idx
+        %422 = OpFunctionCall %mat2v4half %load_ub_inner_arr_p0_mat2x4_f16 %idx
+        %424 = OpFunctionCall %mat3v2half %load_ub_inner_arr_p0_mat3x2_f16 %idx
+        %426 = OpFunctionCall %mat3v3half %load_ub_inner_arr_p0_mat3x3_f16 %idx
+        %428 = OpFunctionCall %mat3v4half %load_ub_inner_arr_p0_mat3x4_f16 %idx
+        %430 = OpFunctionCall %mat4v2half %load_ub_inner_arr_p0_mat4x2_f16 %idx
+        %432 = OpFunctionCall %mat4v3half %load_ub_inner_arr_p0_mat4x3_f16 %idx
+        %434 = OpFunctionCall %mat4v4half %load_ub_inner_arr_p0_mat4x4_f16 %idx
+        %438 = OpAccessChain %_ptr_Uniform__arr_v3float_uint_2 %ub %uint_0 %uint_0 %idx %uint_58
+        %439 = OpLoad %_arr_v3float_uint_2 %438
+        %443 = OpAccessChain %_ptr_Uniform__arr_mat4x2_f16_4_uint_2 %ub %uint_0 %uint_0 %idx %uint_59
+        %444 = OpLoad %_arr_mat4x2_f16_4_uint_2 %443
+        %440 = OpFunctionCall %_arr_mat4v2half_uint_2 %conv_arr2_mat4x2_f16 %444
+               OpReturn
+               OpFunctionEnd
+       %main = OpFunction %void None %445
+        %447 = OpLabel
+        %449 = OpLoad %uint %idx_1
+        %448 = OpFunctionCall %void %main_inner %449
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/dynamic_index/read_f16.wgsl.expected.wgsl b/test/tint/buffer/uniform/dynamic_index/read_f16.wgsl.expected.wgsl
new file mode 100644
index 0000000..3b14216
--- /dev/null
+++ b/test/tint/buffer/uniform/dynamic_index/read_f16.wgsl.expected.wgsl

@@ -0,0 +1,87 @@
+enable f16;
+
+struct Inner {
+  scalar_f32 : f32,
+  scalar_i32 : i32,
+  scalar_u32 : u32,
+  scalar_f16 : f16,
+  vec2_f32 : vec2<f32>,
+  vec2_i32 : vec2<i32>,
+  vec2_u32 : vec2<u32>,
+  vec2_f16 : vec2<f16>,
+  vec3_f32 : vec3<f32>,
+  vec3_i32 : vec3<i32>,
+  vec3_u32 : vec3<u32>,
+  vec3_f16 : vec3<f16>,
+  vec4_f32 : vec4<f32>,
+  vec4_i32 : vec4<i32>,
+  vec4_u32 : vec4<u32>,
+  vec4_f16 : vec4<f16>,
+  mat2x2_f32 : mat2x2<f32>,
+  mat2x3_f32 : mat2x3<f32>,
+  mat2x4_f32 : mat2x4<f32>,
+  mat3x2_f32 : mat3x2<f32>,
+  mat3x3_f32 : mat3x3<f32>,
+  mat3x4_f32 : mat3x4<f32>,
+  mat4x2_f32 : mat4x2<f32>,
+  mat4x3_f32 : mat4x3<f32>,
+  mat4x4_f32 : mat4x4<f32>,
+  mat2x2_f16 : mat2x2<f16>,
+  mat2x3_f16 : mat2x3<f16>,
+  mat2x4_f16 : mat2x4<f16>,
+  mat3x2_f16 : mat3x2<f16>,
+  mat3x3_f16 : mat3x3<f16>,
+  mat3x4_f16 : mat3x4<f16>,
+  mat4x2_f16 : mat4x2<f16>,
+  mat4x3_f16 : mat4x3<f16>,
+  mat4x4_f16 : mat4x4<f16>,
+  @align(16)
+  arr2_vec3_f32 : array<vec3<f32>, 2>,
+  arr2_mat4x2_f16 : array<mat4x2<f16>, 2>,
+}
+
+struct S {
+  arr : array<Inner, 8>,
+}
+
+@binding(0) @group(0) var<uniform> ub : S;
+
+@compute @workgroup_size(1)
+fn main(@builtin(local_invocation_index) idx : u32) {
+  let scalar_f32 : f32 = ub.arr[idx].scalar_f32;
+  let scalar_i32 : i32 = ub.arr[idx].scalar_i32;
+  let scalar_u32 : u32 = ub.arr[idx].scalar_u32;
+  let scalar_f16 : f16 = ub.arr[idx].scalar_f16;
+  let vec2_f32 : vec2<f32> = ub.arr[idx].vec2_f32;
+  let vec2_i32 : vec2<i32> = ub.arr[idx].vec2_i32;
+  let vec2_u32 : vec2<u32> = ub.arr[idx].vec2_u32;
+  let vec2_f16 : vec2<f16> = ub.arr[idx].vec2_f16;
+  let vec3_f32 : vec3<f32> = ub.arr[idx].vec3_f32;
+  let vec3_i32 : vec3<i32> = ub.arr[idx].vec3_i32;
+  let vec3_u32 : vec3<u32> = ub.arr[idx].vec3_u32;
+  let vec3_f16 : vec3<f16> = ub.arr[idx].vec3_f16;
+  let vec4_f32 : vec4<f32> = ub.arr[idx].vec4_f32;
+  let vec4_i32 : vec4<i32> = ub.arr[idx].vec4_i32;
+  let vec4_u32 : vec4<u32> = ub.arr[idx].vec4_u32;
+  let vec4_f16 : vec4<f16> = ub.arr[idx].vec4_f16;
+  let mat2x2_f32 : mat2x2<f32> = ub.arr[idx].mat2x2_f32;
+  let mat2x3_f32 : mat2x3<f32> = ub.arr[idx].mat2x3_f32;
+  let mat2x4_f32 : mat2x4<f32> = ub.arr[idx].mat2x4_f32;
+  let mat3x2_f32 : mat3x2<f32> = ub.arr[idx].mat3x2_f32;
+  let mat3x3_f32 : mat3x3<f32> = ub.arr[idx].mat3x3_f32;
+  let mat3x4_f32 : mat3x4<f32> = ub.arr[idx].mat3x4_f32;
+  let mat4x2_f32 : mat4x2<f32> = ub.arr[idx].mat4x2_f32;
+  let mat4x3_f32 : mat4x3<f32> = ub.arr[idx].mat4x3_f32;
+  let mat4x4_f32 : mat4x4<f32> = ub.arr[idx].mat4x4_f32;
+  let mat2x2_f16 : mat2x2<f16> = ub.arr[idx].mat2x2_f16;
+  let mat2x3_f16 : mat2x3<f16> = ub.arr[idx].mat2x3_f16;
+  let mat2x4_f16 : mat2x4<f16> = ub.arr[idx].mat2x4_f16;
+  let mat3x2_f16 : mat3x2<f16> = ub.arr[idx].mat3x2_f16;
+  let mat3x3_f16 : mat3x3<f16> = ub.arr[idx].mat3x3_f16;
+  let mat3x4_f16 : mat3x4<f16> = ub.arr[idx].mat3x4_f16;
+  let mat4x2_f16 : mat4x2<f16> = ub.arr[idx].mat4x2_f16;
+  let mat4x3_f16 : mat4x3<f16> = ub.arr[idx].mat4x3_f16;
+  let mat4x4_f16 : mat4x4<f16> = ub.arr[idx].mat4x4_f16;
+  let arr2_vec3_f32 : array<vec3<f32>, 2> = ub.arr[idx].arr2_vec3_f32;
+  let arr2_mat4x2_f16 : array<mat4x2<f16>, 2> = ub.arr[idx].arr2_mat4x2_f16;
+}

diff --git a/test/tint/buffer/uniform/static_index/read.wgsl b/test/tint/buffer/uniform/static_index/read.wgsl
index ed6e7dc..8d064d9 100644
--- a/test/tint/buffer/uniform/static_index/read.wgsl
+++ b/test/tint/buffer/uniform/static_index/read.wgsl

@@ -1,36 +1,62 @@
 struct Inner {
-    @size(16) x : i32,
+    scalar_i32 : i32,
+    @align(16) @size(16)
+    scalar_f32 : f32,
 };
 
 struct S {
-    a : vec3<i32>,
-    b : i32,
-    c : vec3<u32>,
-    d : u32,
-    e : vec3<f32>,
-    f : f32,
-    g : vec2<i32>,
-    h : vec2<i32>,
-    i : mat2x3<f32>,
-    j : mat3x2<f32>,
-    @align(16) k : Inner,
-    @align(16) l : array<Inner, 4>,
+    scalar_f32 : f32,
+    scalar_i32 : i32,
+    scalar_u32 : u32,
+    vec2_f32 : vec2<f32>,
+    vec2_i32 : vec2<i32>,
+    vec2_u32 : vec2<u32>,
+    vec3_f32 : vec3<f32>,
+    vec3_i32 : vec3<i32>,
+    vec3_u32 : vec3<u32>,
+    vec4_f32 : vec4<f32>,
+    vec4_i32 : vec4<i32>,
+    vec4_u32 : vec4<u32>,
+    mat2x2_f32 : mat2x2<f32>,
+    mat2x3_f32 : mat2x3<f32>,
+    mat2x4_f32 : mat2x4<f32>,
+    mat3x2_f32 : mat3x2<f32>,
+    mat3x3_f32 : mat3x3<f32>,
+    mat3x4_f32 : mat3x4<f32>,
+    mat4x2_f32 : mat4x2<f32>,
+    mat4x3_f32 : mat4x3<f32>,
+    mat4x4_f32 : mat4x4<f32>,
+    @align(16) arr2_vec3_f32 : array<vec3<f32>, 2>,
+    @align(16) struct_inner : Inner,
+    @align(16) array_struct_inner : array<Inner, 4>,
 };
 
-@binding(0) @group(0) var<uniform> s : S;
+@binding(0) @group(0) var<uniform> ub : S;
 
 @compute @workgroup_size(1)
 fn main() {
-    let a = s.a;
-    let b = s.b;
-    let c = s.c;
-    let d = s.d;
-    let e = s.e;
-    let f = s.f;
-    let g = s.g;
-    let h = s.h;
-    let i = s.i;
-    let j = s.j;
-    let k = s.k;
-    let l = s.l;
+    let scalar_f32 = ub.scalar_f32;
+    let scalar_i32 = ub.scalar_i32;
+    let scalar_u32 = ub.scalar_u32;
+    let vec2_f32 = ub.vec2_f32;
+    let vec2_i32 = ub.vec2_i32;
+    let vec2_u32 = ub.vec2_u32;
+    let vec3_f32 = ub.vec3_f32;
+    let vec3_i32 = ub.vec3_i32;
+    let vec3_u32 = ub.vec3_u32;
+    let vec4_f32 = ub.vec4_f32;
+    let vec4_i32 = ub.vec4_i32;
+    let vec4_u32 = ub.vec4_u32;
+    let mat2x2_f32 = ub.mat2x2_f32;
+    let mat2x3_f32 = ub.mat2x3_f32;
+    let mat2x4_f32 = ub.mat2x4_f32;
+    let mat3x2_f32 = ub.mat3x2_f32;
+    let mat3x3_f32 = ub.mat3x3_f32;
+    let mat3x4_f32 = ub.mat3x4_f32;
+    let mat4x2_f32 = ub.mat4x2_f32;
+    let mat4x3_f32 = ub.mat4x3_f32;
+    let mat4x4_f32 = ub.mat4x4_f32;
+    let arr2_vec3_f32 = ub.arr2_vec3_f32;
+    let struct_inner = ub.struct_inner;
+    let array_struct_inner = ub.array_struct_inner;
 }

diff --git a/test/tint/buffer/uniform/static_index/read.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/static_index/read.wgsl.expected.dxc.hlsl
index 8538133..7a69526 100644
--- a/test/tint/buffer/uniform/static_index/read.wgsl.expected.dxc.hlsl
+++ b/test/tint/buffer/uniform/static_index/read.wgsl.expected.dxc.hlsl

@@ -1,57 +1,139 @@
 struct Inner {
-  int x;
+  int scalar_i32;
+  float scalar_f32;
 };
 
-cbuffer cbuffer_s : register(b0, space0) {
-  uint4 s[13];
+cbuffer cbuffer_ub : register(b0, space0) {
+  uint4 ub[44];
 };
 
-float2x3 tint_symbol_7(uint4 buffer[13], uint offset) {
+float2x2 tint_symbol_12(uint4 buffer[44], uint offset) {
   const uint scalar_offset = ((offset + 0u)) / 4;
-  const uint scalar_offset_1 = ((offset + 16u)) / 4;
-  return float2x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz));
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  return float2x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)));
 }
 
-float3x2 tint_symbol_8(uint4 buffer[13], uint offset) {
+float2x3 tint_symbol_13(uint4 buffer[44], uint offset) {
   const uint scalar_offset_2 = ((offset + 0u)) / 4;
-  uint4 ubo_load = buffer[scalar_offset_2 / 4];
-  const uint scalar_offset_3 = ((offset + 8u)) / 4;
-  uint4 ubo_load_1 = buffer[scalar_offset_3 / 4];
-  const uint scalar_offset_4 = ((offset + 16u)) / 4;
-  uint4 ubo_load_2 = buffer[scalar_offset_4 / 4];
-  return float3x2(asfloat(((scalar_offset_2 & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_3 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_4 & 2) ? ubo_load_2.zw : ubo_load_2.xy)));
+  const uint scalar_offset_3 = ((offset + 16u)) / 4;
+  return float2x3(asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
 }
 
-Inner tint_symbol_10(uint4 buffer[13], uint offset) {
-  const uint scalar_offset_5 = ((offset + 0u)) / 4;
-  const Inner tint_symbol_12 = {asint(buffer[scalar_offset_5 / 4][scalar_offset_5 % 4])};
-  return tint_symbol_12;
+float2x4 tint_symbol_14(uint4 buffer[44], uint offset) {
+  const uint scalar_offset_4 = ((offset + 0u)) / 4;
+  const uint scalar_offset_5 = ((offset + 16u)) / 4;
+  return float2x4(asfloat(buffer[scalar_offset_4 / 4]), asfloat(buffer[scalar_offset_5 / 4]));
 }
 
-typedef Inner tint_symbol_11_ret[4];
-tint_symbol_11_ret tint_symbol_11(uint4 buffer[13], uint offset) {
-  Inner arr[4] = (Inner[4])0;
+float3x2 tint_symbol_15(uint4 buffer[44], uint offset) {
+  const uint scalar_offset_6 = ((offset + 0u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_6 / 4];
+  const uint scalar_offset_7 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_7 / 4];
+  const uint scalar_offset_8 = ((offset + 16u)) / 4;
+  uint4 ubo_load_4 = buffer[scalar_offset_8 / 4];
+  return float3x2(asfloat(((scalar_offset_6 & 2) ? ubo_load_2.zw : ubo_load_2.xy)), asfloat(((scalar_offset_7 & 2) ? ubo_load_3.zw : ubo_load_3.xy)), asfloat(((scalar_offset_8 & 2) ? ubo_load_4.zw : ubo_load_4.xy)));
+}
+
+float3x3 tint_symbol_16(uint4 buffer[44], uint offset) {
+  const uint scalar_offset_9 = ((offset + 0u)) / 4;
+  const uint scalar_offset_10 = ((offset + 16u)) / 4;
+  const uint scalar_offset_11 = ((offset + 32u)) / 4;
+  return float3x3(asfloat(buffer[scalar_offset_9 / 4].xyz), asfloat(buffer[scalar_offset_10 / 4].xyz), asfloat(buffer[scalar_offset_11 / 4].xyz));
+}
+
+float3x4 tint_symbol_17(uint4 buffer[44], uint offset) {
+  const uint scalar_offset_12 = ((offset + 0u)) / 4;
+  const uint scalar_offset_13 = ((offset + 16u)) / 4;
+  const uint scalar_offset_14 = ((offset + 32u)) / 4;
+  return float3x4(asfloat(buffer[scalar_offset_12 / 4]), asfloat(buffer[scalar_offset_13 / 4]), asfloat(buffer[scalar_offset_14 / 4]));
+}
+
+float4x2 tint_symbol_18(uint4 buffer[44], uint offset) {
+  const uint scalar_offset_15 = ((offset + 0u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_15 / 4];
+  const uint scalar_offset_16 = ((offset + 8u)) / 4;
+  uint4 ubo_load_6 = buffer[scalar_offset_16 / 4];
+  const uint scalar_offset_17 = ((offset + 16u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_17 / 4];
+  const uint scalar_offset_18 = ((offset + 24u)) / 4;
+  uint4 ubo_load_8 = buffer[scalar_offset_18 / 4];
+  return float4x2(asfloat(((scalar_offset_15 & 2) ? ubo_load_5.zw : ubo_load_5.xy)), asfloat(((scalar_offset_16 & 2) ? ubo_load_6.zw : ubo_load_6.xy)), asfloat(((scalar_offset_17 & 2) ? ubo_load_7.zw : ubo_load_7.xy)), asfloat(((scalar_offset_18 & 2) ? ubo_load_8.zw : ubo_load_8.xy)));
+}
+
+float4x3 tint_symbol_19(uint4 buffer[44], uint offset) {
+  const uint scalar_offset_19 = ((offset + 0u)) / 4;
+  const uint scalar_offset_20 = ((offset + 16u)) / 4;
+  const uint scalar_offset_21 = ((offset + 32u)) / 4;
+  const uint scalar_offset_22 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset_19 / 4].xyz), asfloat(buffer[scalar_offset_20 / 4].xyz), asfloat(buffer[scalar_offset_21 / 4].xyz), asfloat(buffer[scalar_offset_22 / 4].xyz));
+}
+
+float4x4 tint_symbol_20(uint4 buffer[44], uint offset) {
+  const uint scalar_offset_23 = ((offset + 0u)) / 4;
+  const uint scalar_offset_24 = ((offset + 16u)) / 4;
+  const uint scalar_offset_25 = ((offset + 32u)) / 4;
+  const uint scalar_offset_26 = ((offset + 48u)) / 4;
+  return float4x4(asfloat(buffer[scalar_offset_23 / 4]), asfloat(buffer[scalar_offset_24 / 4]), asfloat(buffer[scalar_offset_25 / 4]), asfloat(buffer[scalar_offset_26 / 4]));
+}
+
+typedef float3 tint_symbol_21_ret[2];
+tint_symbol_21_ret tint_symbol_21(uint4 buffer[44], uint offset) {
+  float3 arr[2] = (float3[2])0;
   {
-    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
-      arr[i_1] = tint_symbol_10(buffer, (offset + (i_1 * 16u)));
+    for(uint i = 0u; (i < 2u); i = (i + 1u)) {
+      const uint scalar_offset_27 = ((offset + (i * 16u))) / 4;
+      arr[i] = asfloat(buffer[scalar_offset_27 / 4].xyz);
     }
   }
   return arr;
 }
 
+Inner tint_symbol_22(uint4 buffer[44], uint offset) {
+  const uint scalar_offset_28 = ((offset + 0u)) / 4;
+  const uint scalar_offset_29 = ((offset + 16u)) / 4;
+  const Inner tint_symbol_24 = {asint(buffer[scalar_offset_28 / 4][scalar_offset_28 % 4]), asfloat(buffer[scalar_offset_29 / 4][scalar_offset_29 % 4])};
+  return tint_symbol_24;
+}
+
+typedef Inner tint_symbol_23_ret[4];
+tint_symbol_23_ret tint_symbol_23(uint4 buffer[44], uint offset) {
+  Inner arr_1[4] = (Inner[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr_1[i_1] = tint_symbol_22(buffer, (offset + (i_1 * 32u)));
+    }
+  }
+  return arr_1;
+}
+
 [numthreads(1, 1, 1)]
 void main() {
-  const int3 a = asint(s[0].xyz);
-  const int b = asint(s[0].w);
-  const uint3 c = s[1].xyz;
-  const uint d = s[1].w;
-  const float3 e = asfloat(s[2].xyz);
-  const float f = asfloat(s[2].w);
-  const int2 g = asint(s[3].xy);
-  const int2 h = asint(s[3].zw);
-  const float2x3 i = tint_symbol_7(s, 64u);
-  const float3x2 j = tint_symbol_8(s, 96u);
-  const Inner k = tint_symbol_10(s, 128u);
-  const Inner l[4] = tint_symbol_11(s, 144u);
+  const float scalar_f32 = asfloat(ub[0].x);
+  const int scalar_i32 = asint(ub[0].y);
+  const uint scalar_u32 = ub[0].z;
+  const float2 vec2_f32 = asfloat(ub[1].xy);
+  const int2 vec2_i32 = asint(ub[1].zw);
+  const uint2 vec2_u32 = ub[2].xy;
+  const float3 vec3_f32 = asfloat(ub[3].xyz);
+  const int3 vec3_i32 = asint(ub[4].xyz);
+  const uint3 vec3_u32 = ub[5].xyz;
+  const float4 vec4_f32 = asfloat(ub[6]);
+  const int4 vec4_i32 = asint(ub[7]);
+  const uint4 vec4_u32 = ub[8];
+  const float2x2 mat2x2_f32 = tint_symbol_12(ub, 144u);
+  const float2x3 mat2x3_f32 = tint_symbol_13(ub, 160u);
+  const float2x4 mat2x4_f32 = tint_symbol_14(ub, 192u);
+  const float3x2 mat3x2_f32 = tint_symbol_15(ub, 224u);
+  const float3x3 mat3x3_f32 = tint_symbol_16(ub, 256u);
+  const float3x4 mat3x4_f32 = tint_symbol_17(ub, 304u);
+  const float4x2 mat4x2_f32 = tint_symbol_18(ub, 352u);
+  const float4x3 mat4x3_f32 = tint_symbol_19(ub, 384u);
+  const float4x4 mat4x4_f32 = tint_symbol_20(ub, 448u);
+  const float3 arr2_vec3_f32[2] = tint_symbol_21(ub, 512u);
+  const Inner struct_inner = tint_symbol_22(ub, 544u);
+  const Inner array_struct_inner[4] = tint_symbol_23(ub, 576u);
   return;
 }

diff --git a/test/tint/buffer/uniform/static_index/read.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/static_index/read.wgsl.expected.fxc.hlsl
index 8538133..7a69526 100644
--- a/test/tint/buffer/uniform/static_index/read.wgsl.expected.fxc.hlsl
+++ b/test/tint/buffer/uniform/static_index/read.wgsl.expected.fxc.hlsl

@@ -1,57 +1,139 @@
 struct Inner {
-  int x;
+  int scalar_i32;
+  float scalar_f32;
 };
 
-cbuffer cbuffer_s : register(b0, space0) {
-  uint4 s[13];
+cbuffer cbuffer_ub : register(b0, space0) {
+  uint4 ub[44];
 };
 
-float2x3 tint_symbol_7(uint4 buffer[13], uint offset) {
+float2x2 tint_symbol_12(uint4 buffer[44], uint offset) {
   const uint scalar_offset = ((offset + 0u)) / 4;
-  const uint scalar_offset_1 = ((offset + 16u)) / 4;
-  return float2x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz));
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  return float2x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)));
 }
 
-float3x2 tint_symbol_8(uint4 buffer[13], uint offset) {
+float2x3 tint_symbol_13(uint4 buffer[44], uint offset) {
   const uint scalar_offset_2 = ((offset + 0u)) / 4;
-  uint4 ubo_load = buffer[scalar_offset_2 / 4];
-  const uint scalar_offset_3 = ((offset + 8u)) / 4;
-  uint4 ubo_load_1 = buffer[scalar_offset_3 / 4];
-  const uint scalar_offset_4 = ((offset + 16u)) / 4;
-  uint4 ubo_load_2 = buffer[scalar_offset_4 / 4];
-  return float3x2(asfloat(((scalar_offset_2 & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_3 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_4 & 2) ? ubo_load_2.zw : ubo_load_2.xy)));
+  const uint scalar_offset_3 = ((offset + 16u)) / 4;
+  return float2x3(asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
 }
 
-Inner tint_symbol_10(uint4 buffer[13], uint offset) {
-  const uint scalar_offset_5 = ((offset + 0u)) / 4;
-  const Inner tint_symbol_12 = {asint(buffer[scalar_offset_5 / 4][scalar_offset_5 % 4])};
-  return tint_symbol_12;
+float2x4 tint_symbol_14(uint4 buffer[44], uint offset) {
+  const uint scalar_offset_4 = ((offset + 0u)) / 4;
+  const uint scalar_offset_5 = ((offset + 16u)) / 4;
+  return float2x4(asfloat(buffer[scalar_offset_4 / 4]), asfloat(buffer[scalar_offset_5 / 4]));
 }
 
-typedef Inner tint_symbol_11_ret[4];
-tint_symbol_11_ret tint_symbol_11(uint4 buffer[13], uint offset) {
-  Inner arr[4] = (Inner[4])0;
+float3x2 tint_symbol_15(uint4 buffer[44], uint offset) {
+  const uint scalar_offset_6 = ((offset + 0u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_6 / 4];
+  const uint scalar_offset_7 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_7 / 4];
+  const uint scalar_offset_8 = ((offset + 16u)) / 4;
+  uint4 ubo_load_4 = buffer[scalar_offset_8 / 4];
+  return float3x2(asfloat(((scalar_offset_6 & 2) ? ubo_load_2.zw : ubo_load_2.xy)), asfloat(((scalar_offset_7 & 2) ? ubo_load_3.zw : ubo_load_3.xy)), asfloat(((scalar_offset_8 & 2) ? ubo_load_4.zw : ubo_load_4.xy)));
+}
+
+float3x3 tint_symbol_16(uint4 buffer[44], uint offset) {
+  const uint scalar_offset_9 = ((offset + 0u)) / 4;
+  const uint scalar_offset_10 = ((offset + 16u)) / 4;
+  const uint scalar_offset_11 = ((offset + 32u)) / 4;
+  return float3x3(asfloat(buffer[scalar_offset_9 / 4].xyz), asfloat(buffer[scalar_offset_10 / 4].xyz), asfloat(buffer[scalar_offset_11 / 4].xyz));
+}
+
+float3x4 tint_symbol_17(uint4 buffer[44], uint offset) {
+  const uint scalar_offset_12 = ((offset + 0u)) / 4;
+  const uint scalar_offset_13 = ((offset + 16u)) / 4;
+  const uint scalar_offset_14 = ((offset + 32u)) / 4;
+  return float3x4(asfloat(buffer[scalar_offset_12 / 4]), asfloat(buffer[scalar_offset_13 / 4]), asfloat(buffer[scalar_offset_14 / 4]));
+}
+
+float4x2 tint_symbol_18(uint4 buffer[44], uint offset) {
+  const uint scalar_offset_15 = ((offset + 0u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_15 / 4];
+  const uint scalar_offset_16 = ((offset + 8u)) / 4;
+  uint4 ubo_load_6 = buffer[scalar_offset_16 / 4];
+  const uint scalar_offset_17 = ((offset + 16u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_17 / 4];
+  const uint scalar_offset_18 = ((offset + 24u)) / 4;
+  uint4 ubo_load_8 = buffer[scalar_offset_18 / 4];
+  return float4x2(asfloat(((scalar_offset_15 & 2) ? ubo_load_5.zw : ubo_load_5.xy)), asfloat(((scalar_offset_16 & 2) ? ubo_load_6.zw : ubo_load_6.xy)), asfloat(((scalar_offset_17 & 2) ? ubo_load_7.zw : ubo_load_7.xy)), asfloat(((scalar_offset_18 & 2) ? ubo_load_8.zw : ubo_load_8.xy)));
+}
+
+float4x3 tint_symbol_19(uint4 buffer[44], uint offset) {
+  const uint scalar_offset_19 = ((offset + 0u)) / 4;
+  const uint scalar_offset_20 = ((offset + 16u)) / 4;
+  const uint scalar_offset_21 = ((offset + 32u)) / 4;
+  const uint scalar_offset_22 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset_19 / 4].xyz), asfloat(buffer[scalar_offset_20 / 4].xyz), asfloat(buffer[scalar_offset_21 / 4].xyz), asfloat(buffer[scalar_offset_22 / 4].xyz));
+}
+
+float4x4 tint_symbol_20(uint4 buffer[44], uint offset) {
+  const uint scalar_offset_23 = ((offset + 0u)) / 4;
+  const uint scalar_offset_24 = ((offset + 16u)) / 4;
+  const uint scalar_offset_25 = ((offset + 32u)) / 4;
+  const uint scalar_offset_26 = ((offset + 48u)) / 4;
+  return float4x4(asfloat(buffer[scalar_offset_23 / 4]), asfloat(buffer[scalar_offset_24 / 4]), asfloat(buffer[scalar_offset_25 / 4]), asfloat(buffer[scalar_offset_26 / 4]));
+}
+
+typedef float3 tint_symbol_21_ret[2];
+tint_symbol_21_ret tint_symbol_21(uint4 buffer[44], uint offset) {
+  float3 arr[2] = (float3[2])0;
   {
-    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
-      arr[i_1] = tint_symbol_10(buffer, (offset + (i_1 * 16u)));
+    for(uint i = 0u; (i < 2u); i = (i + 1u)) {
+      const uint scalar_offset_27 = ((offset + (i * 16u))) / 4;
+      arr[i] = asfloat(buffer[scalar_offset_27 / 4].xyz);
     }
   }
   return arr;
 }
 
+Inner tint_symbol_22(uint4 buffer[44], uint offset) {
+  const uint scalar_offset_28 = ((offset + 0u)) / 4;
+  const uint scalar_offset_29 = ((offset + 16u)) / 4;
+  const Inner tint_symbol_24 = {asint(buffer[scalar_offset_28 / 4][scalar_offset_28 % 4]), asfloat(buffer[scalar_offset_29 / 4][scalar_offset_29 % 4])};
+  return tint_symbol_24;
+}
+
+typedef Inner tint_symbol_23_ret[4];
+tint_symbol_23_ret tint_symbol_23(uint4 buffer[44], uint offset) {
+  Inner arr_1[4] = (Inner[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr_1[i_1] = tint_symbol_22(buffer, (offset + (i_1 * 32u)));
+    }
+  }
+  return arr_1;
+}
+
 [numthreads(1, 1, 1)]
 void main() {
-  const int3 a = asint(s[0].xyz);
-  const int b = asint(s[0].w);
-  const uint3 c = s[1].xyz;
-  const uint d = s[1].w;
-  const float3 e = asfloat(s[2].xyz);
-  const float f = asfloat(s[2].w);
-  const int2 g = asint(s[3].xy);
-  const int2 h = asint(s[3].zw);
-  const float2x3 i = tint_symbol_7(s, 64u);
-  const float3x2 j = tint_symbol_8(s, 96u);
-  const Inner k = tint_symbol_10(s, 128u);
-  const Inner l[4] = tint_symbol_11(s, 144u);
+  const float scalar_f32 = asfloat(ub[0].x);
+  const int scalar_i32 = asint(ub[0].y);
+  const uint scalar_u32 = ub[0].z;
+  const float2 vec2_f32 = asfloat(ub[1].xy);
+  const int2 vec2_i32 = asint(ub[1].zw);
+  const uint2 vec2_u32 = ub[2].xy;
+  const float3 vec3_f32 = asfloat(ub[3].xyz);
+  const int3 vec3_i32 = asint(ub[4].xyz);
+  const uint3 vec3_u32 = ub[5].xyz;
+  const float4 vec4_f32 = asfloat(ub[6]);
+  const int4 vec4_i32 = asint(ub[7]);
+  const uint4 vec4_u32 = ub[8];
+  const float2x2 mat2x2_f32 = tint_symbol_12(ub, 144u);
+  const float2x3 mat2x3_f32 = tint_symbol_13(ub, 160u);
+  const float2x4 mat2x4_f32 = tint_symbol_14(ub, 192u);
+  const float3x2 mat3x2_f32 = tint_symbol_15(ub, 224u);
+  const float3x3 mat3x3_f32 = tint_symbol_16(ub, 256u);
+  const float3x4 mat3x4_f32 = tint_symbol_17(ub, 304u);
+  const float4x2 mat4x2_f32 = tint_symbol_18(ub, 352u);
+  const float4x3 mat4x3_f32 = tint_symbol_19(ub, 384u);
+  const float4x4 mat4x4_f32 = tint_symbol_20(ub, 448u);
+  const float3 arr2_vec3_f32[2] = tint_symbol_21(ub, 512u);
+  const Inner struct_inner = tint_symbol_22(ub, 544u);
+  const Inner array_struct_inner[4] = tint_symbol_23(ub, 576u);
   return;
 }

diff --git a/test/tint/buffer/uniform/static_index/read.wgsl.expected.glsl b/test/tint/buffer/uniform/static_index/read.wgsl.expected.glsl
index 3315e42..89ce45b 100644
--- a/test/tint/buffer/uniform/static_index/read.wgsl.expected.glsl
+++ b/test/tint/buffer/uniform/static_index/read.wgsl.expected.glsl

@@ -1,69 +1,133 @@
 #version 310 es
 
 struct Inner {
-  int x;
+  int scalar_i32;
   uint pad;
   uint pad_1;
   uint pad_2;
+  float scalar_f32;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
 };
 
 struct S {
-  ivec3 a;
-  int b;
-  uvec3 c;
-  uint d;
-  vec3 e;
-  float f;
-  ivec2 g;
-  ivec2 h;
-  mat2x3 i;
-  mat3x2 j;
-  uint pad_3;
-  uint pad_4;
-  Inner k;
-  Inner l[4];
+  float scalar_f32;
+  int scalar_i32;
+  uint scalar_u32;
+  uint pad_6;
+  vec2 vec2_f32;
+  ivec2 vec2_i32;
+  uvec2 vec2_u32;
+  uint pad_7;
+  uint pad_8;
+  vec3 vec3_f32;
+  uint pad_9;
+  ivec3 vec3_i32;
+  uint pad_10;
+  uvec3 vec3_u32;
+  uint pad_11;
+  vec4 vec4_f32;
+  ivec4 vec4_i32;
+  uvec4 vec4_u32;
+  mat2 mat2x2_f32;
+  mat2x3 mat2x3_f32;
+  mat2x4 mat2x4_f32;
+  mat3x2 mat3x2_f32;
+  uint pad_12;
+  uint pad_13;
+  mat3 mat3x3_f32;
+  mat3x4 mat3x4_f32;
+  mat4x2 mat4x2_f32;
+  mat4x3 mat4x3_f32;
+  mat4 mat4x4_f32;
+  vec3 arr2_vec3_f32[2];
+  Inner struct_inner;
+  Inner array_struct_inner[4];
 };
 
 struct S_std140 {
-  ivec3 a;
-  int b;
-  uvec3 c;
-  uint d;
-  vec3 e;
-  float f;
-  ivec2 g;
-  ivec2 h;
-  mat2x3 i;
-  vec2 j_0;
-  vec2 j_1;
-  vec2 j_2;
-  uint pad_3;
-  uint pad_4;
-  Inner k;
-  Inner l[4];
+  float scalar_f32;
+  int scalar_i32;
+  uint scalar_u32;
+  uint pad_6;
+  vec2 vec2_f32;
+  ivec2 vec2_i32;
+  uvec2 vec2_u32;
+  uint pad_7;
+  uint pad_8;
+  vec3 vec3_f32;
+  uint pad_9;
+  ivec3 vec3_i32;
+  uint pad_10;
+  uvec3 vec3_u32;
+  uint pad_11;
+  vec4 vec4_f32;
+  ivec4 vec4_i32;
+  uvec4 vec4_u32;
+  vec2 mat2x2_f32_0;
+  vec2 mat2x2_f32_1;
+  mat2x3 mat2x3_f32;
+  mat2x4 mat2x4_f32;
+  vec2 mat3x2_f32_0;
+  vec2 mat3x2_f32_1;
+  vec2 mat3x2_f32_2;
+  uint pad_12;
+  uint pad_13;
+  mat3 mat3x3_f32;
+  mat3x4 mat3x4_f32;
+  vec2 mat4x2_f32_0;
+  vec2 mat4x2_f32_1;
+  vec2 mat4x2_f32_2;
+  vec2 mat4x2_f32_3;
+  mat4x3 mat4x3_f32;
+  mat4 mat4x4_f32;
+  vec3 arr2_vec3_f32[2];
+  Inner struct_inner;
+  Inner array_struct_inner[4];
 };
 
-layout(binding = 0, std140) uniform s_block_std140_ubo {
+layout(binding = 0, std140) uniform ub_block_std140_ubo {
   S_std140 inner;
-} s;
+} ub;
 
-mat3x2 load_s_inner_j() {
-  return mat3x2(s.inner.j_0, s.inner.j_1, s.inner.j_2);
+mat2 load_ub_inner_mat2x2_f32() {
+  return mat2(ub.inner.mat2x2_f32_0, ub.inner.mat2x2_f32_1);
+}
+
+mat3x2 load_ub_inner_mat3x2_f32() {
+  return mat3x2(ub.inner.mat3x2_f32_0, ub.inner.mat3x2_f32_1, ub.inner.mat3x2_f32_2);
+}
+
+mat4x2 load_ub_inner_mat4x2_f32() {
+  return mat4x2(ub.inner.mat4x2_f32_0, ub.inner.mat4x2_f32_1, ub.inner.mat4x2_f32_2, ub.inner.mat4x2_f32_3);
 }
 
 void tint_symbol() {
-  ivec3 a = s.inner.a;
-  int b = s.inner.b;
-  uvec3 c = s.inner.c;
-  uint d = s.inner.d;
-  vec3 e = s.inner.e;
-  float f = s.inner.f;
-  ivec2 g = s.inner.g;
-  ivec2 h = s.inner.h;
-  mat2x3 i = s.inner.i;
-  mat3x2 j = load_s_inner_j();
-  Inner k = s.inner.k;
-  Inner l[4] = s.inner.l;
+  float scalar_f32 = ub.inner.scalar_f32;
+  int scalar_i32 = ub.inner.scalar_i32;
+  uint scalar_u32 = ub.inner.scalar_u32;
+  vec2 vec2_f32 = ub.inner.vec2_f32;
+  ivec2 vec2_i32 = ub.inner.vec2_i32;
+  uvec2 vec2_u32 = ub.inner.vec2_u32;
+  vec3 vec3_f32 = ub.inner.vec3_f32;
+  ivec3 vec3_i32 = ub.inner.vec3_i32;
+  uvec3 vec3_u32 = ub.inner.vec3_u32;
+  vec4 vec4_f32 = ub.inner.vec4_f32;
+  ivec4 vec4_i32 = ub.inner.vec4_i32;
+  uvec4 vec4_u32 = ub.inner.vec4_u32;
+  mat2 mat2x2_f32 = load_ub_inner_mat2x2_f32();
+  mat2x3 mat2x3_f32 = ub.inner.mat2x3_f32;
+  mat2x4 mat2x4_f32 = ub.inner.mat2x4_f32;
+  mat3x2 mat3x2_f32 = load_ub_inner_mat3x2_f32();
+  mat3 mat3x3_f32 = ub.inner.mat3x3_f32;
+  mat3x4 mat3x4_f32 = ub.inner.mat3x4_f32;
+  mat4x2 mat4x2_f32 = load_ub_inner_mat4x2_f32();
+  mat4x3 mat4x3_f32 = ub.inner.mat4x3_f32;
+  mat4 mat4x4_f32 = ub.inner.mat4x4_f32;
+  vec3 arr2_vec3_f32[2] = ub.inner.arr2_vec3_f32;
+  Inner struct_inner = ub.inner.struct_inner;
+  Inner array_struct_inner[4] = ub.inner.array_struct_inner;
 }
 
 layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;

diff --git a/test/tint/buffer/uniform/static_index/read.wgsl.expected.msl b/test/tint/buffer/uniform/static_index/read.wgsl.expected.msl
index 02d091d..93dc443 100644
--- a/test/tint/buffer/uniform/static_index/read.wgsl.expected.msl
+++ b/test/tint/buffer/uniform/static_index/read.wgsl.expected.msl

@@ -15,39 +15,70 @@
 };
 
 struct Inner {
-  /* 0x0000 */ int x;
+  /* 0x0000 */ int scalar_i32;
   /* 0x0004 */ tint_array<int8_t, 12> tint_pad;
+  /* 0x0010 */ float scalar_f32;
+  /* 0x0014 */ tint_array<int8_t, 12> tint_pad_1;
 };
 
 struct S {
-  /* 0x0000 */ packed_int3 a;
-  /* 0x000c */ int b;
-  /* 0x0010 */ packed_uint3 c;
-  /* 0x001c */ uint d;
-  /* 0x0020 */ packed_float3 e;
-  /* 0x002c */ float f;
-  /* 0x0030 */ int2 g;
-  /* 0x0038 */ int2 h;
-  /* 0x0040 */ float2x3 i;
-  /* 0x0060 */ float3x2 j;
-  /* 0x0078 */ tint_array<int8_t, 8> tint_pad_1;
-  /* 0x0080 */ Inner k;
-  /* 0x0090 */ tint_array<Inner, 4> l;
+  /* 0x0000 */ float scalar_f32;
+  /* 0x0004 */ int scalar_i32;
+  /* 0x0008 */ uint scalar_u32;
+  /* 0x000c */ tint_array<int8_t, 4> tint_pad_2;
+  /* 0x0010 */ float2 vec2_f32;
+  /* 0x0018 */ int2 vec2_i32;
+  /* 0x0020 */ uint2 vec2_u32;
+  /* 0x0028 */ tint_array<int8_t, 8> tint_pad_3;
+  /* 0x0030 */ packed_float3 vec3_f32;
+  /* 0x003c */ tint_array<int8_t, 4> tint_pad_4;
+  /* 0x0040 */ packed_int3 vec3_i32;
+  /* 0x004c */ tint_array<int8_t, 4> tint_pad_5;
+  /* 0x0050 */ packed_uint3 vec3_u32;
+  /* 0x005c */ tint_array<int8_t, 4> tint_pad_6;
+  /* 0x0060 */ float4 vec4_f32;
+  /* 0x0070 */ int4 vec4_i32;
+  /* 0x0080 */ uint4 vec4_u32;
+  /* 0x0090 */ float2x2 mat2x2_f32;
+  /* 0x00a0 */ float2x3 mat2x3_f32;
+  /* 0x00c0 */ float2x4 mat2x4_f32;
+  /* 0x00e0 */ float3x2 mat3x2_f32;
+  /* 0x00f8 */ tint_array<int8_t, 8> tint_pad_7;
+  /* 0x0100 */ float3x3 mat3x3_f32;
+  /* 0x0130 */ float3x4 mat3x4_f32;
+  /* 0x0160 */ float4x2 mat4x2_f32;
+  /* 0x0180 */ float4x3 mat4x3_f32;
+  /* 0x01c0 */ float4x4 mat4x4_f32;
+  /* 0x0200 */ tint_array<float3, 2> arr2_vec3_f32;
+  /* 0x0220 */ Inner struct_inner;
+  /* 0x0240 */ tint_array<Inner, 4> array_struct_inner;
 };
 
 kernel void tint_symbol(const constant S* tint_symbol_1 [[buffer(0)]]) {
-  int3 const a = int3((*(tint_symbol_1)).a);
-  int const b = (*(tint_symbol_1)).b;
-  uint3 const c = uint3((*(tint_symbol_1)).c);
-  uint const d = (*(tint_symbol_1)).d;
-  float3 const e = float3((*(tint_symbol_1)).e);
-  float const f = (*(tint_symbol_1)).f;
-  int2 const g = (*(tint_symbol_1)).g;
-  int2 const h = (*(tint_symbol_1)).h;
-  float2x3 const i = (*(tint_symbol_1)).i;
-  float3x2 const j = (*(tint_symbol_1)).j;
-  Inner const k = (*(tint_symbol_1)).k;
-  tint_array<Inner, 4> const l = (*(tint_symbol_1)).l;
+  float const scalar_f32 = (*(tint_symbol_1)).scalar_f32;
+  int const scalar_i32 = (*(tint_symbol_1)).scalar_i32;
+  uint const scalar_u32 = (*(tint_symbol_1)).scalar_u32;
+  float2 const vec2_f32 = (*(tint_symbol_1)).vec2_f32;
+  int2 const vec2_i32 = (*(tint_symbol_1)).vec2_i32;
+  uint2 const vec2_u32 = (*(tint_symbol_1)).vec2_u32;
+  float3 const vec3_f32 = float3((*(tint_symbol_1)).vec3_f32);
+  int3 const vec3_i32 = int3((*(tint_symbol_1)).vec3_i32);
+  uint3 const vec3_u32 = uint3((*(tint_symbol_1)).vec3_u32);
+  float4 const vec4_f32 = (*(tint_symbol_1)).vec4_f32;
+  int4 const vec4_i32 = (*(tint_symbol_1)).vec4_i32;
+  uint4 const vec4_u32 = (*(tint_symbol_1)).vec4_u32;
+  float2x2 const mat2x2_f32 = (*(tint_symbol_1)).mat2x2_f32;
+  float2x3 const mat2x3_f32 = (*(tint_symbol_1)).mat2x3_f32;
+  float2x4 const mat2x4_f32 = (*(tint_symbol_1)).mat2x4_f32;
+  float3x2 const mat3x2_f32 = (*(tint_symbol_1)).mat3x2_f32;
+  float3x3 const mat3x3_f32 = (*(tint_symbol_1)).mat3x3_f32;
+  float3x4 const mat3x4_f32 = (*(tint_symbol_1)).mat3x4_f32;
+  float4x2 const mat4x2_f32 = (*(tint_symbol_1)).mat4x2_f32;
+  float4x3 const mat4x3_f32 = (*(tint_symbol_1)).mat4x3_f32;
+  float4x4 const mat4x4_f32 = (*(tint_symbol_1)).mat4x4_f32;
+  tint_array<float3, 2> const arr2_vec3_f32 = (*(tint_symbol_1)).arr2_vec3_f32;
+  Inner const struct_inner = (*(tint_symbol_1)).struct_inner;
+  tint_array<Inner, 4> const array_struct_inner = (*(tint_symbol_1)).array_struct_inner;
   return;
 }
 

diff --git a/test/tint/buffer/uniform/static_index/read.wgsl.expected.spvasm b/test/tint/buffer/uniform/static_index/read.wgsl.expected.spvasm
index 11b5abb..95b5802 100644
--- a/test/tint/buffer/uniform/static_index/read.wgsl.expected.spvasm
+++ b/test/tint/buffer/uniform/static_index/read.wgsl.expected.spvasm

@@ -1,138 +1,271 @@
 ; SPIR-V
 ; Version: 1.3
 ; Generator: Google Tint Compiler; 0
-; Bound: 85
+; Bound: 175
 ; Schema: 0
                OpCapability Shader
                OpMemoryModel Logical GLSL450
                OpEntryPoint GLCompute %main "main"
                OpExecutionMode %main LocalSize 1 1 1
-               OpName %s_block_std140 "s_block_std140"
-               OpMemberName %s_block_std140 0 "inner"
+               OpName %ub_block_std140 "ub_block_std140"
+               OpMemberName %ub_block_std140 0 "inner"
                OpName %S_std140 "S_std140"
-               OpMemberName %S_std140 0 "a"
-               OpMemberName %S_std140 1 "b"
-               OpMemberName %S_std140 2 "c"
-               OpMemberName %S_std140 3 "d"
-               OpMemberName %S_std140 4 "e"
-               OpMemberName %S_std140 5 "f"
-               OpMemberName %S_std140 6 "g"
-               OpMemberName %S_std140 7 "h"
-               OpMemberName %S_std140 8 "i"
-               OpMemberName %S_std140 9 "j_0"
-               OpMemberName %S_std140 10 "j_1"
-               OpMemberName %S_std140 11 "j_2"
-               OpMemberName %S_std140 12 "k"
+               OpMemberName %S_std140 0 "scalar_f32"
+               OpMemberName %S_std140 1 "scalar_i32"
+               OpMemberName %S_std140 2 "scalar_u32"
+               OpMemberName %S_std140 3 "vec2_f32"
+               OpMemberName %S_std140 4 "vec2_i32"
+               OpMemberName %S_std140 5 "vec2_u32"
+               OpMemberName %S_std140 6 "vec3_f32"
+               OpMemberName %S_std140 7 "vec3_i32"
+               OpMemberName %S_std140 8 "vec3_u32"
+               OpMemberName %S_std140 9 "vec4_f32"
+               OpMemberName %S_std140 10 "vec4_i32"
+               OpMemberName %S_std140 11 "vec4_u32"
+               OpMemberName %S_std140 12 "mat2x2_f32_0"
+               OpMemberName %S_std140 13 "mat2x2_f32_1"
+               OpMemberName %S_std140 14 "mat2x3_f32"
+               OpMemberName %S_std140 15 "mat2x4_f32"
+               OpMemberName %S_std140 16 "mat3x2_f32_0"
+               OpMemberName %S_std140 17 "mat3x2_f32_1"
+               OpMemberName %S_std140 18 "mat3x2_f32_2"
+               OpMemberName %S_std140 19 "mat3x3_f32"
+               OpMemberName %S_std140 20 "mat3x4_f32"
+               OpMemberName %S_std140 21 "mat4x2_f32_0"
+               OpMemberName %S_std140 22 "mat4x2_f32_1"
+               OpMemberName %S_std140 23 "mat4x2_f32_2"
+               OpMemberName %S_std140 24 "mat4x2_f32_3"
+               OpMemberName %S_std140 25 "mat4x3_f32"
+               OpMemberName %S_std140 26 "mat4x4_f32"
+               OpMemberName %S_std140 27 "arr2_vec3_f32"
+               OpMemberName %S_std140 28 "struct_inner"
                OpName %Inner "Inner"
-               OpMemberName %Inner 0 "x"
-               OpMemberName %S_std140 13 "l"
-               OpName %s "s"
-               OpName %load_s_inner_j "load_s_inner_j"
+               OpMemberName %Inner 0 "scalar_i32"
+               OpMemberName %Inner 1 "scalar_f32"
+               OpMemberName %S_std140 29 "array_struct_inner"
+               OpName %ub "ub"
+               OpName %load_ub_inner_mat2x2_f32 "load_ub_inner_mat2x2_f32"
+               OpName %load_ub_inner_mat3x2_f32 "load_ub_inner_mat3x2_f32"
+               OpName %load_ub_inner_mat4x2_f32 "load_ub_inner_mat4x2_f32"
                OpName %main "main"
-               OpDecorate %s_block_std140 Block
-               OpMemberDecorate %s_block_std140 0 Offset 0
+               OpDecorate %ub_block_std140 Block
+               OpMemberDecorate %ub_block_std140 0 Offset 0
                OpMemberDecorate %S_std140 0 Offset 0
-               OpMemberDecorate %S_std140 1 Offset 12
-               OpMemberDecorate %S_std140 2 Offset 16
-               OpMemberDecorate %S_std140 3 Offset 28
-               OpMemberDecorate %S_std140 4 Offset 32
-               OpMemberDecorate %S_std140 5 Offset 44
+               OpMemberDecorate %S_std140 1 Offset 4
+               OpMemberDecorate %S_std140 2 Offset 8
+               OpMemberDecorate %S_std140 3 Offset 16
+               OpMemberDecorate %S_std140 4 Offset 24
+               OpMemberDecorate %S_std140 5 Offset 32
                OpMemberDecorate %S_std140 6 Offset 48
-               OpMemberDecorate %S_std140 7 Offset 56
-               OpMemberDecorate %S_std140 8 Offset 64
-               OpMemberDecorate %S_std140 8 ColMajor
-               OpMemberDecorate %S_std140 8 MatrixStride 16
+               OpMemberDecorate %S_std140 7 Offset 64
+               OpMemberDecorate %S_std140 8 Offset 80
                OpMemberDecorate %S_std140 9 Offset 96
-               OpMemberDecorate %S_std140 10 Offset 104
-               OpMemberDecorate %S_std140 11 Offset 112
-               OpMemberDecorate %S_std140 12 Offset 128
+               OpMemberDecorate %S_std140 10 Offset 112
+               OpMemberDecorate %S_std140 11 Offset 128
+               OpMemberDecorate %S_std140 12 Offset 144
+               OpMemberDecorate %S_std140 13 Offset 152
+               OpMemberDecorate %S_std140 14 Offset 160
+               OpMemberDecorate %S_std140 14 ColMajor
+               OpMemberDecorate %S_std140 14 MatrixStride 16
+               OpMemberDecorate %S_std140 15 Offset 192
+               OpMemberDecorate %S_std140 15 ColMajor
+               OpMemberDecorate %S_std140 15 MatrixStride 16
+               OpMemberDecorate %S_std140 16 Offset 224
+               OpMemberDecorate %S_std140 17 Offset 232
+               OpMemberDecorate %S_std140 18 Offset 240
+               OpMemberDecorate %S_std140 19 Offset 256
+               OpMemberDecorate %S_std140 19 ColMajor
+               OpMemberDecorate %S_std140 19 MatrixStride 16
+               OpMemberDecorate %S_std140 20 Offset 304
+               OpMemberDecorate %S_std140 20 ColMajor
+               OpMemberDecorate %S_std140 20 MatrixStride 16
+               OpMemberDecorate %S_std140 21 Offset 352
+               OpMemberDecorate %S_std140 22 Offset 360
+               OpMemberDecorate %S_std140 23 Offset 368
+               OpMemberDecorate %S_std140 24 Offset 376
+               OpMemberDecorate %S_std140 25 Offset 384
+               OpMemberDecorate %S_std140 25 ColMajor
+               OpMemberDecorate %S_std140 25 MatrixStride 16
+               OpMemberDecorate %S_std140 26 Offset 448
+               OpMemberDecorate %S_std140 26 ColMajor
+               OpMemberDecorate %S_std140 26 MatrixStride 16
+               OpMemberDecorate %S_std140 27 Offset 512
+               OpDecorate %_arr_v3float_uint_2 ArrayStride 16
+               OpMemberDecorate %S_std140 28 Offset 544
                OpMemberDecorate %Inner 0 Offset 0
-               OpMemberDecorate %S_std140 13 Offset 144
-               OpDecorate %_arr_Inner_uint_4 ArrayStride 16
-               OpDecorate %s NonWritable
-               OpDecorate %s Binding 0
-               OpDecorate %s DescriptorSet 0
-        %int = OpTypeInt 32 1
-      %v3int = OpTypeVector %int 3
-       %uint = OpTypeInt 32 0
-     %v3uint = OpTypeVector %uint 3
+               OpMemberDecorate %Inner 1 Offset 16
+               OpMemberDecorate %S_std140 29 Offset 576
+               OpDecorate %_arr_Inner_uint_4 ArrayStride 32
+               OpDecorate %ub NonWritable
+               OpDecorate %ub Binding 0
+               OpDecorate %ub DescriptorSet 0
       %float = OpTypeFloat 32
-    %v3float = OpTypeVector %float 3
-      %v2int = OpTypeVector %int 2
-%mat2v3float = OpTypeMatrix %v3float 2
+        %int = OpTypeInt 32 1
+       %uint = OpTypeInt 32 0
     %v2float = OpTypeVector %float 2
-      %Inner = OpTypeStruct %int
+      %v2int = OpTypeVector %int 2
+     %v2uint = OpTypeVector %uint 2
+    %v3float = OpTypeVector %float 3
+      %v3int = OpTypeVector %int 3
+     %v3uint = OpTypeVector %uint 3
+    %v4float = OpTypeVector %float 4
+      %v4int = OpTypeVector %int 4
+     %v4uint = OpTypeVector %uint 4
+%mat2v3float = OpTypeMatrix %v3float 2
+%mat2v4float = OpTypeMatrix %v4float 2
+%mat3v3float = OpTypeMatrix %v3float 3
+%mat3v4float = OpTypeMatrix %v4float 3
+%mat4v3float = OpTypeMatrix %v3float 4
+%mat4v4float = OpTypeMatrix %v4float 4
+     %uint_2 = OpConstant %uint 2
+%_arr_v3float_uint_2 = OpTypeArray %v3float %uint_2
+      %Inner = OpTypeStruct %int %float
      %uint_4 = OpConstant %uint 4
 %_arr_Inner_uint_4 = OpTypeArray %Inner %uint_4
-   %S_std140 = OpTypeStruct %v3int %int %v3uint %uint %v3float %float %v2int %v2int %mat2v3float %v2float %v2float %v2float %Inner %_arr_Inner_uint_4
-%s_block_std140 = OpTypeStruct %S_std140
-%_ptr_Uniform_s_block_std140 = OpTypePointer Uniform %s_block_std140
-          %s = OpVariable %_ptr_Uniform_s_block_std140 Uniform
-%mat3v2float = OpTypeMatrix %v2float 3
-         %17 = OpTypeFunction %mat3v2float
+   %S_std140 = OpTypeStruct %float %int %uint %v2float %v2int %v2uint %v3float %v3int %v3uint %v4float %v4int %v4uint %v2float %v2float %mat2v3float %mat2v4float %v2float %v2float %v2float %mat3v3float %mat3v4float %v2float %v2float %v2float %v2float %mat4v3float %mat4v4float %_arr_v3float_uint_2 %Inner %_arr_Inner_uint_4
+%ub_block_std140 = OpTypeStruct %S_std140
+%_ptr_Uniform_ub_block_std140 = OpTypePointer Uniform %ub_block_std140
+         %ub = OpVariable %_ptr_Uniform_ub_block_std140 Uniform
+%mat2v2float = OpTypeMatrix %v2float 2
+         %28 = OpTypeFunction %mat2v2float
      %uint_0 = OpConstant %uint 0
 %_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
-     %uint_9 = OpConstant %uint 9
+    %uint_12 = OpConstant %uint 12
 %_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
-    %uint_10 = OpConstant %uint 10
-    %uint_11 = OpConstant %uint 11
+    %uint_13 = OpConstant %uint 13
+%mat3v2float = OpTypeMatrix %v2float 3
+         %46 = OpTypeFunction %mat3v2float
+    %uint_16 = OpConstant %uint 16
+    %uint_17 = OpConstant %uint 17
+    %uint_18 = OpConstant %uint 18
+%mat4v2float = OpTypeMatrix %v2float 4
+         %65 = OpTypeFunction %mat4v2float
+    %uint_21 = OpConstant %uint 21
+    %uint_22 = OpConstant %uint 22
+    %uint_23 = OpConstant %uint 23
+    %uint_24 = OpConstant %uint 24
        %void = OpTypeVoid
-         %39 = OpTypeFunction %void
-%_ptr_Uniform_v3int = OpTypePointer Uniform %v3int
+         %88 = OpTypeFunction %void
+%_ptr_Uniform_float = OpTypePointer Uniform %float
      %uint_1 = OpConstant %uint 1
 %_ptr_Uniform_int = OpTypePointer Uniform %int
-     %uint_2 = OpConstant %uint 2
-%_ptr_Uniform_v3uint = OpTypePointer Uniform %v3uint
-     %uint_3 = OpConstant %uint 3
 %_ptr_Uniform_uint = OpTypePointer Uniform %uint
-%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
-     %uint_5 = OpConstant %uint 5
-%_ptr_Uniform_float = OpTypePointer Uniform %float
-     %uint_6 = OpConstant %uint 6
+     %uint_3 = OpConstant %uint 3
 %_ptr_Uniform_v2int = OpTypePointer Uniform %v2int
+     %uint_5 = OpConstant %uint 5
+%_ptr_Uniform_v2uint = OpTypePointer Uniform %v2uint
+     %uint_6 = OpConstant %uint 6
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
      %uint_7 = OpConstant %uint 7
+%_ptr_Uniform_v3int = OpTypePointer Uniform %v3int
      %uint_8 = OpConstant %uint 8
+%_ptr_Uniform_v3uint = OpTypePointer Uniform %v3uint
+     %uint_9 = OpConstant %uint 9
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+    %uint_10 = OpConstant %uint 10
+%_ptr_Uniform_v4int = OpTypePointer Uniform %v4int
+    %uint_11 = OpConstant %uint 11
+%_ptr_Uniform_v4uint = OpTypePointer Uniform %v4uint
+    %uint_14 = OpConstant %uint 14
 %_ptr_Uniform_mat2v3float = OpTypePointer Uniform %mat2v3float
-    %uint_12 = OpConstant %uint 12
+    %uint_15 = OpConstant %uint 15
+%_ptr_Uniform_mat2v4float = OpTypePointer Uniform %mat2v4float
+    %uint_19 = OpConstant %uint 19
+%_ptr_Uniform_mat3v3float = OpTypePointer Uniform %mat3v3float
+    %uint_20 = OpConstant %uint 20
+%_ptr_Uniform_mat3v4float = OpTypePointer Uniform %mat3v4float
+    %uint_25 = OpConstant %uint 25
+%_ptr_Uniform_mat4v3float = OpTypePointer Uniform %mat4v3float
+    %uint_26 = OpConstant %uint 26
+%_ptr_Uniform_mat4v4float = OpTypePointer Uniform %mat4v4float
+    %uint_27 = OpConstant %uint 27
+%_ptr_Uniform__arr_v3float_uint_2 = OpTypePointer Uniform %_arr_v3float_uint_2
+    %uint_28 = OpConstant %uint 28
 %_ptr_Uniform_Inner = OpTypePointer Uniform %Inner
-    %uint_13 = OpConstant %uint 13
+    %uint_29 = OpConstant %uint 29
 %_ptr_Uniform__arr_Inner_uint_4 = OpTypePointer Uniform %_arr_Inner_uint_4
-%load_s_inner_j = OpFunction %mat3v2float None %17
-         %20 = OpLabel
-         %24 = OpAccessChain %_ptr_Uniform_S_std140 %s %uint_0
-         %28 = OpAccessChain %_ptr_Uniform_v2float %24 %uint_9
-         %29 = OpLoad %v2float %28
-         %32 = OpAccessChain %_ptr_Uniform_v2float %24 %uint_10
-         %33 = OpLoad %v2float %32
-         %36 = OpAccessChain %_ptr_Uniform_v2float %24 %uint_11
-         %37 = OpLoad %v2float %36
-         %38 = OpCompositeConstruct %mat3v2float %29 %33 %37
-               OpReturnValue %38
+%load_ub_inner_mat2x2_f32 = OpFunction %mat2v2float None %28
+         %31 = OpLabel
+         %35 = OpAccessChain %_ptr_Uniform_S_std140 %ub %uint_0
+         %39 = OpAccessChain %_ptr_Uniform_v2float %35 %uint_12
+         %40 = OpLoad %v2float %39
+         %43 = OpAccessChain %_ptr_Uniform_v2float %35 %uint_13
+         %44 = OpLoad %v2float %43
+         %45 = OpCompositeConstruct %mat2v2float %40 %44
+               OpReturnValue %45
                OpFunctionEnd
-       %main = OpFunction %void None %39
-         %42 = OpLabel
-         %44 = OpAccessChain %_ptr_Uniform_v3int %s %uint_0 %uint_0
-         %45 = OpLoad %v3int %44
-         %48 = OpAccessChain %_ptr_Uniform_int %s %uint_0 %uint_1
-         %49 = OpLoad %int %48
-         %52 = OpAccessChain %_ptr_Uniform_v3uint %s %uint_0 %uint_2
-         %53 = OpLoad %v3uint %52
-         %56 = OpAccessChain %_ptr_Uniform_uint %s %uint_0 %uint_3
-         %57 = OpLoad %uint %56
-         %59 = OpAccessChain %_ptr_Uniform_v3float %s %uint_0 %uint_4
-         %60 = OpLoad %v3float %59
-         %63 = OpAccessChain %_ptr_Uniform_float %s %uint_0 %uint_5
-         %64 = OpLoad %float %63
-         %67 = OpAccessChain %_ptr_Uniform_v2int %s %uint_0 %uint_6
-         %68 = OpLoad %v2int %67
-         %70 = OpAccessChain %_ptr_Uniform_v2int %s %uint_0 %uint_7
-         %71 = OpLoad %v2int %70
-         %74 = OpAccessChain %_ptr_Uniform_mat2v3float %s %uint_0 %uint_8
-         %75 = OpLoad %mat2v3float %74
-         %76 = OpFunctionCall %mat3v2float %load_s_inner_j
-         %79 = OpAccessChain %_ptr_Uniform_Inner %s %uint_0 %uint_12
-         %80 = OpLoad %Inner %79
-         %83 = OpAccessChain %_ptr_Uniform__arr_Inner_uint_4 %s %uint_0 %uint_13
-         %84 = OpLoad %_arr_Inner_uint_4 %83
+%load_ub_inner_mat3x2_f32 = OpFunction %mat3v2float None %46
+         %49 = OpLabel
+         %51 = OpAccessChain %_ptr_Uniform_S_std140 %ub %uint_0
+         %54 = OpAccessChain %_ptr_Uniform_v2float %51 %uint_16
+         %55 = OpLoad %v2float %54
+         %58 = OpAccessChain %_ptr_Uniform_v2float %51 %uint_17
+         %59 = OpLoad %v2float %58
+         %62 = OpAccessChain %_ptr_Uniform_v2float %51 %uint_18
+         %63 = OpLoad %v2float %62
+         %64 = OpCompositeConstruct %mat3v2float %55 %59 %63
+               OpReturnValue %64
+               OpFunctionEnd
+%load_ub_inner_mat4x2_f32 = OpFunction %mat4v2float None %65
+         %68 = OpLabel
+         %70 = OpAccessChain %_ptr_Uniform_S_std140 %ub %uint_0
+         %73 = OpAccessChain %_ptr_Uniform_v2float %70 %uint_21
+         %74 = OpLoad %v2float %73
+         %77 = OpAccessChain %_ptr_Uniform_v2float %70 %uint_22
+         %78 = OpLoad %v2float %77
+         %81 = OpAccessChain %_ptr_Uniform_v2float %70 %uint_23
+         %82 = OpLoad %v2float %81
+         %85 = OpAccessChain %_ptr_Uniform_v2float %70 %uint_24
+         %86 = OpLoad %v2float %85
+         %87 = OpCompositeConstruct %mat4v2float %74 %78 %82 %86
+               OpReturnValue %87
+               OpFunctionEnd
+       %main = OpFunction %void None %88
+         %91 = OpLabel
+         %93 = OpAccessChain %_ptr_Uniform_float %ub %uint_0 %uint_0
+         %94 = OpLoad %float %93
+         %97 = OpAccessChain %_ptr_Uniform_int %ub %uint_0 %uint_1
+         %98 = OpLoad %int %97
+        %100 = OpAccessChain %_ptr_Uniform_uint %ub %uint_0 %uint_2
+        %101 = OpLoad %uint %100
+        %103 = OpAccessChain %_ptr_Uniform_v2float %ub %uint_0 %uint_3
+        %104 = OpLoad %v2float %103
+        %106 = OpAccessChain %_ptr_Uniform_v2int %ub %uint_0 %uint_4
+        %107 = OpLoad %v2int %106
+        %110 = OpAccessChain %_ptr_Uniform_v2uint %ub %uint_0 %uint_5
+        %111 = OpLoad %v2uint %110
+        %114 = OpAccessChain %_ptr_Uniform_v3float %ub %uint_0 %uint_6
+        %115 = OpLoad %v3float %114
+        %118 = OpAccessChain %_ptr_Uniform_v3int %ub %uint_0 %uint_7
+        %119 = OpLoad %v3int %118
+        %122 = OpAccessChain %_ptr_Uniform_v3uint %ub %uint_0 %uint_8
+        %123 = OpLoad %v3uint %122
+        %126 = OpAccessChain %_ptr_Uniform_v4float %ub %uint_0 %uint_9
+        %127 = OpLoad %v4float %126
+        %130 = OpAccessChain %_ptr_Uniform_v4int %ub %uint_0 %uint_10
+        %131 = OpLoad %v4int %130
+        %134 = OpAccessChain %_ptr_Uniform_v4uint %ub %uint_0 %uint_11
+        %135 = OpLoad %v4uint %134
+        %136 = OpFunctionCall %mat2v2float %load_ub_inner_mat2x2_f32
+        %139 = OpAccessChain %_ptr_Uniform_mat2v3float %ub %uint_0 %uint_14
+        %140 = OpLoad %mat2v3float %139
+        %143 = OpAccessChain %_ptr_Uniform_mat2v4float %ub %uint_0 %uint_15
+        %144 = OpLoad %mat2v4float %143
+        %145 = OpFunctionCall %mat3v2float %load_ub_inner_mat3x2_f32
+        %148 = OpAccessChain %_ptr_Uniform_mat3v3float %ub %uint_0 %uint_19
+        %149 = OpLoad %mat3v3float %148
+        %152 = OpAccessChain %_ptr_Uniform_mat3v4float %ub %uint_0 %uint_20
+        %153 = OpLoad %mat3v4float %152
+        %154 = OpFunctionCall %mat4v2float %load_ub_inner_mat4x2_f32
+        %157 = OpAccessChain %_ptr_Uniform_mat4v3float %ub %uint_0 %uint_25
+        %158 = OpLoad %mat4v3float %157
+        %161 = OpAccessChain %_ptr_Uniform_mat4v4float %ub %uint_0 %uint_26
+        %162 = OpLoad %mat4v4float %161
+        %165 = OpAccessChain %_ptr_Uniform__arr_v3float_uint_2 %ub %uint_0 %uint_27
+        %166 = OpLoad %_arr_v3float_uint_2 %165
+        %169 = OpAccessChain %_ptr_Uniform_Inner %ub %uint_0 %uint_28
+        %170 = OpLoad %Inner %169
+        %173 = OpAccessChain %_ptr_Uniform__arr_Inner_uint_4 %ub %uint_0 %uint_29
+        %174 = OpLoad %_arr_Inner_uint_4 %173
                OpReturn
                OpFunctionEnd

diff --git a/test/tint/buffer/uniform/static_index/read.wgsl.expected.wgsl b/test/tint/buffer/uniform/static_index/read.wgsl.expected.wgsl
index 3c5c891..46b7e07 100644
--- a/test/tint/buffer/uniform/static_index/read.wgsl.expected.wgsl
+++ b/test/tint/buffer/uniform/static_index/read.wgsl.expected.wgsl

@@ -1,39 +1,65 @@
 struct Inner {
-  @size(16)
-  x : i32,
+  scalar_i32 : i32,
+  @align(16) @size(16)
+  scalar_f32 : f32,
 }
 
 struct S {
-  a : vec3<i32>,
-  b : i32,
-  c : vec3<u32>,
-  d : u32,
-  e : vec3<f32>,
-  f : f32,
-  g : vec2<i32>,
-  h : vec2<i32>,
-  i : mat2x3<f32>,
-  j : mat3x2<f32>,
+  scalar_f32 : f32,
+  scalar_i32 : i32,
+  scalar_u32 : u32,
+  vec2_f32 : vec2<f32>,
+  vec2_i32 : vec2<i32>,
+  vec2_u32 : vec2<u32>,
+  vec3_f32 : vec3<f32>,
+  vec3_i32 : vec3<i32>,
+  vec3_u32 : vec3<u32>,
+  vec4_f32 : vec4<f32>,
+  vec4_i32 : vec4<i32>,
+  vec4_u32 : vec4<u32>,
+  mat2x2_f32 : mat2x2<f32>,
+  mat2x3_f32 : mat2x3<f32>,
+  mat2x4_f32 : mat2x4<f32>,
+  mat3x2_f32 : mat3x2<f32>,
+  mat3x3_f32 : mat3x3<f32>,
+  mat3x4_f32 : mat3x4<f32>,
+  mat4x2_f32 : mat4x2<f32>,
+  mat4x3_f32 : mat4x3<f32>,
+  mat4x4_f32 : mat4x4<f32>,
   @align(16)
-  k : Inner,
+  arr2_vec3_f32 : array<vec3<f32>, 2>,
   @align(16)
-  l : array<Inner, 4>,
+  struct_inner : Inner,
+  @align(16)
+  array_struct_inner : array<Inner, 4>,
 }
 
-@binding(0) @group(0) var<uniform> s : S;
+@binding(0) @group(0) var<uniform> ub : S;
 
 @compute @workgroup_size(1)
 fn main() {
-  let a = s.a;
-  let b = s.b;
-  let c = s.c;
-  let d = s.d;
-  let e = s.e;
-  let f = s.f;
-  let g = s.g;
-  let h = s.h;
-  let i = s.i;
-  let j = s.j;
-  let k = s.k;
-  let l = s.l;
+  let scalar_f32 = ub.scalar_f32;
+  let scalar_i32 = ub.scalar_i32;
+  let scalar_u32 = ub.scalar_u32;
+  let vec2_f32 = ub.vec2_f32;
+  let vec2_i32 = ub.vec2_i32;
+  let vec2_u32 = ub.vec2_u32;
+  let vec3_f32 = ub.vec3_f32;
+  let vec3_i32 = ub.vec3_i32;
+  let vec3_u32 = ub.vec3_u32;
+  let vec4_f32 = ub.vec4_f32;
+  let vec4_i32 = ub.vec4_i32;
+  let vec4_u32 = ub.vec4_u32;
+  let mat2x2_f32 = ub.mat2x2_f32;
+  let mat2x3_f32 = ub.mat2x3_f32;
+  let mat2x4_f32 = ub.mat2x4_f32;
+  let mat3x2_f32 = ub.mat3x2_f32;
+  let mat3x3_f32 = ub.mat3x3_f32;
+  let mat3x4_f32 = ub.mat3x4_f32;
+  let mat4x2_f32 = ub.mat4x2_f32;
+  let mat4x3_f32 = ub.mat4x3_f32;
+  let mat4x4_f32 = ub.mat4x4_f32;
+  let arr2_vec3_f32 = ub.arr2_vec3_f32;
+  let struct_inner = ub.struct_inner;
+  let array_struct_inner = ub.array_struct_inner;
 }

diff --git a/test/tint/buffer/uniform/static_index/read_f16.wgsl b/test/tint/buffer/uniform/static_index/read_f16.wgsl
new file mode 100644
index 0000000..2343a12
--- /dev/null
+++ b/test/tint/buffer/uniform/static_index/read_f16.wgsl

@@ -0,0 +1,92 @@
+enable f16;
+
+struct Inner {
+    scalar_i32 : i32,
+    scalar_f32 : f32,
+    @size(8) scalar_f16 : f16,
+};
+
+struct S {
+    scalar_f32 : f32,
+    scalar_i32 : i32,
+    scalar_u32 : u32,
+    scalar_f16 : f16,
+    vec2_f32 : vec2<f32>,
+    vec2_i32 : vec2<i32>,
+    vec2_u32 : vec2<u32>,
+    vec2_f16 : vec2<f16>,
+    vec3_f32 : vec3<f32>,
+    vec3_i32 : vec3<i32>,
+    vec3_u32 : vec3<u32>,
+    vec3_f16 : vec3<f16>,
+    vec4_f32 : vec4<f32>,
+    vec4_i32 : vec4<i32>,
+    vec4_u32 : vec4<u32>,
+    vec4_f16 : vec4<f16>,
+    mat2x2_f32 : mat2x2<f32>,
+    mat2x3_f32 : mat2x3<f32>,
+    mat2x4_f32 : mat2x4<f32>,
+    mat3x2_f32 : mat3x2<f32>,
+    mat3x3_f32 : mat3x3<f32>,
+    mat3x4_f32 : mat3x4<f32>,
+    mat4x2_f32 : mat4x2<f32>,
+    mat4x3_f32 : mat4x3<f32>,
+    mat4x4_f32 : mat4x4<f32>,
+    mat2x2_f16 : mat2x2<f16>,
+    mat2x3_f16 : mat2x3<f16>,
+    mat2x4_f16 : mat2x4<f16>,
+    mat3x2_f16 : mat3x2<f16>,
+    mat3x3_f16 : mat3x3<f16>,
+    mat3x4_f16 : mat3x4<f16>,
+    mat4x2_f16 : mat4x2<f16>,
+    mat4x3_f16 : mat4x3<f16>,
+    mat4x4_f16 : mat4x4<f16>,
+    @align(16) arr2_vec3_f32 : array<vec3<f32>, 2>,
+    arr2_mat4x2_f16 : array<mat4x2<f16>, 2>,
+    @align(16) struct_inner : Inner,
+    @align(16) array_struct_inner : array<Inner, 4>,
+};
+
+@binding(0) @group(0) var<uniform> ub : S;
+
+@compute @workgroup_size(1)
+fn main() {
+    let scalar_f32 = ub.scalar_f32;
+    let scalar_i32 = ub.scalar_i32;
+    let scalar_u32 = ub.scalar_u32;
+    let scalar_f16 = ub.scalar_f16;
+    let vec2_f32 = ub.vec2_f32;
+    let vec2_i32 = ub.vec2_i32;
+    let vec2_u32 = ub.vec2_u32;
+    let vec2_f16 = ub.vec2_f16;
+    let vec3_f32 = ub.vec3_f32;
+    let vec3_i32 = ub.vec3_i32;
+    let vec3_u32 = ub.vec3_u32;
+    let vec3_f16 = ub.vec3_f16;
+    let vec4_f32 = ub.vec4_f32;
+    let vec4_i32 = ub.vec4_i32;
+    let vec4_u32 = ub.vec4_u32;
+    let vec4_f16 = ub.vec4_f16;
+    let mat2x2_f32 = ub.mat2x2_f32;
+    let mat2x3_f32 = ub.mat2x3_f32;
+    let mat2x4_f32 = ub.mat2x4_f32;
+    let mat3x2_f32 = ub.mat3x2_f32;
+    let mat3x3_f32 = ub.mat3x3_f32;
+    let mat3x4_f32 = ub.mat3x4_f32;
+    let mat4x2_f32 = ub.mat4x2_f32;
+    let mat4x3_f32 = ub.mat4x3_f32;
+    let mat4x4_f32 = ub.mat4x4_f32;
+    let mat2x2_f16 = ub.mat2x2_f16;
+    let mat2x3_f16 = ub.mat2x3_f16;
+    let mat2x4_f16 = ub.mat2x4_f16;
+    let mat3x2_f16 = ub.mat3x2_f16;
+    let mat3x3_f16 = ub.mat3x3_f16;
+    let mat3x4_f16 = ub.mat3x4_f16;
+    let mat4x2_f16 = ub.mat4x2_f16;
+    let mat4x3_f16 = ub.mat4x3_f16;
+    let mat4x4_f16 = ub.mat4x4_f16;
+    let arr2_vec3_f32 = ub.arr2_vec3_f32;
+    let arr2_mat4x2_f16 = ub.arr2_mat4x2_f16;
+    let struct_inner = ub.struct_inner;
+    let array_struct_inner = ub.array_struct_inner;
+}

diff --git a/test/tint/buffer/uniform/static_index/read_f16.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/static_index/read_f16.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..d0e6ba8
--- /dev/null
+++ b/test/tint/buffer/uniform/static_index/read_f16.wgsl.expected.dxc.hlsl

@@ -0,0 +1,318 @@
+struct Inner {
+  int scalar_i32;
+  float scalar_f32;
+  float16_t scalar_f16;
+};
+
+cbuffer cbuffer_ub : register(b0, space0) {
+  uint4 ub[55];
+};
+
+float2x2 tint_symbol_16(uint4 buffer[55], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  return float2x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)));
+}
+
+float2x3 tint_symbol_17(uint4 buffer[55], uint offset) {
+  const uint scalar_offset_2 = ((offset + 0u)) / 4;
+  const uint scalar_offset_3 = ((offset + 16u)) / 4;
+  return float2x3(asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
+}
+
+float2x4 tint_symbol_18(uint4 buffer[55], uint offset) {
+  const uint scalar_offset_4 = ((offset + 0u)) / 4;
+  const uint scalar_offset_5 = ((offset + 16u)) / 4;
+  return float2x4(asfloat(buffer[scalar_offset_4 / 4]), asfloat(buffer[scalar_offset_5 / 4]));
+}
+
+float3x2 tint_symbol_19(uint4 buffer[55], uint offset) {
+  const uint scalar_offset_6 = ((offset + 0u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_6 / 4];
+  const uint scalar_offset_7 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_7 / 4];
+  const uint scalar_offset_8 = ((offset + 16u)) / 4;
+  uint4 ubo_load_4 = buffer[scalar_offset_8 / 4];
+  return float3x2(asfloat(((scalar_offset_6 & 2) ? ubo_load_2.zw : ubo_load_2.xy)), asfloat(((scalar_offset_7 & 2) ? ubo_load_3.zw : ubo_load_3.xy)), asfloat(((scalar_offset_8 & 2) ? ubo_load_4.zw : ubo_load_4.xy)));
+}
+
+float3x3 tint_symbol_20(uint4 buffer[55], uint offset) {
+  const uint scalar_offset_9 = ((offset + 0u)) / 4;
+  const uint scalar_offset_10 = ((offset + 16u)) / 4;
+  const uint scalar_offset_11 = ((offset + 32u)) / 4;
+  return float3x3(asfloat(buffer[scalar_offset_9 / 4].xyz), asfloat(buffer[scalar_offset_10 / 4].xyz), asfloat(buffer[scalar_offset_11 / 4].xyz));
+}
+
+float3x4 tint_symbol_21(uint4 buffer[55], uint offset) {
+  const uint scalar_offset_12 = ((offset + 0u)) / 4;
+  const uint scalar_offset_13 = ((offset + 16u)) / 4;
+  const uint scalar_offset_14 = ((offset + 32u)) / 4;
+  return float3x4(asfloat(buffer[scalar_offset_12 / 4]), asfloat(buffer[scalar_offset_13 / 4]), asfloat(buffer[scalar_offset_14 / 4]));
+}
+
+float4x2 tint_symbol_22(uint4 buffer[55], uint offset) {
+  const uint scalar_offset_15 = ((offset + 0u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_15 / 4];
+  const uint scalar_offset_16 = ((offset + 8u)) / 4;
+  uint4 ubo_load_6 = buffer[scalar_offset_16 / 4];
+  const uint scalar_offset_17 = ((offset + 16u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_17 / 4];
+  const uint scalar_offset_18 = ((offset + 24u)) / 4;
+  uint4 ubo_load_8 = buffer[scalar_offset_18 / 4];
+  return float4x2(asfloat(((scalar_offset_15 & 2) ? ubo_load_5.zw : ubo_load_5.xy)), asfloat(((scalar_offset_16 & 2) ? ubo_load_6.zw : ubo_load_6.xy)), asfloat(((scalar_offset_17 & 2) ? ubo_load_7.zw : ubo_load_7.xy)), asfloat(((scalar_offset_18 & 2) ? ubo_load_8.zw : ubo_load_8.xy)));
+}
+
+float4x3 tint_symbol_23(uint4 buffer[55], uint offset) {
+  const uint scalar_offset_19 = ((offset + 0u)) / 4;
+  const uint scalar_offset_20 = ((offset + 16u)) / 4;
+  const uint scalar_offset_21 = ((offset + 32u)) / 4;
+  const uint scalar_offset_22 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset_19 / 4].xyz), asfloat(buffer[scalar_offset_20 / 4].xyz), asfloat(buffer[scalar_offset_21 / 4].xyz), asfloat(buffer[scalar_offset_22 / 4].xyz));
+}
+
+float4x4 tint_symbol_24(uint4 buffer[55], uint offset) {
+  const uint scalar_offset_23 = ((offset + 0u)) / 4;
+  const uint scalar_offset_24 = ((offset + 16u)) / 4;
+  const uint scalar_offset_25 = ((offset + 32u)) / 4;
+  const uint scalar_offset_26 = ((offset + 48u)) / 4;
+  return float4x4(asfloat(buffer[scalar_offset_23 / 4]), asfloat(buffer[scalar_offset_24 / 4]), asfloat(buffer[scalar_offset_25 / 4]), asfloat(buffer[scalar_offset_26 / 4]));
+}
+
+matrix<float16_t, 2, 2> tint_symbol_25(uint4 buffer[55], uint offset) {
+  const uint scalar_offset_27 = ((offset + 0u)) / 4;
+  uint ubo_load_9 = buffer[scalar_offset_27 / 4][scalar_offset_27 % 4];
+  const uint scalar_offset_28 = ((offset + 4u)) / 4;
+  uint ubo_load_10 = buffer[scalar_offset_28 / 4][scalar_offset_28 % 4];
+  return matrix<float16_t, 2, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_9 & 0xFFFF)), float16_t(f16tof32(ubo_load_9 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_10 & 0xFFFF)), float16_t(f16tof32(ubo_load_10 >> 16))));
+}
+
+matrix<float16_t, 2, 3> tint_symbol_26(uint4 buffer[55], uint offset) {
+  const uint scalar_offset_29 = ((offset + 0u)) / 4;
+  uint4 ubo_load_12 = buffer[scalar_offset_29 / 4];
+  uint2 ubo_load_11 = ((scalar_offset_29 & 2) ? ubo_load_12.zw : ubo_load_12.xy);
+  vector<float16_t, 2> ubo_load_11_xz = vector<float16_t, 2>(f16tof32(ubo_load_11 & 0xFFFF));
+  float16_t ubo_load_11_y = f16tof32(ubo_load_11[0] >> 16);
+  const uint scalar_offset_30 = ((offset + 8u)) / 4;
+  uint4 ubo_load_14 = buffer[scalar_offset_30 / 4];
+  uint2 ubo_load_13 = ((scalar_offset_30 & 2) ? ubo_load_14.zw : ubo_load_14.xy);
+  vector<float16_t, 2> ubo_load_13_xz = vector<float16_t, 2>(f16tof32(ubo_load_13 & 0xFFFF));
+  float16_t ubo_load_13_y = f16tof32(ubo_load_13[0] >> 16);
+  return matrix<float16_t, 2, 3>(vector<float16_t, 3>(ubo_load_11_xz[0], ubo_load_11_y, ubo_load_11_xz[1]), vector<float16_t, 3>(ubo_load_13_xz[0], ubo_load_13_y, ubo_load_13_xz[1]));
+}
+
+matrix<float16_t, 2, 4> tint_symbol_27(uint4 buffer[55], uint offset) {
+  const uint scalar_offset_31 = ((offset + 0u)) / 4;
+  uint4 ubo_load_16 = buffer[scalar_offset_31 / 4];
+  uint2 ubo_load_15 = ((scalar_offset_31 & 2) ? ubo_load_16.zw : ubo_load_16.xy);
+  vector<float16_t, 2> ubo_load_15_xz = vector<float16_t, 2>(f16tof32(ubo_load_15 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_15_yw = vector<float16_t, 2>(f16tof32(ubo_load_15 >> 16));
+  const uint scalar_offset_32 = ((offset + 8u)) / 4;
+  uint4 ubo_load_18 = buffer[scalar_offset_32 / 4];
+  uint2 ubo_load_17 = ((scalar_offset_32 & 2) ? ubo_load_18.zw : ubo_load_18.xy);
+  vector<float16_t, 2> ubo_load_17_xz = vector<float16_t, 2>(f16tof32(ubo_load_17 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_17_yw = vector<float16_t, 2>(f16tof32(ubo_load_17 >> 16));
+  return matrix<float16_t, 2, 4>(vector<float16_t, 4>(ubo_load_15_xz[0], ubo_load_15_yw[0], ubo_load_15_xz[1], ubo_load_15_yw[1]), vector<float16_t, 4>(ubo_load_17_xz[0], ubo_load_17_yw[0], ubo_load_17_xz[1], ubo_load_17_yw[1]));
+}
+
+matrix<float16_t, 3, 2> tint_symbol_28(uint4 buffer[55], uint offset) {
+  const uint scalar_offset_33 = ((offset + 0u)) / 4;
+  uint ubo_load_19 = buffer[scalar_offset_33 / 4][scalar_offset_33 % 4];
+  const uint scalar_offset_34 = ((offset + 4u)) / 4;
+  uint ubo_load_20 = buffer[scalar_offset_34 / 4][scalar_offset_34 % 4];
+  const uint scalar_offset_35 = ((offset + 8u)) / 4;
+  uint ubo_load_21 = buffer[scalar_offset_35 / 4][scalar_offset_35 % 4];
+  return matrix<float16_t, 3, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_19 & 0xFFFF)), float16_t(f16tof32(ubo_load_19 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_20 & 0xFFFF)), float16_t(f16tof32(ubo_load_20 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_21 & 0xFFFF)), float16_t(f16tof32(ubo_load_21 >> 16))));
+}
+
+matrix<float16_t, 3, 3> tint_symbol_29(uint4 buffer[55], uint offset) {
+  const uint scalar_offset_36 = ((offset + 0u)) / 4;
+  uint4 ubo_load_23 = buffer[scalar_offset_36 / 4];
+  uint2 ubo_load_22 = ((scalar_offset_36 & 2) ? ubo_load_23.zw : ubo_load_23.xy);
+  vector<float16_t, 2> ubo_load_22_xz = vector<float16_t, 2>(f16tof32(ubo_load_22 & 0xFFFF));
+  float16_t ubo_load_22_y = f16tof32(ubo_load_22[0] >> 16);
+  const uint scalar_offset_37 = ((offset + 8u)) / 4;
+  uint4 ubo_load_25 = buffer[scalar_offset_37 / 4];
+  uint2 ubo_load_24 = ((scalar_offset_37 & 2) ? ubo_load_25.zw : ubo_load_25.xy);
+  vector<float16_t, 2> ubo_load_24_xz = vector<float16_t, 2>(f16tof32(ubo_load_24 & 0xFFFF));
+  float16_t ubo_load_24_y = f16tof32(ubo_load_24[0] >> 16);
+  const uint scalar_offset_38 = ((offset + 16u)) / 4;
+  uint4 ubo_load_27 = buffer[scalar_offset_38 / 4];
+  uint2 ubo_load_26 = ((scalar_offset_38 & 2) ? ubo_load_27.zw : ubo_load_27.xy);
+  vector<float16_t, 2> ubo_load_26_xz = vector<float16_t, 2>(f16tof32(ubo_load_26 & 0xFFFF));
+  float16_t ubo_load_26_y = f16tof32(ubo_load_26[0] >> 16);
+  return matrix<float16_t, 3, 3>(vector<float16_t, 3>(ubo_load_22_xz[0], ubo_load_22_y, ubo_load_22_xz[1]), vector<float16_t, 3>(ubo_load_24_xz[0], ubo_load_24_y, ubo_load_24_xz[1]), vector<float16_t, 3>(ubo_load_26_xz[0], ubo_load_26_y, ubo_load_26_xz[1]));
+}
+
+matrix<float16_t, 3, 4> tint_symbol_30(uint4 buffer[55], uint offset) {
+  const uint scalar_offset_39 = ((offset + 0u)) / 4;
+  uint4 ubo_load_29 = buffer[scalar_offset_39 / 4];
+  uint2 ubo_load_28 = ((scalar_offset_39 & 2) ? ubo_load_29.zw : ubo_load_29.xy);
+  vector<float16_t, 2> ubo_load_28_xz = vector<float16_t, 2>(f16tof32(ubo_load_28 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_28_yw = vector<float16_t, 2>(f16tof32(ubo_load_28 >> 16));
+  const uint scalar_offset_40 = ((offset + 8u)) / 4;
+  uint4 ubo_load_31 = buffer[scalar_offset_40 / 4];
+  uint2 ubo_load_30 = ((scalar_offset_40 & 2) ? ubo_load_31.zw : ubo_load_31.xy);
+  vector<float16_t, 2> ubo_load_30_xz = vector<float16_t, 2>(f16tof32(ubo_load_30 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_30_yw = vector<float16_t, 2>(f16tof32(ubo_load_30 >> 16));
+  const uint scalar_offset_41 = ((offset + 16u)) / 4;
+  uint4 ubo_load_33 = buffer[scalar_offset_41 / 4];
+  uint2 ubo_load_32 = ((scalar_offset_41 & 2) ? ubo_load_33.zw : ubo_load_33.xy);
+  vector<float16_t, 2> ubo_load_32_xz = vector<float16_t, 2>(f16tof32(ubo_load_32 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_32_yw = vector<float16_t, 2>(f16tof32(ubo_load_32 >> 16));
+  return matrix<float16_t, 3, 4>(vector<float16_t, 4>(ubo_load_28_xz[0], ubo_load_28_yw[0], ubo_load_28_xz[1], ubo_load_28_yw[1]), vector<float16_t, 4>(ubo_load_30_xz[0], ubo_load_30_yw[0], ubo_load_30_xz[1], ubo_load_30_yw[1]), vector<float16_t, 4>(ubo_load_32_xz[0], ubo_load_32_yw[0], ubo_load_32_xz[1], ubo_load_32_yw[1]));
+}
+
+matrix<float16_t, 4, 2> tint_symbol_31(uint4 buffer[55], uint offset) {
+  const uint scalar_offset_42 = ((offset + 0u)) / 4;
+  uint ubo_load_34 = buffer[scalar_offset_42 / 4][scalar_offset_42 % 4];
+  const uint scalar_offset_43 = ((offset + 4u)) / 4;
+  uint ubo_load_35 = buffer[scalar_offset_43 / 4][scalar_offset_43 % 4];
+  const uint scalar_offset_44 = ((offset + 8u)) / 4;
+  uint ubo_load_36 = buffer[scalar_offset_44 / 4][scalar_offset_44 % 4];
+  const uint scalar_offset_45 = ((offset + 12u)) / 4;
+  uint ubo_load_37 = buffer[scalar_offset_45 / 4][scalar_offset_45 % 4];
+  return matrix<float16_t, 4, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_34 & 0xFFFF)), float16_t(f16tof32(ubo_load_34 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_35 & 0xFFFF)), float16_t(f16tof32(ubo_load_35 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_36 & 0xFFFF)), float16_t(f16tof32(ubo_load_36 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_37 & 0xFFFF)), float16_t(f16tof32(ubo_load_37 >> 16))));
+}
+
+matrix<float16_t, 4, 3> tint_symbol_32(uint4 buffer[55], uint offset) {
+  const uint scalar_offset_46 = ((offset + 0u)) / 4;
+  uint4 ubo_load_39 = buffer[scalar_offset_46 / 4];
+  uint2 ubo_load_38 = ((scalar_offset_46 & 2) ? ubo_load_39.zw : ubo_load_39.xy);
+  vector<float16_t, 2> ubo_load_38_xz = vector<float16_t, 2>(f16tof32(ubo_load_38 & 0xFFFF));
+  float16_t ubo_load_38_y = f16tof32(ubo_load_38[0] >> 16);
+  const uint scalar_offset_47 = ((offset + 8u)) / 4;
+  uint4 ubo_load_41 = buffer[scalar_offset_47 / 4];
+  uint2 ubo_load_40 = ((scalar_offset_47 & 2) ? ubo_load_41.zw : ubo_load_41.xy);
+  vector<float16_t, 2> ubo_load_40_xz = vector<float16_t, 2>(f16tof32(ubo_load_40 & 0xFFFF));
+  float16_t ubo_load_40_y = f16tof32(ubo_load_40[0] >> 16);
+  const uint scalar_offset_48 = ((offset + 16u)) / 4;
+  uint4 ubo_load_43 = buffer[scalar_offset_48 / 4];
+  uint2 ubo_load_42 = ((scalar_offset_48 & 2) ? ubo_load_43.zw : ubo_load_43.xy);
+  vector<float16_t, 2> ubo_load_42_xz = vector<float16_t, 2>(f16tof32(ubo_load_42 & 0xFFFF));
+  float16_t ubo_load_42_y = f16tof32(ubo_load_42[0] >> 16);
+  const uint scalar_offset_49 = ((offset + 24u)) / 4;
+  uint4 ubo_load_45 = buffer[scalar_offset_49 / 4];
+  uint2 ubo_load_44 = ((scalar_offset_49 & 2) ? ubo_load_45.zw : ubo_load_45.xy);
+  vector<float16_t, 2> ubo_load_44_xz = vector<float16_t, 2>(f16tof32(ubo_load_44 & 0xFFFF));
+  float16_t ubo_load_44_y = f16tof32(ubo_load_44[0] >> 16);
+  return matrix<float16_t, 4, 3>(vector<float16_t, 3>(ubo_load_38_xz[0], ubo_load_38_y, ubo_load_38_xz[1]), vector<float16_t, 3>(ubo_load_40_xz[0], ubo_load_40_y, ubo_load_40_xz[1]), vector<float16_t, 3>(ubo_load_42_xz[0], ubo_load_42_y, ubo_load_42_xz[1]), vector<float16_t, 3>(ubo_load_44_xz[0], ubo_load_44_y, ubo_load_44_xz[1]));
+}
+
+matrix<float16_t, 4, 4> tint_symbol_33(uint4 buffer[55], uint offset) {
+  const uint scalar_offset_50 = ((offset + 0u)) / 4;
+  uint4 ubo_load_47 = buffer[scalar_offset_50 / 4];
+  uint2 ubo_load_46 = ((scalar_offset_50 & 2) ? ubo_load_47.zw : ubo_load_47.xy);
+  vector<float16_t, 2> ubo_load_46_xz = vector<float16_t, 2>(f16tof32(ubo_load_46 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_46_yw = vector<float16_t, 2>(f16tof32(ubo_load_46 >> 16));
+  const uint scalar_offset_51 = ((offset + 8u)) / 4;
+  uint4 ubo_load_49 = buffer[scalar_offset_51 / 4];
+  uint2 ubo_load_48 = ((scalar_offset_51 & 2) ? ubo_load_49.zw : ubo_load_49.xy);
+  vector<float16_t, 2> ubo_load_48_xz = vector<float16_t, 2>(f16tof32(ubo_load_48 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_48_yw = vector<float16_t, 2>(f16tof32(ubo_load_48 >> 16));
+  const uint scalar_offset_52 = ((offset + 16u)) / 4;
+  uint4 ubo_load_51 = buffer[scalar_offset_52 / 4];
+  uint2 ubo_load_50 = ((scalar_offset_52 & 2) ? ubo_load_51.zw : ubo_load_51.xy);
+  vector<float16_t, 2> ubo_load_50_xz = vector<float16_t, 2>(f16tof32(ubo_load_50 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_50_yw = vector<float16_t, 2>(f16tof32(ubo_load_50 >> 16));
+  const uint scalar_offset_53 = ((offset + 24u)) / 4;
+  uint4 ubo_load_53 = buffer[scalar_offset_53 / 4];
+  uint2 ubo_load_52 = ((scalar_offset_53 & 2) ? ubo_load_53.zw : ubo_load_53.xy);
+  vector<float16_t, 2> ubo_load_52_xz = vector<float16_t, 2>(f16tof32(ubo_load_52 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_52_yw = vector<float16_t, 2>(f16tof32(ubo_load_52 >> 16));
+  return matrix<float16_t, 4, 4>(vector<float16_t, 4>(ubo_load_46_xz[0], ubo_load_46_yw[0], ubo_load_46_xz[1], ubo_load_46_yw[1]), vector<float16_t, 4>(ubo_load_48_xz[0], ubo_load_48_yw[0], ubo_load_48_xz[1], ubo_load_48_yw[1]), vector<float16_t, 4>(ubo_load_50_xz[0], ubo_load_50_yw[0], ubo_load_50_xz[1], ubo_load_50_yw[1]), vector<float16_t, 4>(ubo_load_52_xz[0], ubo_load_52_yw[0], ubo_load_52_xz[1], ubo_load_52_yw[1]));
+}
+
+typedef float3 tint_symbol_34_ret[2];
+tint_symbol_34_ret tint_symbol_34(uint4 buffer[55], uint offset) {
+  float3 arr[2] = (float3[2])0;
+  {
+    for(uint i = 0u; (i < 2u); i = (i + 1u)) {
+      const uint scalar_offset_54 = ((offset + (i * 16u))) / 4;
+      arr[i] = asfloat(buffer[scalar_offset_54 / 4].xyz);
+    }
+  }
+  return arr;
+}
+
+typedef matrix<float16_t, 4, 2> tint_symbol_35_ret[2];
+tint_symbol_35_ret tint_symbol_35(uint4 buffer[55], uint offset) {
+  matrix<float16_t, 4, 2> arr_1[2] = (matrix<float16_t, 4, 2>[2])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 2u); i_1 = (i_1 + 1u)) {
+      arr_1[i_1] = tint_symbol_31(buffer, (offset + (i_1 * 16u)));
+    }
+  }
+  return arr_1;
+}
+
+Inner tint_symbol_36(uint4 buffer[55], uint offset) {
+  const uint scalar_offset_55 = ((offset + 0u)) / 4;
+  const uint scalar_offset_56 = ((offset + 4u)) / 4;
+  const uint scalar_offset_bytes = ((offset + 8u));
+  const uint scalar_offset_index = scalar_offset_bytes / 4;
+  const Inner tint_symbol_38 = {asint(buffer[scalar_offset_55 / 4][scalar_offset_55 % 4]), asfloat(buffer[scalar_offset_56 / 4][scalar_offset_56 % 4]), float16_t(f16tof32(((buffer[scalar_offset_index / 4][scalar_offset_index % 4] >> (scalar_offset_bytes % 4 == 0 ? 0 : 16)) & 0xFFFF)))};
+  return tint_symbol_38;
+}
+
+typedef Inner tint_symbol_37_ret[4];
+tint_symbol_37_ret tint_symbol_37(uint4 buffer[55], uint offset) {
+  Inner arr_2[4] = (Inner[4])0;
+  {
+    for(uint i_2 = 0u; (i_2 < 4u); i_2 = (i_2 + 1u)) {
+      arr_2[i_2] = tint_symbol_36(buffer, (offset + (i_2 * 16u)));
+    }
+  }
+  return arr_2;
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  const float scalar_f32 = asfloat(ub[0].x);
+  const int scalar_i32 = asint(ub[0].y);
+  const uint scalar_u32 = ub[0].z;
+  const float16_t scalar_f16 = float16_t(f16tof32(((ub[0].w) & 0xFFFF)));
+  const float2 vec2_f32 = asfloat(ub[1].xy);
+  const int2 vec2_i32 = asint(ub[1].zw);
+  const uint2 vec2_u32 = ub[2].xy;
+  uint ubo_load_54 = ub[2].z;
+  const vector<float16_t, 2> vec2_f16 = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_54 & 0xFFFF)), float16_t(f16tof32(ubo_load_54 >> 16)));
+  const float3 vec3_f32 = asfloat(ub[3].xyz);
+  const int3 vec3_i32 = asint(ub[4].xyz);
+  const uint3 vec3_u32 = ub[5].xyz;
+  uint2 ubo_load_55 = ub[6].xy;
+  vector<float16_t, 2> ubo_load_55_xz = vector<float16_t, 2>(f16tof32(ubo_load_55 & 0xFFFF));
+  float16_t ubo_load_55_y = f16tof32(ubo_load_55[0] >> 16);
+  const vector<float16_t, 3> vec3_f16 = vector<float16_t, 3>(ubo_load_55_xz[0], ubo_load_55_y, ubo_load_55_xz[1]);
+  const float4 vec4_f32 = asfloat(ub[7]);
+  const int4 vec4_i32 = asint(ub[8]);
+  const uint4 vec4_u32 = ub[9];
+  uint2 ubo_load_56 = ub[10].xy;
+  vector<float16_t, 2> ubo_load_56_xz = vector<float16_t, 2>(f16tof32(ubo_load_56 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_56_yw = vector<float16_t, 2>(f16tof32(ubo_load_56 >> 16));
+  const vector<float16_t, 4> vec4_f16 = vector<float16_t, 4>(ubo_load_56_xz[0], ubo_load_56_yw[0], ubo_load_56_xz[1], ubo_load_56_yw[1]);
+  const float2x2 mat2x2_f32 = tint_symbol_16(ub, 168u);
+  const float2x3 mat2x3_f32 = tint_symbol_17(ub, 192u);
+  const float2x4 mat2x4_f32 = tint_symbol_18(ub, 224u);
+  const float3x2 mat3x2_f32 = tint_symbol_19(ub, 256u);
+  const float3x3 mat3x3_f32 = tint_symbol_20(ub, 288u);
+  const float3x4 mat3x4_f32 = tint_symbol_21(ub, 336u);
+  const float4x2 mat4x2_f32 = tint_symbol_22(ub, 384u);
+  const float4x3 mat4x3_f32 = tint_symbol_23(ub, 416u);
+  const float4x4 mat4x4_f32 = tint_symbol_24(ub, 480u);
+  const matrix<float16_t, 2, 2> mat2x2_f16 = tint_symbol_25(ub, 544u);
+  const matrix<float16_t, 2, 3> mat2x3_f16 = tint_symbol_26(ub, 552u);
+  const matrix<float16_t, 2, 4> mat2x4_f16 = tint_symbol_27(ub, 568u);
+  const matrix<float16_t, 3, 2> mat3x2_f16 = tint_symbol_28(ub, 584u);
+  const matrix<float16_t, 3, 3> mat3x3_f16 = tint_symbol_29(ub, 600u);
+  const matrix<float16_t, 3, 4> mat3x4_f16 = tint_symbol_30(ub, 624u);
+  const matrix<float16_t, 4, 2> mat4x2_f16 = tint_symbol_31(ub, 648u);
+  const matrix<float16_t, 4, 3> mat4x3_f16 = tint_symbol_32(ub, 664u);
+  const matrix<float16_t, 4, 4> mat4x4_f16 = tint_symbol_33(ub, 696u);
+  const float3 arr2_vec3_f32[2] = tint_symbol_34(ub, 736u);
+  const matrix<float16_t, 4, 2> arr2_mat4x2_f16[2] = tint_symbol_35(ub, 768u);
+  const Inner struct_inner = tint_symbol_36(ub, 800u);
+  const Inner array_struct_inner[4] = tint_symbol_37(ub, 816u);
+  return;
+}

diff --git a/test/tint/buffer/uniform/static_index/read_f16.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/static_index/read_f16.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..1ee7543
--- /dev/null
+++ b/test/tint/buffer/uniform/static_index/read_f16.wgsl.expected.fxc.hlsl

@@ -0,0 +1,323 @@
+SKIP: FAILED
+
+struct Inner {
+  int scalar_i32;
+  float scalar_f32;
+  float16_t scalar_f16;
+};
+
+cbuffer cbuffer_ub : register(b0, space0) {
+  uint4 ub[55];
+};
+
+float2x2 tint_symbol_16(uint4 buffer[55], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  return float2x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)));
+}
+
+float2x3 tint_symbol_17(uint4 buffer[55], uint offset) {
+  const uint scalar_offset_2 = ((offset + 0u)) / 4;
+  const uint scalar_offset_3 = ((offset + 16u)) / 4;
+  return float2x3(asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
+}
+
+float2x4 tint_symbol_18(uint4 buffer[55], uint offset) {
+  const uint scalar_offset_4 = ((offset + 0u)) / 4;
+  const uint scalar_offset_5 = ((offset + 16u)) / 4;
+  return float2x4(asfloat(buffer[scalar_offset_4 / 4]), asfloat(buffer[scalar_offset_5 / 4]));
+}
+
+float3x2 tint_symbol_19(uint4 buffer[55], uint offset) {
+  const uint scalar_offset_6 = ((offset + 0u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_6 / 4];
+  const uint scalar_offset_7 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_7 / 4];
+  const uint scalar_offset_8 = ((offset + 16u)) / 4;
+  uint4 ubo_load_4 = buffer[scalar_offset_8 / 4];
+  return float3x2(asfloat(((scalar_offset_6 & 2) ? ubo_load_2.zw : ubo_load_2.xy)), asfloat(((scalar_offset_7 & 2) ? ubo_load_3.zw : ubo_load_3.xy)), asfloat(((scalar_offset_8 & 2) ? ubo_load_4.zw : ubo_load_4.xy)));
+}
+
+float3x3 tint_symbol_20(uint4 buffer[55], uint offset) {
+  const uint scalar_offset_9 = ((offset + 0u)) / 4;
+  const uint scalar_offset_10 = ((offset + 16u)) / 4;
+  const uint scalar_offset_11 = ((offset + 32u)) / 4;
+  return float3x3(asfloat(buffer[scalar_offset_9 / 4].xyz), asfloat(buffer[scalar_offset_10 / 4].xyz), asfloat(buffer[scalar_offset_11 / 4].xyz));
+}
+
+float3x4 tint_symbol_21(uint4 buffer[55], uint offset) {
+  const uint scalar_offset_12 = ((offset + 0u)) / 4;
+  const uint scalar_offset_13 = ((offset + 16u)) / 4;
+  const uint scalar_offset_14 = ((offset + 32u)) / 4;
+  return float3x4(asfloat(buffer[scalar_offset_12 / 4]), asfloat(buffer[scalar_offset_13 / 4]), asfloat(buffer[scalar_offset_14 / 4]));
+}
+
+float4x2 tint_symbol_22(uint4 buffer[55], uint offset) {
+  const uint scalar_offset_15 = ((offset + 0u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_15 / 4];
+  const uint scalar_offset_16 = ((offset + 8u)) / 4;
+  uint4 ubo_load_6 = buffer[scalar_offset_16 / 4];
+  const uint scalar_offset_17 = ((offset + 16u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_17 / 4];
+  const uint scalar_offset_18 = ((offset + 24u)) / 4;
+  uint4 ubo_load_8 = buffer[scalar_offset_18 / 4];
+  return float4x2(asfloat(((scalar_offset_15 & 2) ? ubo_load_5.zw : ubo_load_5.xy)), asfloat(((scalar_offset_16 & 2) ? ubo_load_6.zw : ubo_load_6.xy)), asfloat(((scalar_offset_17 & 2) ? ubo_load_7.zw : ubo_load_7.xy)), asfloat(((scalar_offset_18 & 2) ? ubo_load_8.zw : ubo_load_8.xy)));
+}
+
+float4x3 tint_symbol_23(uint4 buffer[55], uint offset) {
+  const uint scalar_offset_19 = ((offset + 0u)) / 4;
+  const uint scalar_offset_20 = ((offset + 16u)) / 4;
+  const uint scalar_offset_21 = ((offset + 32u)) / 4;
+  const uint scalar_offset_22 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset_19 / 4].xyz), asfloat(buffer[scalar_offset_20 / 4].xyz), asfloat(buffer[scalar_offset_21 / 4].xyz), asfloat(buffer[scalar_offset_22 / 4].xyz));
+}
+
+float4x4 tint_symbol_24(uint4 buffer[55], uint offset) {
+  const uint scalar_offset_23 = ((offset + 0u)) / 4;
+  const uint scalar_offset_24 = ((offset + 16u)) / 4;
+  const uint scalar_offset_25 = ((offset + 32u)) / 4;
+  const uint scalar_offset_26 = ((offset + 48u)) / 4;
+  return float4x4(asfloat(buffer[scalar_offset_23 / 4]), asfloat(buffer[scalar_offset_24 / 4]), asfloat(buffer[scalar_offset_25 / 4]), asfloat(buffer[scalar_offset_26 / 4]));
+}
+
+matrix<float16_t, 2, 2> tint_symbol_25(uint4 buffer[55], uint offset) {
+  const uint scalar_offset_27 = ((offset + 0u)) / 4;
+  uint ubo_load_9 = buffer[scalar_offset_27 / 4][scalar_offset_27 % 4];
+  const uint scalar_offset_28 = ((offset + 4u)) / 4;
+  uint ubo_load_10 = buffer[scalar_offset_28 / 4][scalar_offset_28 % 4];
+  return matrix<float16_t, 2, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_9 & 0xFFFF)), float16_t(f16tof32(ubo_load_9 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_10 & 0xFFFF)), float16_t(f16tof32(ubo_load_10 >> 16))));
+}
+
+matrix<float16_t, 2, 3> tint_symbol_26(uint4 buffer[55], uint offset) {
+  const uint scalar_offset_29 = ((offset + 0u)) / 4;
+  uint4 ubo_load_12 = buffer[scalar_offset_29 / 4];
+  uint2 ubo_load_11 = ((scalar_offset_29 & 2) ? ubo_load_12.zw : ubo_load_12.xy);
+  vector<float16_t, 2> ubo_load_11_xz = vector<float16_t, 2>(f16tof32(ubo_load_11 & 0xFFFF));
+  float16_t ubo_load_11_y = f16tof32(ubo_load_11[0] >> 16);
+  const uint scalar_offset_30 = ((offset + 8u)) / 4;
+  uint4 ubo_load_14 = buffer[scalar_offset_30 / 4];
+  uint2 ubo_load_13 = ((scalar_offset_30 & 2) ? ubo_load_14.zw : ubo_load_14.xy);
+  vector<float16_t, 2> ubo_load_13_xz = vector<float16_t, 2>(f16tof32(ubo_load_13 & 0xFFFF));
+  float16_t ubo_load_13_y = f16tof32(ubo_load_13[0] >> 16);
+  return matrix<float16_t, 2, 3>(vector<float16_t, 3>(ubo_load_11_xz[0], ubo_load_11_y, ubo_load_11_xz[1]), vector<float16_t, 3>(ubo_load_13_xz[0], ubo_load_13_y, ubo_load_13_xz[1]));
+}
+
+matrix<float16_t, 2, 4> tint_symbol_27(uint4 buffer[55], uint offset) {
+  const uint scalar_offset_31 = ((offset + 0u)) / 4;
+  uint4 ubo_load_16 = buffer[scalar_offset_31 / 4];
+  uint2 ubo_load_15 = ((scalar_offset_31 & 2) ? ubo_load_16.zw : ubo_load_16.xy);
+  vector<float16_t, 2> ubo_load_15_xz = vector<float16_t, 2>(f16tof32(ubo_load_15 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_15_yw = vector<float16_t, 2>(f16tof32(ubo_load_15 >> 16));
+  const uint scalar_offset_32 = ((offset + 8u)) / 4;
+  uint4 ubo_load_18 = buffer[scalar_offset_32 / 4];
+  uint2 ubo_load_17 = ((scalar_offset_32 & 2) ? ubo_load_18.zw : ubo_load_18.xy);
+  vector<float16_t, 2> ubo_load_17_xz = vector<float16_t, 2>(f16tof32(ubo_load_17 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_17_yw = vector<float16_t, 2>(f16tof32(ubo_load_17 >> 16));
+  return matrix<float16_t, 2, 4>(vector<float16_t, 4>(ubo_load_15_xz[0], ubo_load_15_yw[0], ubo_load_15_xz[1], ubo_load_15_yw[1]), vector<float16_t, 4>(ubo_load_17_xz[0], ubo_load_17_yw[0], ubo_load_17_xz[1], ubo_load_17_yw[1]));
+}
+
+matrix<float16_t, 3, 2> tint_symbol_28(uint4 buffer[55], uint offset) {
+  const uint scalar_offset_33 = ((offset + 0u)) / 4;
+  uint ubo_load_19 = buffer[scalar_offset_33 / 4][scalar_offset_33 % 4];
+  const uint scalar_offset_34 = ((offset + 4u)) / 4;
+  uint ubo_load_20 = buffer[scalar_offset_34 / 4][scalar_offset_34 % 4];
+  const uint scalar_offset_35 = ((offset + 8u)) / 4;
+  uint ubo_load_21 = buffer[scalar_offset_35 / 4][scalar_offset_35 % 4];
+  return matrix<float16_t, 3, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_19 & 0xFFFF)), float16_t(f16tof32(ubo_load_19 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_20 & 0xFFFF)), float16_t(f16tof32(ubo_load_20 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_21 & 0xFFFF)), float16_t(f16tof32(ubo_load_21 >> 16))));
+}
+
+matrix<float16_t, 3, 3> tint_symbol_29(uint4 buffer[55], uint offset) {
+  const uint scalar_offset_36 = ((offset + 0u)) / 4;
+  uint4 ubo_load_23 = buffer[scalar_offset_36 / 4];
+  uint2 ubo_load_22 = ((scalar_offset_36 & 2) ? ubo_load_23.zw : ubo_load_23.xy);
+  vector<float16_t, 2> ubo_load_22_xz = vector<float16_t, 2>(f16tof32(ubo_load_22 & 0xFFFF));
+  float16_t ubo_load_22_y = f16tof32(ubo_load_22[0] >> 16);
+  const uint scalar_offset_37 = ((offset + 8u)) / 4;
+  uint4 ubo_load_25 = buffer[scalar_offset_37 / 4];
+  uint2 ubo_load_24 = ((scalar_offset_37 & 2) ? ubo_load_25.zw : ubo_load_25.xy);
+  vector<float16_t, 2> ubo_load_24_xz = vector<float16_t, 2>(f16tof32(ubo_load_24 & 0xFFFF));
+  float16_t ubo_load_24_y = f16tof32(ubo_load_24[0] >> 16);
+  const uint scalar_offset_38 = ((offset + 16u)) / 4;
+  uint4 ubo_load_27 = buffer[scalar_offset_38 / 4];
+  uint2 ubo_load_26 = ((scalar_offset_38 & 2) ? ubo_load_27.zw : ubo_load_27.xy);
+  vector<float16_t, 2> ubo_load_26_xz = vector<float16_t, 2>(f16tof32(ubo_load_26 & 0xFFFF));
+  float16_t ubo_load_26_y = f16tof32(ubo_load_26[0] >> 16);
+  return matrix<float16_t, 3, 3>(vector<float16_t, 3>(ubo_load_22_xz[0], ubo_load_22_y, ubo_load_22_xz[1]), vector<float16_t, 3>(ubo_load_24_xz[0], ubo_load_24_y, ubo_load_24_xz[1]), vector<float16_t, 3>(ubo_load_26_xz[0], ubo_load_26_y, ubo_load_26_xz[1]));
+}
+
+matrix<float16_t, 3, 4> tint_symbol_30(uint4 buffer[55], uint offset) {
+  const uint scalar_offset_39 = ((offset + 0u)) / 4;
+  uint4 ubo_load_29 = buffer[scalar_offset_39 / 4];
+  uint2 ubo_load_28 = ((scalar_offset_39 & 2) ? ubo_load_29.zw : ubo_load_29.xy);
+  vector<float16_t, 2> ubo_load_28_xz = vector<float16_t, 2>(f16tof32(ubo_load_28 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_28_yw = vector<float16_t, 2>(f16tof32(ubo_load_28 >> 16));
+  const uint scalar_offset_40 = ((offset + 8u)) / 4;
+  uint4 ubo_load_31 = buffer[scalar_offset_40 / 4];
+  uint2 ubo_load_30 = ((scalar_offset_40 & 2) ? ubo_load_31.zw : ubo_load_31.xy);
+  vector<float16_t, 2> ubo_load_30_xz = vector<float16_t, 2>(f16tof32(ubo_load_30 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_30_yw = vector<float16_t, 2>(f16tof32(ubo_load_30 >> 16));
+  const uint scalar_offset_41 = ((offset + 16u)) / 4;
+  uint4 ubo_load_33 = buffer[scalar_offset_41 / 4];
+  uint2 ubo_load_32 = ((scalar_offset_41 & 2) ? ubo_load_33.zw : ubo_load_33.xy);
+  vector<float16_t, 2> ubo_load_32_xz = vector<float16_t, 2>(f16tof32(ubo_load_32 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_32_yw = vector<float16_t, 2>(f16tof32(ubo_load_32 >> 16));
+  return matrix<float16_t, 3, 4>(vector<float16_t, 4>(ubo_load_28_xz[0], ubo_load_28_yw[0], ubo_load_28_xz[1], ubo_load_28_yw[1]), vector<float16_t, 4>(ubo_load_30_xz[0], ubo_load_30_yw[0], ubo_load_30_xz[1], ubo_load_30_yw[1]), vector<float16_t, 4>(ubo_load_32_xz[0], ubo_load_32_yw[0], ubo_load_32_xz[1], ubo_load_32_yw[1]));
+}
+
+matrix<float16_t, 4, 2> tint_symbol_31(uint4 buffer[55], uint offset) {
+  const uint scalar_offset_42 = ((offset + 0u)) / 4;
+  uint ubo_load_34 = buffer[scalar_offset_42 / 4][scalar_offset_42 % 4];
+  const uint scalar_offset_43 = ((offset + 4u)) / 4;
+  uint ubo_load_35 = buffer[scalar_offset_43 / 4][scalar_offset_43 % 4];
+  const uint scalar_offset_44 = ((offset + 8u)) / 4;
+  uint ubo_load_36 = buffer[scalar_offset_44 / 4][scalar_offset_44 % 4];
+  const uint scalar_offset_45 = ((offset + 12u)) / 4;
+  uint ubo_load_37 = buffer[scalar_offset_45 / 4][scalar_offset_45 % 4];
+  return matrix<float16_t, 4, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_34 & 0xFFFF)), float16_t(f16tof32(ubo_load_34 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_35 & 0xFFFF)), float16_t(f16tof32(ubo_load_35 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_36 & 0xFFFF)), float16_t(f16tof32(ubo_load_36 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_37 & 0xFFFF)), float16_t(f16tof32(ubo_load_37 >> 16))));
+}
+
+matrix<float16_t, 4, 3> tint_symbol_32(uint4 buffer[55], uint offset) {
+  const uint scalar_offset_46 = ((offset + 0u)) / 4;
+  uint4 ubo_load_39 = buffer[scalar_offset_46 / 4];
+  uint2 ubo_load_38 = ((scalar_offset_46 & 2) ? ubo_load_39.zw : ubo_load_39.xy);
+  vector<float16_t, 2> ubo_load_38_xz = vector<float16_t, 2>(f16tof32(ubo_load_38 & 0xFFFF));
+  float16_t ubo_load_38_y = f16tof32(ubo_load_38[0] >> 16);
+  const uint scalar_offset_47 = ((offset + 8u)) / 4;
+  uint4 ubo_load_41 = buffer[scalar_offset_47 / 4];
+  uint2 ubo_load_40 = ((scalar_offset_47 & 2) ? ubo_load_41.zw : ubo_load_41.xy);
+  vector<float16_t, 2> ubo_load_40_xz = vector<float16_t, 2>(f16tof32(ubo_load_40 & 0xFFFF));
+  float16_t ubo_load_40_y = f16tof32(ubo_load_40[0] >> 16);
+  const uint scalar_offset_48 = ((offset + 16u)) / 4;
+  uint4 ubo_load_43 = buffer[scalar_offset_48 / 4];
+  uint2 ubo_load_42 = ((scalar_offset_48 & 2) ? ubo_load_43.zw : ubo_load_43.xy);
+  vector<float16_t, 2> ubo_load_42_xz = vector<float16_t, 2>(f16tof32(ubo_load_42 & 0xFFFF));
+  float16_t ubo_load_42_y = f16tof32(ubo_load_42[0] >> 16);
+  const uint scalar_offset_49 = ((offset + 24u)) / 4;
+  uint4 ubo_load_45 = buffer[scalar_offset_49 / 4];
+  uint2 ubo_load_44 = ((scalar_offset_49 & 2) ? ubo_load_45.zw : ubo_load_45.xy);
+  vector<float16_t, 2> ubo_load_44_xz = vector<float16_t, 2>(f16tof32(ubo_load_44 & 0xFFFF));
+  float16_t ubo_load_44_y = f16tof32(ubo_load_44[0] >> 16);
+  return matrix<float16_t, 4, 3>(vector<float16_t, 3>(ubo_load_38_xz[0], ubo_load_38_y, ubo_load_38_xz[1]), vector<float16_t, 3>(ubo_load_40_xz[0], ubo_load_40_y, ubo_load_40_xz[1]), vector<float16_t, 3>(ubo_load_42_xz[0], ubo_load_42_y, ubo_load_42_xz[1]), vector<float16_t, 3>(ubo_load_44_xz[0], ubo_load_44_y, ubo_load_44_xz[1]));
+}
+
+matrix<float16_t, 4, 4> tint_symbol_33(uint4 buffer[55], uint offset) {
+  const uint scalar_offset_50 = ((offset + 0u)) / 4;
+  uint4 ubo_load_47 = buffer[scalar_offset_50 / 4];
+  uint2 ubo_load_46 = ((scalar_offset_50 & 2) ? ubo_load_47.zw : ubo_load_47.xy);
+  vector<float16_t, 2> ubo_load_46_xz = vector<float16_t, 2>(f16tof32(ubo_load_46 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_46_yw = vector<float16_t, 2>(f16tof32(ubo_load_46 >> 16));
+  const uint scalar_offset_51 = ((offset + 8u)) / 4;
+  uint4 ubo_load_49 = buffer[scalar_offset_51 / 4];
+  uint2 ubo_load_48 = ((scalar_offset_51 & 2) ? ubo_load_49.zw : ubo_load_49.xy);
+  vector<float16_t, 2> ubo_load_48_xz = vector<float16_t, 2>(f16tof32(ubo_load_48 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_48_yw = vector<float16_t, 2>(f16tof32(ubo_load_48 >> 16));
+  const uint scalar_offset_52 = ((offset + 16u)) / 4;
+  uint4 ubo_load_51 = buffer[scalar_offset_52 / 4];
+  uint2 ubo_load_50 = ((scalar_offset_52 & 2) ? ubo_load_51.zw : ubo_load_51.xy);
+  vector<float16_t, 2> ubo_load_50_xz = vector<float16_t, 2>(f16tof32(ubo_load_50 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_50_yw = vector<float16_t, 2>(f16tof32(ubo_load_50 >> 16));
+  const uint scalar_offset_53 = ((offset + 24u)) / 4;
+  uint4 ubo_load_53 = buffer[scalar_offset_53 / 4];
+  uint2 ubo_load_52 = ((scalar_offset_53 & 2) ? ubo_load_53.zw : ubo_load_53.xy);
+  vector<float16_t, 2> ubo_load_52_xz = vector<float16_t, 2>(f16tof32(ubo_load_52 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_52_yw = vector<float16_t, 2>(f16tof32(ubo_load_52 >> 16));
+  return matrix<float16_t, 4, 4>(vector<float16_t, 4>(ubo_load_46_xz[0], ubo_load_46_yw[0], ubo_load_46_xz[1], ubo_load_46_yw[1]), vector<float16_t, 4>(ubo_load_48_xz[0], ubo_load_48_yw[0], ubo_load_48_xz[1], ubo_load_48_yw[1]), vector<float16_t, 4>(ubo_load_50_xz[0], ubo_load_50_yw[0], ubo_load_50_xz[1], ubo_load_50_yw[1]), vector<float16_t, 4>(ubo_load_52_xz[0], ubo_load_52_yw[0], ubo_load_52_xz[1], ubo_load_52_yw[1]));
+}
+
+typedef float3 tint_symbol_34_ret[2];
+tint_symbol_34_ret tint_symbol_34(uint4 buffer[55], uint offset) {
+  float3 arr[2] = (float3[2])0;
+  {
+    for(uint i = 0u; (i < 2u); i = (i + 1u)) {
+      const uint scalar_offset_54 = ((offset + (i * 16u))) / 4;
+      arr[i] = asfloat(buffer[scalar_offset_54 / 4].xyz);
+    }
+  }
+  return arr;
+}
+
+typedef matrix<float16_t, 4, 2> tint_symbol_35_ret[2];
+tint_symbol_35_ret tint_symbol_35(uint4 buffer[55], uint offset) {
+  matrix<float16_t, 4, 2> arr_1[2] = (matrix<float16_t, 4, 2>[2])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 2u); i_1 = (i_1 + 1u)) {
+      arr_1[i_1] = tint_symbol_31(buffer, (offset + (i_1 * 16u)));
+    }
+  }
+  return arr_1;
+}
+
+Inner tint_symbol_36(uint4 buffer[55], uint offset) {
+  const uint scalar_offset_55 = ((offset + 0u)) / 4;
+  const uint scalar_offset_56 = ((offset + 4u)) / 4;
+  const uint scalar_offset_bytes = ((offset + 8u));
+  const uint scalar_offset_index = scalar_offset_bytes / 4;
+  const Inner tint_symbol_38 = {asint(buffer[scalar_offset_55 / 4][scalar_offset_55 % 4]), asfloat(buffer[scalar_offset_56 / 4][scalar_offset_56 % 4]), float16_t(f16tof32(((buffer[scalar_offset_index / 4][scalar_offset_index % 4] >> (scalar_offset_bytes % 4 == 0 ? 0 : 16)) & 0xFFFF)))};
+  return tint_symbol_38;
+}
+
+typedef Inner tint_symbol_37_ret[4];
+tint_symbol_37_ret tint_symbol_37(uint4 buffer[55], uint offset) {
+  Inner arr_2[4] = (Inner[4])0;
+  {
+    for(uint i_2 = 0u; (i_2 < 4u); i_2 = (i_2 + 1u)) {
+      arr_2[i_2] = tint_symbol_36(buffer, (offset + (i_2 * 16u)));
+    }
+  }
+  return arr_2;
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  const float scalar_f32 = asfloat(ub[0].x);
+  const int scalar_i32 = asint(ub[0].y);
+  const uint scalar_u32 = ub[0].z;
+  const float16_t scalar_f16 = float16_t(f16tof32(((ub[0].w) & 0xFFFF)));
+  const float2 vec2_f32 = asfloat(ub[1].xy);
+  const int2 vec2_i32 = asint(ub[1].zw);
+  const uint2 vec2_u32 = ub[2].xy;
+  uint ubo_load_54 = ub[2].z;
+  const vector<float16_t, 2> vec2_f16 = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_54 & 0xFFFF)), float16_t(f16tof32(ubo_load_54 >> 16)));
+  const float3 vec3_f32 = asfloat(ub[3].xyz);
+  const int3 vec3_i32 = asint(ub[4].xyz);
+  const uint3 vec3_u32 = ub[5].xyz;
+  uint2 ubo_load_55 = ub[6].xy;
+  vector<float16_t, 2> ubo_load_55_xz = vector<float16_t, 2>(f16tof32(ubo_load_55 & 0xFFFF));
+  float16_t ubo_load_55_y = f16tof32(ubo_load_55[0] >> 16);
+  const vector<float16_t, 3> vec3_f16 = vector<float16_t, 3>(ubo_load_55_xz[0], ubo_load_55_y, ubo_load_55_xz[1]);
+  const float4 vec4_f32 = asfloat(ub[7]);
+  const int4 vec4_i32 = asint(ub[8]);
+  const uint4 vec4_u32 = ub[9];
+  uint2 ubo_load_56 = ub[10].xy;
+  vector<float16_t, 2> ubo_load_56_xz = vector<float16_t, 2>(f16tof32(ubo_load_56 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_56_yw = vector<float16_t, 2>(f16tof32(ubo_load_56 >> 16));
+  const vector<float16_t, 4> vec4_f16 = vector<float16_t, 4>(ubo_load_56_xz[0], ubo_load_56_yw[0], ubo_load_56_xz[1], ubo_load_56_yw[1]);
+  const float2x2 mat2x2_f32 = tint_symbol_16(ub, 168u);
+  const float2x3 mat2x3_f32 = tint_symbol_17(ub, 192u);
+  const float2x4 mat2x4_f32 = tint_symbol_18(ub, 224u);
+  const float3x2 mat3x2_f32 = tint_symbol_19(ub, 256u);
+  const float3x3 mat3x3_f32 = tint_symbol_20(ub, 288u);
+  const float3x4 mat3x4_f32 = tint_symbol_21(ub, 336u);
+  const float4x2 mat4x2_f32 = tint_symbol_22(ub, 384u);
+  const float4x3 mat4x3_f32 = tint_symbol_23(ub, 416u);
+  const float4x4 mat4x4_f32 = tint_symbol_24(ub, 480u);
+  const matrix<float16_t, 2, 2> mat2x2_f16 = tint_symbol_25(ub, 544u);
+  const matrix<float16_t, 2, 3> mat2x3_f16 = tint_symbol_26(ub, 552u);
+  const matrix<float16_t, 2, 4> mat2x4_f16 = tint_symbol_27(ub, 568u);
+  const matrix<float16_t, 3, 2> mat3x2_f16 = tint_symbol_28(ub, 584u);
+  const matrix<float16_t, 3, 3> mat3x3_f16 = tint_symbol_29(ub, 600u);
+  const matrix<float16_t, 3, 4> mat3x4_f16 = tint_symbol_30(ub, 624u);
+  const matrix<float16_t, 4, 2> mat4x2_f16 = tint_symbol_31(ub, 648u);
+  const matrix<float16_t, 4, 3> mat4x3_f16 = tint_symbol_32(ub, 664u);
+  const matrix<float16_t, 4, 4> mat4x4_f16 = tint_symbol_33(ub, 696u);
+  const float3 arr2_vec3_f32[2] = tint_symbol_34(ub, 736u);
+  const matrix<float16_t, 4, 2> arr2_mat4x2_f16[2] = tint_symbol_35(ub, 768u);
+  const Inner struct_inner = tint_symbol_36(ub, 800u);
+  const Inner array_struct_inner[4] = tint_symbol_37(ub, 816u);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001A0147F5100(4,3-11): error X3000: unrecognized identifier 'float16_t'
+

diff --git a/test/tint/buffer/uniform/static_index/read_f16.wgsl.expected.glsl b/test/tint/buffer/uniform/static_index/read_f16.wgsl.expected.glsl
new file mode 100644
index 0000000..bf9f260
--- /dev/null
+++ b/test/tint/buffer/uniform/static_index/read_f16.wgsl.expected.glsl

@@ -0,0 +1,261 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct mat4x2_f16_4 {
+  f16vec2 col0;
+  f16vec2 col1;
+  f16vec2 col2;
+  f16vec2 col3;
+};
+
+struct Inner {
+  int scalar_i32;
+  float scalar_f32;
+  float16_t scalar_f16;
+  uint pad;
+};
+
+struct S {
+  float scalar_f32;
+  int scalar_i32;
+  uint scalar_u32;
+  float16_t scalar_f16;
+  vec2 vec2_f32;
+  ivec2 vec2_i32;
+  uvec2 vec2_u32;
+  f16vec2 vec2_f16;
+  uint pad_1;
+  vec3 vec3_f32;
+  uint pad_2;
+  ivec3 vec3_i32;
+  uint pad_3;
+  uvec3 vec3_u32;
+  uint pad_4;
+  f16vec3 vec3_f16;
+  uint pad_5;
+  uint pad_6;
+  vec4 vec4_f32;
+  ivec4 vec4_i32;
+  uvec4 vec4_u32;
+  f16vec4 vec4_f16;
+  mat2 mat2x2_f32;
+  uint pad_7;
+  uint pad_8;
+  mat2x3 mat2x3_f32;
+  mat2x4 mat2x4_f32;
+  mat3x2 mat3x2_f32;
+  uint pad_9;
+  uint pad_10;
+  mat3 mat3x3_f32;
+  mat3x4 mat3x4_f32;
+  mat4x2 mat4x2_f32;
+  mat4x3 mat4x3_f32;
+  mat4 mat4x4_f32;
+  f16mat2 mat2x2_f16;
+  f16mat2x3 mat2x3_f16;
+  f16mat2x4 mat2x4_f16;
+  f16mat3x2 mat3x2_f16;
+  uint pad_11;
+  f16mat3 mat3x3_f16;
+  f16mat3x4 mat3x4_f16;
+  f16mat4x2 mat4x2_f16;
+  f16mat4x3 mat4x3_f16;
+  f16mat4 mat4x4_f16;
+  uint pad_12;
+  uint pad_13;
+  vec3 arr2_vec3_f32[2];
+  f16mat4x2 arr2_mat4x2_f16[2];
+  Inner struct_inner;
+  Inner array_struct_inner[4];
+};
+
+struct S_std140 {
+  float scalar_f32;
+  int scalar_i32;
+  uint scalar_u32;
+  float16_t scalar_f16;
+  vec2 vec2_f32;
+  ivec2 vec2_i32;
+  uvec2 vec2_u32;
+  f16vec2 vec2_f16;
+  uint pad_1;
+  vec3 vec3_f32;
+  uint pad_2;
+  ivec3 vec3_i32;
+  uint pad_3;
+  uvec3 vec3_u32;
+  uint pad_4;
+  f16vec3 vec3_f16;
+  uint pad_5;
+  uint pad_6;
+  vec4 vec4_f32;
+  ivec4 vec4_i32;
+  uvec4 vec4_u32;
+  f16vec4 vec4_f16;
+  vec2 mat2x2_f32_0;
+  vec2 mat2x2_f32_1;
+  uint pad_7;
+  uint pad_8;
+  mat2x3 mat2x3_f32;
+  mat2x4 mat2x4_f32;
+  vec2 mat3x2_f32_0;
+  vec2 mat3x2_f32_1;
+  vec2 mat3x2_f32_2;
+  uint pad_9;
+  uint pad_10;
+  mat3 mat3x3_f32;
+  mat3x4 mat3x4_f32;
+  vec2 mat4x2_f32_0;
+  vec2 mat4x2_f32_1;
+  vec2 mat4x2_f32_2;
+  vec2 mat4x2_f32_3;
+  mat4x3 mat4x3_f32;
+  mat4 mat4x4_f32;
+  f16vec2 mat2x2_f16_0;
+  f16vec2 mat2x2_f16_1;
+  f16vec3 mat2x3_f16_0;
+  f16vec3 mat2x3_f16_1;
+  f16vec4 mat2x4_f16_0;
+  f16vec4 mat2x4_f16_1;
+  f16vec2 mat3x2_f16_0;
+  f16vec2 mat3x2_f16_1;
+  f16vec2 mat3x2_f16_2;
+  uint pad_11;
+  f16vec3 mat3x3_f16_0;
+  f16vec3 mat3x3_f16_1;
+  f16vec3 mat3x3_f16_2;
+  f16vec4 mat3x4_f16_0;
+  f16vec4 mat3x4_f16_1;
+  f16vec4 mat3x4_f16_2;
+  f16vec2 mat4x2_f16_0;
+  f16vec2 mat4x2_f16_1;
+  f16vec2 mat4x2_f16_2;
+  f16vec2 mat4x2_f16_3;
+  f16vec3 mat4x3_f16_0;
+  f16vec3 mat4x3_f16_1;
+  f16vec3 mat4x3_f16_2;
+  f16vec3 mat4x3_f16_3;
+  f16vec4 mat4x4_f16_0;
+  f16vec4 mat4x4_f16_1;
+  f16vec4 mat4x4_f16_2;
+  f16vec4 mat4x4_f16_3;
+  uint pad_12;
+  uint pad_13;
+  vec3 arr2_vec3_f32[2];
+  mat4x2_f16_4 arr2_mat4x2_f16[2];
+  Inner struct_inner;
+  Inner array_struct_inner[4];
+};
+
+layout(binding = 0, std140) uniform ub_block_std140_ubo {
+  S_std140 inner;
+} ub;
+
+mat2 load_ub_inner_mat2x2_f32() {
+  return mat2(ub.inner.mat2x2_f32_0, ub.inner.mat2x2_f32_1);
+}
+
+mat3x2 load_ub_inner_mat3x2_f32() {
+  return mat3x2(ub.inner.mat3x2_f32_0, ub.inner.mat3x2_f32_1, ub.inner.mat3x2_f32_2);
+}
+
+mat4x2 load_ub_inner_mat4x2_f32() {
+  return mat4x2(ub.inner.mat4x2_f32_0, ub.inner.mat4x2_f32_1, ub.inner.mat4x2_f32_2, ub.inner.mat4x2_f32_3);
+}
+
+f16mat2 load_ub_inner_mat2x2_f16() {
+  return f16mat2(ub.inner.mat2x2_f16_0, ub.inner.mat2x2_f16_1);
+}
+
+f16mat2x3 load_ub_inner_mat2x3_f16() {
+  return f16mat2x3(ub.inner.mat2x3_f16_0, ub.inner.mat2x3_f16_1);
+}
+
+f16mat2x4 load_ub_inner_mat2x4_f16() {
+  return f16mat2x4(ub.inner.mat2x4_f16_0, ub.inner.mat2x4_f16_1);
+}
+
+f16mat3x2 load_ub_inner_mat3x2_f16() {
+  return f16mat3x2(ub.inner.mat3x2_f16_0, ub.inner.mat3x2_f16_1, ub.inner.mat3x2_f16_2);
+}
+
+f16mat3 load_ub_inner_mat3x3_f16() {
+  return f16mat3(ub.inner.mat3x3_f16_0, ub.inner.mat3x3_f16_1, ub.inner.mat3x3_f16_2);
+}
+
+f16mat3x4 load_ub_inner_mat3x4_f16() {
+  return f16mat3x4(ub.inner.mat3x4_f16_0, ub.inner.mat3x4_f16_1, ub.inner.mat3x4_f16_2);
+}
+
+f16mat4x2 load_ub_inner_mat4x2_f16() {
+  return f16mat4x2(ub.inner.mat4x2_f16_0, ub.inner.mat4x2_f16_1, ub.inner.mat4x2_f16_2, ub.inner.mat4x2_f16_3);
+}
+
+f16mat4x3 load_ub_inner_mat4x3_f16() {
+  return f16mat4x3(ub.inner.mat4x3_f16_0, ub.inner.mat4x3_f16_1, ub.inner.mat4x3_f16_2, ub.inner.mat4x3_f16_3);
+}
+
+f16mat4 load_ub_inner_mat4x4_f16() {
+  return f16mat4(ub.inner.mat4x4_f16_0, ub.inner.mat4x4_f16_1, ub.inner.mat4x4_f16_2, ub.inner.mat4x4_f16_3);
+}
+
+f16mat4x2 conv_mat4x2_f16(mat4x2_f16_4 val) {
+  return f16mat4x2(val.col0, val.col1, val.col2, val.col3);
+}
+
+f16mat4x2[2] conv_arr2_mat4x2_f16(mat4x2_f16_4 val[2]) {
+  f16mat4x2 arr[2] = f16mat4x2[2](f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf));
+  {
+    for(uint i = 0u; (i < 2u); i = (i + 1u)) {
+      arr[i] = conv_mat4x2_f16(val[i]);
+    }
+  }
+  return arr;
+}
+
+void tint_symbol() {
+  float scalar_f32 = ub.inner.scalar_f32;
+  int scalar_i32 = ub.inner.scalar_i32;
+  uint scalar_u32 = ub.inner.scalar_u32;
+  float16_t scalar_f16 = ub.inner.scalar_f16;
+  vec2 vec2_f32 = ub.inner.vec2_f32;
+  ivec2 vec2_i32 = ub.inner.vec2_i32;
+  uvec2 vec2_u32 = ub.inner.vec2_u32;
+  f16vec2 vec2_f16 = ub.inner.vec2_f16;
+  vec3 vec3_f32 = ub.inner.vec3_f32;
+  ivec3 vec3_i32 = ub.inner.vec3_i32;
+  uvec3 vec3_u32 = ub.inner.vec3_u32;
+  f16vec3 vec3_f16 = ub.inner.vec3_f16;
+  vec4 vec4_f32 = ub.inner.vec4_f32;
+  ivec4 vec4_i32 = ub.inner.vec4_i32;
+  uvec4 vec4_u32 = ub.inner.vec4_u32;
+  f16vec4 vec4_f16 = ub.inner.vec4_f16;
+  mat2 mat2x2_f32 = load_ub_inner_mat2x2_f32();
+  mat2x3 mat2x3_f32 = ub.inner.mat2x3_f32;
+  mat2x4 mat2x4_f32 = ub.inner.mat2x4_f32;
+  mat3x2 mat3x2_f32 = load_ub_inner_mat3x2_f32();
+  mat3 mat3x3_f32 = ub.inner.mat3x3_f32;
+  mat3x4 mat3x4_f32 = ub.inner.mat3x4_f32;
+  mat4x2 mat4x2_f32 = load_ub_inner_mat4x2_f32();
+  mat4x3 mat4x3_f32 = ub.inner.mat4x3_f32;
+  mat4 mat4x4_f32 = ub.inner.mat4x4_f32;
+  f16mat2 mat2x2_f16 = load_ub_inner_mat2x2_f16();
+  f16mat2x3 mat2x3_f16 = load_ub_inner_mat2x3_f16();
+  f16mat2x4 mat2x4_f16 = load_ub_inner_mat2x4_f16();
+  f16mat3x2 mat3x2_f16 = load_ub_inner_mat3x2_f16();
+  f16mat3 mat3x3_f16 = load_ub_inner_mat3x3_f16();
+  f16mat3x4 mat3x4_f16 = load_ub_inner_mat3x4_f16();
+  f16mat4x2 mat4x2_f16 = load_ub_inner_mat4x2_f16();
+  f16mat4x3 mat4x3_f16 = load_ub_inner_mat4x3_f16();
+  f16mat4 mat4x4_f16 = load_ub_inner_mat4x4_f16();
+  vec3 arr2_vec3_f32[2] = ub.inner.arr2_vec3_f32;
+  f16mat4x2 arr2_mat4x2_f16[2] = conv_arr2_mat4x2_f16(ub.inner.arr2_mat4x2_f16);
+  Inner struct_inner = ub.inner.struct_inner;
+  Inner array_struct_inner[4] = ub.inner.array_struct_inner;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  tint_symbol();
+  return;
+}

diff --git a/test/tint/buffer/uniform/static_index/read_f16.wgsl.expected.msl b/test/tint/buffer/uniform/static_index/read_f16.wgsl.expected.msl
new file mode 100644
index 0000000..99638ed
--- /dev/null
+++ b/test/tint/buffer/uniform/static_index/read_f16.wgsl.expected.msl

@@ -0,0 +1,116 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct Inner {
+  /* 0x0000 */ int scalar_i32;
+  /* 0x0004 */ float scalar_f32;
+  /* 0x0008 */ half scalar_f16;
+  /* 0x000a */ tint_array<int8_t, 6> tint_pad;
+};
+
+struct S {
+  /* 0x0000 */ float scalar_f32;
+  /* 0x0004 */ int scalar_i32;
+  /* 0x0008 */ uint scalar_u32;
+  /* 0x000c */ half scalar_f16;
+  /* 0x000e */ tint_array<int8_t, 2> tint_pad_1;
+  /* 0x0010 */ float2 vec2_f32;
+  /* 0x0018 */ int2 vec2_i32;
+  /* 0x0020 */ uint2 vec2_u32;
+  /* 0x0028 */ half2 vec2_f16;
+  /* 0x002c */ tint_array<int8_t, 4> tint_pad_2;
+  /* 0x0030 */ packed_float3 vec3_f32;
+  /* 0x003c */ tint_array<int8_t, 4> tint_pad_3;
+  /* 0x0040 */ packed_int3 vec3_i32;
+  /* 0x004c */ tint_array<int8_t, 4> tint_pad_4;
+  /* 0x0050 */ packed_uint3 vec3_u32;
+  /* 0x005c */ tint_array<int8_t, 4> tint_pad_5;
+  /* 0x0060 */ packed_half3 vec3_f16;
+  /* 0x0066 */ tint_array<int8_t, 10> tint_pad_6;
+  /* 0x0070 */ float4 vec4_f32;
+  /* 0x0080 */ int4 vec4_i32;
+  /* 0x0090 */ uint4 vec4_u32;
+  /* 0x00a0 */ half4 vec4_f16;
+  /* 0x00a8 */ float2x2 mat2x2_f32;
+  /* 0x00b8 */ tint_array<int8_t, 8> tint_pad_7;
+  /* 0x00c0 */ float2x3 mat2x3_f32;
+  /* 0x00e0 */ float2x4 mat2x4_f32;
+  /* 0x0100 */ float3x2 mat3x2_f32;
+  /* 0x0118 */ tint_array<int8_t, 8> tint_pad_8;
+  /* 0x0120 */ float3x3 mat3x3_f32;
+  /* 0x0150 */ float3x4 mat3x4_f32;
+  /* 0x0180 */ float4x2 mat4x2_f32;
+  /* 0x01a0 */ float4x3 mat4x3_f32;
+  /* 0x01e0 */ float4x4 mat4x4_f32;
+  /* 0x0220 */ half2x2 mat2x2_f16;
+  /* 0x0228 */ half2x3 mat2x3_f16;
+  /* 0x0238 */ half2x4 mat2x4_f16;
+  /* 0x0248 */ half3x2 mat3x2_f16;
+  /* 0x0254 */ tint_array<int8_t, 4> tint_pad_9;
+  /* 0x0258 */ half3x3 mat3x3_f16;
+  /* 0x0270 */ half3x4 mat3x4_f16;
+  /* 0x0288 */ half4x2 mat4x2_f16;
+  /* 0x0298 */ half4x3 mat4x3_f16;
+  /* 0x02b8 */ half4x4 mat4x4_f16;
+  /* 0x02d8 */ tint_array<int8_t, 8> tint_pad_10;
+  /* 0x02e0 */ tint_array<float3, 2> arr2_vec3_f32;
+  /* 0x0300 */ tint_array<half4x2, 2> arr2_mat4x2_f16;
+  /* 0x0320 */ Inner struct_inner;
+  /* 0x0330 */ tint_array<Inner, 4> array_struct_inner;
+};
+
+kernel void tint_symbol(const constant S* tint_symbol_1 [[buffer(0)]]) {
+  float const scalar_f32 = (*(tint_symbol_1)).scalar_f32;
+  int const scalar_i32 = (*(tint_symbol_1)).scalar_i32;
+  uint const scalar_u32 = (*(tint_symbol_1)).scalar_u32;
+  half const scalar_f16 = (*(tint_symbol_1)).scalar_f16;
+  float2 const vec2_f32 = (*(tint_symbol_1)).vec2_f32;
+  int2 const vec2_i32 = (*(tint_symbol_1)).vec2_i32;
+  uint2 const vec2_u32 = (*(tint_symbol_1)).vec2_u32;
+  half2 const vec2_f16 = (*(tint_symbol_1)).vec2_f16;
+  float3 const vec3_f32 = float3((*(tint_symbol_1)).vec3_f32);
+  int3 const vec3_i32 = int3((*(tint_symbol_1)).vec3_i32);
+  uint3 const vec3_u32 = uint3((*(tint_symbol_1)).vec3_u32);
+  half3 const vec3_f16 = half3((*(tint_symbol_1)).vec3_f16);
+  float4 const vec4_f32 = (*(tint_symbol_1)).vec4_f32;
+  int4 const vec4_i32 = (*(tint_symbol_1)).vec4_i32;
+  uint4 const vec4_u32 = (*(tint_symbol_1)).vec4_u32;
+  half4 const vec4_f16 = (*(tint_symbol_1)).vec4_f16;
+  float2x2 const mat2x2_f32 = (*(tint_symbol_1)).mat2x2_f32;
+  float2x3 const mat2x3_f32 = (*(tint_symbol_1)).mat2x3_f32;
+  float2x4 const mat2x4_f32 = (*(tint_symbol_1)).mat2x4_f32;
+  float3x2 const mat3x2_f32 = (*(tint_symbol_1)).mat3x2_f32;
+  float3x3 const mat3x3_f32 = (*(tint_symbol_1)).mat3x3_f32;
+  float3x4 const mat3x4_f32 = (*(tint_symbol_1)).mat3x4_f32;
+  float4x2 const mat4x2_f32 = (*(tint_symbol_1)).mat4x2_f32;
+  float4x3 const mat4x3_f32 = (*(tint_symbol_1)).mat4x3_f32;
+  float4x4 const mat4x4_f32 = (*(tint_symbol_1)).mat4x4_f32;
+  half2x2 const mat2x2_f16 = (*(tint_symbol_1)).mat2x2_f16;
+  half2x3 const mat2x3_f16 = (*(tint_symbol_1)).mat2x3_f16;
+  half2x4 const mat2x4_f16 = (*(tint_symbol_1)).mat2x4_f16;
+  half3x2 const mat3x2_f16 = (*(tint_symbol_1)).mat3x2_f16;
+  half3x3 const mat3x3_f16 = (*(tint_symbol_1)).mat3x3_f16;
+  half3x4 const mat3x4_f16 = (*(tint_symbol_1)).mat3x4_f16;
+  half4x2 const mat4x2_f16 = (*(tint_symbol_1)).mat4x2_f16;
+  half4x3 const mat4x3_f16 = (*(tint_symbol_1)).mat4x3_f16;
+  half4x4 const mat4x4_f16 = (*(tint_symbol_1)).mat4x4_f16;
+  tint_array<float3, 2> const arr2_vec3_f32 = (*(tint_symbol_1)).arr2_vec3_f32;
+  tint_array<half4x2, 2> const arr2_mat4x2_f16 = (*(tint_symbol_1)).arr2_mat4x2_f16;
+  Inner const struct_inner = (*(tint_symbol_1)).struct_inner;
+  tint_array<Inner, 4> const array_struct_inner = (*(tint_symbol_1)).array_struct_inner;
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/static_index/read_f16.wgsl.expected.spvasm b/test/tint/buffer/uniform/static_index/read_f16.wgsl.expected.spvasm
new file mode 100644
index 0000000..16735d1
--- /dev/null
+++ b/test/tint/buffer/uniform/static_index/read_f16.wgsl.expected.spvasm

@@ -0,0 +1,616 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 426
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpName %ub_block_std140 "ub_block_std140"
+               OpMemberName %ub_block_std140 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "scalar_f32"
+               OpMemberName %S_std140 1 "scalar_i32"
+               OpMemberName %S_std140 2 "scalar_u32"
+               OpMemberName %S_std140 3 "scalar_f16"
+               OpMemberName %S_std140 4 "vec2_f32"
+               OpMemberName %S_std140 5 "vec2_i32"
+               OpMemberName %S_std140 6 "vec2_u32"
+               OpMemberName %S_std140 7 "vec2_f16"
+               OpMemberName %S_std140 8 "vec3_f32"
+               OpMemberName %S_std140 9 "vec3_i32"
+               OpMemberName %S_std140 10 "vec3_u32"
+               OpMemberName %S_std140 11 "vec3_f16"
+               OpMemberName %S_std140 12 "vec4_f32"
+               OpMemberName %S_std140 13 "vec4_i32"
+               OpMemberName %S_std140 14 "vec4_u32"
+               OpMemberName %S_std140 15 "vec4_f16"
+               OpMemberName %S_std140 16 "mat2x2_f32_0"
+               OpMemberName %S_std140 17 "mat2x2_f32_1"
+               OpMemberName %S_std140 18 "mat2x3_f32"
+               OpMemberName %S_std140 19 "mat2x4_f32"
+               OpMemberName %S_std140 20 "mat3x2_f32_0"
+               OpMemberName %S_std140 21 "mat3x2_f32_1"
+               OpMemberName %S_std140 22 "mat3x2_f32_2"
+               OpMemberName %S_std140 23 "mat3x3_f32"
+               OpMemberName %S_std140 24 "mat3x4_f32"
+               OpMemberName %S_std140 25 "mat4x2_f32_0"
+               OpMemberName %S_std140 26 "mat4x2_f32_1"
+               OpMemberName %S_std140 27 "mat4x2_f32_2"
+               OpMemberName %S_std140 28 "mat4x2_f32_3"
+               OpMemberName %S_std140 29 "mat4x3_f32"
+               OpMemberName %S_std140 30 "mat4x4_f32"
+               OpMemberName %S_std140 31 "mat2x2_f16_0"
+               OpMemberName %S_std140 32 "mat2x2_f16_1"
+               OpMemberName %S_std140 33 "mat2x3_f16_0"
+               OpMemberName %S_std140 34 "mat2x3_f16_1"
+               OpMemberName %S_std140 35 "mat2x4_f16_0"
+               OpMemberName %S_std140 36 "mat2x4_f16_1"
+               OpMemberName %S_std140 37 "mat3x2_f16_0"
+               OpMemberName %S_std140 38 "mat3x2_f16_1"
+               OpMemberName %S_std140 39 "mat3x2_f16_2"
+               OpMemberName %S_std140 40 "mat3x3_f16_0"
+               OpMemberName %S_std140 41 "mat3x3_f16_1"
+               OpMemberName %S_std140 42 "mat3x3_f16_2"
+               OpMemberName %S_std140 43 "mat3x4_f16_0"
+               OpMemberName %S_std140 44 "mat3x4_f16_1"
+               OpMemberName %S_std140 45 "mat3x4_f16_2"
+               OpMemberName %S_std140 46 "mat4x2_f16_0"
+               OpMemberName %S_std140 47 "mat4x2_f16_1"
+               OpMemberName %S_std140 48 "mat4x2_f16_2"
+               OpMemberName %S_std140 49 "mat4x2_f16_3"
+               OpMemberName %S_std140 50 "mat4x3_f16_0"
+               OpMemberName %S_std140 51 "mat4x3_f16_1"
+               OpMemberName %S_std140 52 "mat4x3_f16_2"
+               OpMemberName %S_std140 53 "mat4x3_f16_3"
+               OpMemberName %S_std140 54 "mat4x4_f16_0"
+               OpMemberName %S_std140 55 "mat4x4_f16_1"
+               OpMemberName %S_std140 56 "mat4x4_f16_2"
+               OpMemberName %S_std140 57 "mat4x4_f16_3"
+               OpMemberName %S_std140 58 "arr2_vec3_f32"
+               OpMemberName %S_std140 59 "arr2_mat4x2_f16"
+               OpName %mat4x2_f16_4 "mat4x2_f16_4"
+               OpMemberName %mat4x2_f16_4 0 "col0"
+               OpMemberName %mat4x2_f16_4 1 "col1"
+               OpMemberName %mat4x2_f16_4 2 "col2"
+               OpMemberName %mat4x2_f16_4 3 "col3"
+               OpMemberName %S_std140 60 "struct_inner"
+               OpName %Inner "Inner"
+               OpMemberName %Inner 0 "scalar_i32"
+               OpMemberName %Inner 1 "scalar_f32"
+               OpMemberName %Inner 2 "scalar_f16"
+               OpMemberName %S_std140 61 "array_struct_inner"
+               OpName %ub "ub"
+               OpName %load_ub_inner_mat2x2_f32 "load_ub_inner_mat2x2_f32"
+               OpName %load_ub_inner_mat3x2_f32 "load_ub_inner_mat3x2_f32"
+               OpName %load_ub_inner_mat4x2_f32 "load_ub_inner_mat4x2_f32"
+               OpName %load_ub_inner_mat2x2_f16 "load_ub_inner_mat2x2_f16"
+               OpName %load_ub_inner_mat2x3_f16 "load_ub_inner_mat2x3_f16"
+               OpName %load_ub_inner_mat2x4_f16 "load_ub_inner_mat2x4_f16"
+               OpName %load_ub_inner_mat3x2_f16 "load_ub_inner_mat3x2_f16"
+               OpName %load_ub_inner_mat3x3_f16 "load_ub_inner_mat3x3_f16"
+               OpName %load_ub_inner_mat3x4_f16 "load_ub_inner_mat3x4_f16"
+               OpName %load_ub_inner_mat4x2_f16 "load_ub_inner_mat4x2_f16"
+               OpName %load_ub_inner_mat4x3_f16 "load_ub_inner_mat4x3_f16"
+               OpName %load_ub_inner_mat4x4_f16 "load_ub_inner_mat4x4_f16"
+               OpName %conv_mat4x2_f16 "conv_mat4x2_f16"
+               OpName %val "val"
+               OpName %conv_arr2_mat4x2_f16 "conv_arr2_mat4x2_f16"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %main "main"
+               OpDecorate %ub_block_std140 Block
+               OpMemberDecorate %ub_block_std140 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 4
+               OpMemberDecorate %S_std140 2 Offset 8
+               OpMemberDecorate %S_std140 3 Offset 12
+               OpMemberDecorate %S_std140 4 Offset 16
+               OpMemberDecorate %S_std140 5 Offset 24
+               OpMemberDecorate %S_std140 6 Offset 32
+               OpMemberDecorate %S_std140 7 Offset 40
+               OpMemberDecorate %S_std140 8 Offset 48
+               OpMemberDecorate %S_std140 9 Offset 64
+               OpMemberDecorate %S_std140 10 Offset 80
+               OpMemberDecorate %S_std140 11 Offset 96
+               OpMemberDecorate %S_std140 12 Offset 112
+               OpMemberDecorate %S_std140 13 Offset 128
+               OpMemberDecorate %S_std140 14 Offset 144
+               OpMemberDecorate %S_std140 15 Offset 160
+               OpMemberDecorate %S_std140 16 Offset 168
+               OpMemberDecorate %S_std140 17 Offset 176
+               OpMemberDecorate %S_std140 18 Offset 192
+               OpMemberDecorate %S_std140 18 ColMajor
+               OpMemberDecorate %S_std140 18 MatrixStride 16
+               OpMemberDecorate %S_std140 19 Offset 224
+               OpMemberDecorate %S_std140 19 ColMajor
+               OpMemberDecorate %S_std140 19 MatrixStride 16
+               OpMemberDecorate %S_std140 20 Offset 256
+               OpMemberDecorate %S_std140 21 Offset 264
+               OpMemberDecorate %S_std140 22 Offset 272
+               OpMemberDecorate %S_std140 23 Offset 288
+               OpMemberDecorate %S_std140 23 ColMajor
+               OpMemberDecorate %S_std140 23 MatrixStride 16
+               OpMemberDecorate %S_std140 24 Offset 336
+               OpMemberDecorate %S_std140 24 ColMajor
+               OpMemberDecorate %S_std140 24 MatrixStride 16
+               OpMemberDecorate %S_std140 25 Offset 384
+               OpMemberDecorate %S_std140 26 Offset 392
+               OpMemberDecorate %S_std140 27 Offset 400
+               OpMemberDecorate %S_std140 28 Offset 408
+               OpMemberDecorate %S_std140 29 Offset 416
+               OpMemberDecorate %S_std140 29 ColMajor
+               OpMemberDecorate %S_std140 29 MatrixStride 16
+               OpMemberDecorate %S_std140 30 Offset 480
+               OpMemberDecorate %S_std140 30 ColMajor
+               OpMemberDecorate %S_std140 30 MatrixStride 16
+               OpMemberDecorate %S_std140 31 Offset 544
+               OpMemberDecorate %S_std140 32 Offset 548
+               OpMemberDecorate %S_std140 33 Offset 552
+               OpMemberDecorate %S_std140 34 Offset 560
+               OpMemberDecorate %S_std140 35 Offset 568
+               OpMemberDecorate %S_std140 36 Offset 576
+               OpMemberDecorate %S_std140 37 Offset 584
+               OpMemberDecorate %S_std140 38 Offset 588
+               OpMemberDecorate %S_std140 39 Offset 592
+               OpMemberDecorate %S_std140 40 Offset 600
+               OpMemberDecorate %S_std140 41 Offset 608
+               OpMemberDecorate %S_std140 42 Offset 616
+               OpMemberDecorate %S_std140 43 Offset 624
+               OpMemberDecorate %S_std140 44 Offset 632
+               OpMemberDecorate %S_std140 45 Offset 640
+               OpMemberDecorate %S_std140 46 Offset 648
+               OpMemberDecorate %S_std140 47 Offset 652
+               OpMemberDecorate %S_std140 48 Offset 656
+               OpMemberDecorate %S_std140 49 Offset 660
+               OpMemberDecorate %S_std140 50 Offset 664
+               OpMemberDecorate %S_std140 51 Offset 672
+               OpMemberDecorate %S_std140 52 Offset 680
+               OpMemberDecorate %S_std140 53 Offset 688
+               OpMemberDecorate %S_std140 54 Offset 696
+               OpMemberDecorate %S_std140 55 Offset 704
+               OpMemberDecorate %S_std140 56 Offset 712
+               OpMemberDecorate %S_std140 57 Offset 720
+               OpMemberDecorate %S_std140 58 Offset 736
+               OpDecorate %_arr_v3float_uint_2 ArrayStride 16
+               OpMemberDecorate %S_std140 59 Offset 768
+               OpMemberDecorate %mat4x2_f16_4 0 Offset 0
+               OpMemberDecorate %mat4x2_f16_4 1 Offset 4
+               OpMemberDecorate %mat4x2_f16_4 2 Offset 8
+               OpMemberDecorate %mat4x2_f16_4 3 Offset 12
+               OpDecorate %_arr_mat4x2_f16_4_uint_2 ArrayStride 16
+               OpMemberDecorate %S_std140 60 Offset 800
+               OpMemberDecorate %Inner 0 Offset 0
+               OpMemberDecorate %Inner 1 Offset 4
+               OpMemberDecorate %Inner 2 Offset 8
+               OpMemberDecorate %S_std140 61 Offset 816
+               OpDecorate %_arr_Inner_uint_4 ArrayStride 16
+               OpDecorate %ub NonWritable
+               OpDecorate %ub Binding 0
+               OpDecorate %ub DescriptorSet 0
+               OpDecorate %_arr_mat4v2half_uint_2 ArrayStride 16
+      %float = OpTypeFloat 32
+        %int = OpTypeInt 32 1
+       %uint = OpTypeInt 32 0
+       %half = OpTypeFloat 16
+    %v2float = OpTypeVector %float 2
+      %v2int = OpTypeVector %int 2
+     %v2uint = OpTypeVector %uint 2
+     %v2half = OpTypeVector %half 2
+    %v3float = OpTypeVector %float 3
+      %v3int = OpTypeVector %int 3
+     %v3uint = OpTypeVector %uint 3
+     %v3half = OpTypeVector %half 3
+    %v4float = OpTypeVector %float 4
+      %v4int = OpTypeVector %int 4
+     %v4uint = OpTypeVector %uint 4
+     %v4half = OpTypeVector %half 4
+%mat2v3float = OpTypeMatrix %v3float 2
+%mat2v4float = OpTypeMatrix %v4float 2
+%mat3v3float = OpTypeMatrix %v3float 3
+%mat3v4float = OpTypeMatrix %v4float 3
+%mat4v3float = OpTypeMatrix %v3float 4
+%mat4v4float = OpTypeMatrix %v4float 4
+     %uint_2 = OpConstant %uint 2
+%_arr_v3float_uint_2 = OpTypeArray %v3float %uint_2
+%mat4x2_f16_4 = OpTypeStruct %v2half %v2half %v2half %v2half
+%_arr_mat4x2_f16_4_uint_2 = OpTypeArray %mat4x2_f16_4 %uint_2
+      %Inner = OpTypeStruct %int %float %half
+     %uint_4 = OpConstant %uint 4
+%_arr_Inner_uint_4 = OpTypeArray %Inner %uint_4
+   %S_std140 = OpTypeStruct %float %int %uint %half %v2float %v2int %v2uint %v2half %v3float %v3int %v3uint %v3half %v4float %v4int %v4uint %v4half %v2float %v2float %mat2v3float %mat2v4float %v2float %v2float %v2float %mat3v3float %mat3v4float %v2float %v2float %v2float %v2float %mat4v3float %mat4v4float %v2half %v2half %v3half %v3half %v4half %v4half %v2half %v2half %v2half %v3half %v3half %v3half %v4half %v4half %v4half %v2half %v2half %v2half %v2half %v3half %v3half %v3half %v3half %v4half %v4half %v4half %v4half %_arr_v3float_uint_2 %_arr_mat4x2_f16_4_uint_2 %Inner %_arr_Inner_uint_4
+%ub_block_std140 = OpTypeStruct %S_std140
+%_ptr_Uniform_ub_block_std140 = OpTypePointer Uniform %ub_block_std140
+         %ub = OpVariable %_ptr_Uniform_ub_block_std140 Uniform
+%mat2v2float = OpTypeMatrix %v2float 2
+         %34 = OpTypeFunction %mat2v2float
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+    %uint_16 = OpConstant %uint 16
+%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
+    %uint_17 = OpConstant %uint 17
+%mat3v2float = OpTypeMatrix %v2float 3
+         %52 = OpTypeFunction %mat3v2float
+    %uint_20 = OpConstant %uint 20
+    %uint_21 = OpConstant %uint 21
+    %uint_22 = OpConstant %uint 22
+%mat4v2float = OpTypeMatrix %v2float 4
+         %71 = OpTypeFunction %mat4v2float
+    %uint_25 = OpConstant %uint 25
+    %uint_26 = OpConstant %uint 26
+    %uint_27 = OpConstant %uint 27
+    %uint_28 = OpConstant %uint 28
+ %mat2v2half = OpTypeMatrix %v2half 2
+         %94 = OpTypeFunction %mat2v2half
+    %uint_31 = OpConstant %uint 31
+%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half
+    %uint_32 = OpConstant %uint 32
+ %mat2v3half = OpTypeMatrix %v3half 2
+        %110 = OpTypeFunction %mat2v3half
+    %uint_33 = OpConstant %uint 33
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+    %uint_34 = OpConstant %uint 34
+ %mat2v4half = OpTypeMatrix %v4half 2
+        %126 = OpTypeFunction %mat2v4half
+    %uint_35 = OpConstant %uint 35
+%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half
+    %uint_36 = OpConstant %uint 36
+ %mat3v2half = OpTypeMatrix %v2half 3
+        %142 = OpTypeFunction %mat3v2half
+    %uint_37 = OpConstant %uint 37
+    %uint_38 = OpConstant %uint 38
+    %uint_39 = OpConstant %uint 39
+ %mat3v3half = OpTypeMatrix %v3half 3
+        %161 = OpTypeFunction %mat3v3half
+    %uint_40 = OpConstant %uint 40
+    %uint_41 = OpConstant %uint 41
+    %uint_42 = OpConstant %uint 42
+ %mat3v4half = OpTypeMatrix %v4half 3
+        %180 = OpTypeFunction %mat3v4half
+    %uint_43 = OpConstant %uint 43
+    %uint_44 = OpConstant %uint 44
+    %uint_45 = OpConstant %uint 45
+ %mat4v2half = OpTypeMatrix %v2half 4
+        %199 = OpTypeFunction %mat4v2half
+    %uint_46 = OpConstant %uint 46
+    %uint_47 = OpConstant %uint 47
+    %uint_48 = OpConstant %uint 48
+    %uint_49 = OpConstant %uint 49
+ %mat4v3half = OpTypeMatrix %v3half 4
+        %222 = OpTypeFunction %mat4v3half
+    %uint_50 = OpConstant %uint 50
+    %uint_51 = OpConstant %uint 51
+    %uint_52 = OpConstant %uint 52
+    %uint_53 = OpConstant %uint 53
+ %mat4v4half = OpTypeMatrix %v4half 4
+        %245 = OpTypeFunction %mat4v4half
+    %uint_54 = OpConstant %uint 54
+    %uint_55 = OpConstant %uint 55
+    %uint_56 = OpConstant %uint 56
+    %uint_57 = OpConstant %uint 57
+        %268 = OpTypeFunction %mat4v2half %mat4x2_f16_4
+%_arr_mat4v2half_uint_2 = OpTypeArray %mat4v2half %uint_2
+        %277 = OpTypeFunction %_arr_mat4v2half_uint_2 %_arr_mat4x2_f16_4_uint_2
+%_ptr_Function__arr_mat4v2half_uint_2 = OpTypePointer Function %_arr_mat4v2half_uint_2
+        %284 = OpConstantNull %_arr_mat4v2half_uint_2
+%_ptr_Function_uint = OpTypePointer Function %uint
+        %287 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_mat4x2_f16_4_uint_2 = OpTypePointer Function %_arr_mat4x2_f16_4_uint_2
+        %300 = OpConstantNull %_arr_mat4x2_f16_4_uint_2
+%_ptr_Function_mat4v2half = OpTypePointer Function %mat4v2half
+%_ptr_Function_mat4x2_f16_4 = OpTypePointer Function %mat4x2_f16_4
+     %uint_1 = OpConstant %uint 1
+       %void = OpTypeVoid
+        %313 = OpTypeFunction %void
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+%_ptr_Uniform_int = OpTypePointer Uniform %int
+%_ptr_Uniform_uint = OpTypePointer Uniform %uint
+     %uint_3 = OpConstant %uint 3
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+     %uint_5 = OpConstant %uint 5
+%_ptr_Uniform_v2int = OpTypePointer Uniform %v2int
+     %uint_6 = OpConstant %uint 6
+%_ptr_Uniform_v2uint = OpTypePointer Uniform %v2uint
+     %uint_7 = OpConstant %uint 7
+     %uint_8 = OpConstant %uint 8
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+     %uint_9 = OpConstant %uint 9
+%_ptr_Uniform_v3int = OpTypePointer Uniform %v3int
+    %uint_10 = OpConstant %uint 10
+%_ptr_Uniform_v3uint = OpTypePointer Uniform %v3uint
+    %uint_11 = OpConstant %uint 11
+    %uint_12 = OpConstant %uint 12
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+    %uint_13 = OpConstant %uint 13
+%_ptr_Uniform_v4int = OpTypePointer Uniform %v4int
+    %uint_14 = OpConstant %uint 14
+%_ptr_Uniform_v4uint = OpTypePointer Uniform %v4uint
+    %uint_15 = OpConstant %uint 15
+    %uint_18 = OpConstant %uint 18
+%_ptr_Uniform_mat2v3float = OpTypePointer Uniform %mat2v3float
+    %uint_19 = OpConstant %uint 19
+%_ptr_Uniform_mat2v4float = OpTypePointer Uniform %mat2v4float
+    %uint_23 = OpConstant %uint 23
+%_ptr_Uniform_mat3v3float = OpTypePointer Uniform %mat3v3float
+    %uint_24 = OpConstant %uint 24
+%_ptr_Uniform_mat3v4float = OpTypePointer Uniform %mat3v4float
+    %uint_29 = OpConstant %uint 29
+%_ptr_Uniform_mat4v3float = OpTypePointer Uniform %mat4v3float
+    %uint_30 = OpConstant %uint 30
+%_ptr_Uniform_mat4v4float = OpTypePointer Uniform %mat4v4float
+    %uint_58 = OpConstant %uint 58
+%_ptr_Uniform__arr_v3float_uint_2 = OpTypePointer Uniform %_arr_v3float_uint_2
+    %uint_59 = OpConstant %uint 59
+%_ptr_Uniform__arr_mat4x2_f16_4_uint_2 = OpTypePointer Uniform %_arr_mat4x2_f16_4_uint_2
+    %uint_60 = OpConstant %uint 60
+%_ptr_Uniform_Inner = OpTypePointer Uniform %Inner
+    %uint_61 = OpConstant %uint 61
+%_ptr_Uniform__arr_Inner_uint_4 = OpTypePointer Uniform %_arr_Inner_uint_4
+%load_ub_inner_mat2x2_f32 = OpFunction %mat2v2float None %34
+         %37 = OpLabel
+         %41 = OpAccessChain %_ptr_Uniform_S_std140 %ub %uint_0
+         %45 = OpAccessChain %_ptr_Uniform_v2float %41 %uint_16
+         %46 = OpLoad %v2float %45
+         %49 = OpAccessChain %_ptr_Uniform_v2float %41 %uint_17
+         %50 = OpLoad %v2float %49
+         %51 = OpCompositeConstruct %mat2v2float %46 %50
+               OpReturnValue %51
+               OpFunctionEnd
+%load_ub_inner_mat3x2_f32 = OpFunction %mat3v2float None %52
+         %55 = OpLabel
+         %57 = OpAccessChain %_ptr_Uniform_S_std140 %ub %uint_0
+         %60 = OpAccessChain %_ptr_Uniform_v2float %57 %uint_20
+         %61 = OpLoad %v2float %60
+         %64 = OpAccessChain %_ptr_Uniform_v2float %57 %uint_21
+         %65 = OpLoad %v2float %64
+         %68 = OpAccessChain %_ptr_Uniform_v2float %57 %uint_22
+         %69 = OpLoad %v2float %68
+         %70 = OpCompositeConstruct %mat3v2float %61 %65 %69
+               OpReturnValue %70
+               OpFunctionEnd
+%load_ub_inner_mat4x2_f32 = OpFunction %mat4v2float None %71
+         %74 = OpLabel
+         %76 = OpAccessChain %_ptr_Uniform_S_std140 %ub %uint_0
+         %79 = OpAccessChain %_ptr_Uniform_v2float %76 %uint_25
+         %80 = OpLoad %v2float %79
+         %83 = OpAccessChain %_ptr_Uniform_v2float %76 %uint_26
+         %84 = OpLoad %v2float %83
+         %87 = OpAccessChain %_ptr_Uniform_v2float %76 %uint_27
+         %88 = OpLoad %v2float %87
+         %91 = OpAccessChain %_ptr_Uniform_v2float %76 %uint_28
+         %92 = OpLoad %v2float %91
+         %93 = OpCompositeConstruct %mat4v2float %80 %84 %88 %92
+               OpReturnValue %93
+               OpFunctionEnd
+%load_ub_inner_mat2x2_f16 = OpFunction %mat2v2half None %94
+         %97 = OpLabel
+         %99 = OpAccessChain %_ptr_Uniform_S_std140 %ub %uint_0
+        %103 = OpAccessChain %_ptr_Uniform_v2half %99 %uint_31
+        %104 = OpLoad %v2half %103
+        %107 = OpAccessChain %_ptr_Uniform_v2half %99 %uint_32
+        %108 = OpLoad %v2half %107
+        %109 = OpCompositeConstruct %mat2v2half %104 %108
+               OpReturnValue %109
+               OpFunctionEnd
+%load_ub_inner_mat2x3_f16 = OpFunction %mat2v3half None %110
+        %113 = OpLabel
+        %115 = OpAccessChain %_ptr_Uniform_S_std140 %ub %uint_0
+        %119 = OpAccessChain %_ptr_Uniform_v3half %115 %uint_33
+        %120 = OpLoad %v3half %119
+        %123 = OpAccessChain %_ptr_Uniform_v3half %115 %uint_34
+        %124 = OpLoad %v3half %123
+        %125 = OpCompositeConstruct %mat2v3half %120 %124
+               OpReturnValue %125
+               OpFunctionEnd
+%load_ub_inner_mat2x4_f16 = OpFunction %mat2v4half None %126
+        %129 = OpLabel
+        %131 = OpAccessChain %_ptr_Uniform_S_std140 %ub %uint_0
+        %135 = OpAccessChain %_ptr_Uniform_v4half %131 %uint_35
+        %136 = OpLoad %v4half %135
+        %139 = OpAccessChain %_ptr_Uniform_v4half %131 %uint_36
+        %140 = OpLoad %v4half %139
+        %141 = OpCompositeConstruct %mat2v4half %136 %140
+               OpReturnValue %141
+               OpFunctionEnd
+%load_ub_inner_mat3x2_f16 = OpFunction %mat3v2half None %142
+        %145 = OpLabel
+        %147 = OpAccessChain %_ptr_Uniform_S_std140 %ub %uint_0
+        %150 = OpAccessChain %_ptr_Uniform_v2half %147 %uint_37
+        %151 = OpLoad %v2half %150
+        %154 = OpAccessChain %_ptr_Uniform_v2half %147 %uint_38
+        %155 = OpLoad %v2half %154
+        %158 = OpAccessChain %_ptr_Uniform_v2half %147 %uint_39
+        %159 = OpLoad %v2half %158
+        %160 = OpCompositeConstruct %mat3v2half %151 %155 %159
+               OpReturnValue %160
+               OpFunctionEnd
+%load_ub_inner_mat3x3_f16 = OpFunction %mat3v3half None %161
+        %164 = OpLabel
+        %166 = OpAccessChain %_ptr_Uniform_S_std140 %ub %uint_0
+        %169 = OpAccessChain %_ptr_Uniform_v3half %166 %uint_40
+        %170 = OpLoad %v3half %169
+        %173 = OpAccessChain %_ptr_Uniform_v3half %166 %uint_41
+        %174 = OpLoad %v3half %173
+        %177 = OpAccessChain %_ptr_Uniform_v3half %166 %uint_42
+        %178 = OpLoad %v3half %177
+        %179 = OpCompositeConstruct %mat3v3half %170 %174 %178
+               OpReturnValue %179
+               OpFunctionEnd
+%load_ub_inner_mat3x4_f16 = OpFunction %mat3v4half None %180
+        %183 = OpLabel
+        %185 = OpAccessChain %_ptr_Uniform_S_std140 %ub %uint_0
+        %188 = OpAccessChain %_ptr_Uniform_v4half %185 %uint_43
+        %189 = OpLoad %v4half %188
+        %192 = OpAccessChain %_ptr_Uniform_v4half %185 %uint_44
+        %193 = OpLoad %v4half %192
+        %196 = OpAccessChain %_ptr_Uniform_v4half %185 %uint_45
+        %197 = OpLoad %v4half %196
+        %198 = OpCompositeConstruct %mat3v4half %189 %193 %197
+               OpReturnValue %198
+               OpFunctionEnd
+%load_ub_inner_mat4x2_f16 = OpFunction %mat4v2half None %199
+        %202 = OpLabel
+        %204 = OpAccessChain %_ptr_Uniform_S_std140 %ub %uint_0
+        %207 = OpAccessChain %_ptr_Uniform_v2half %204 %uint_46
+        %208 = OpLoad %v2half %207
+        %211 = OpAccessChain %_ptr_Uniform_v2half %204 %uint_47
+        %212 = OpLoad %v2half %211
+        %215 = OpAccessChain %_ptr_Uniform_v2half %204 %uint_48
+        %216 = OpLoad %v2half %215
+        %219 = OpAccessChain %_ptr_Uniform_v2half %204 %uint_49
+        %220 = OpLoad %v2half %219
+        %221 = OpCompositeConstruct %mat4v2half %208 %212 %216 %220
+               OpReturnValue %221
+               OpFunctionEnd
+%load_ub_inner_mat4x3_f16 = OpFunction %mat4v3half None %222
+        %225 = OpLabel
+        %227 = OpAccessChain %_ptr_Uniform_S_std140 %ub %uint_0
+        %230 = OpAccessChain %_ptr_Uniform_v3half %227 %uint_50
+        %231 = OpLoad %v3half %230
+        %234 = OpAccessChain %_ptr_Uniform_v3half %227 %uint_51
+        %235 = OpLoad %v3half %234
+        %238 = OpAccessChain %_ptr_Uniform_v3half %227 %uint_52
+        %239 = OpLoad %v3half %238
+        %242 = OpAccessChain %_ptr_Uniform_v3half %227 %uint_53
+        %243 = OpLoad %v3half %242
+        %244 = OpCompositeConstruct %mat4v3half %231 %235 %239 %243
+               OpReturnValue %244
+               OpFunctionEnd
+%load_ub_inner_mat4x4_f16 = OpFunction %mat4v4half None %245
+        %248 = OpLabel
+        %250 = OpAccessChain %_ptr_Uniform_S_std140 %ub %uint_0
+        %253 = OpAccessChain %_ptr_Uniform_v4half %250 %uint_54
+        %254 = OpLoad %v4half %253
+        %257 = OpAccessChain %_ptr_Uniform_v4half %250 %uint_55
+        %258 = OpLoad %v4half %257
+        %261 = OpAccessChain %_ptr_Uniform_v4half %250 %uint_56
+        %262 = OpLoad %v4half %261
+        %265 = OpAccessChain %_ptr_Uniform_v4half %250 %uint_57
+        %266 = OpLoad %v4half %265
+        %267 = OpCompositeConstruct %mat4v4half %254 %258 %262 %266
+               OpReturnValue %267
+               OpFunctionEnd
+%conv_mat4x2_f16 = OpFunction %mat4v2half None %268
+        %val = OpFunctionParameter %mat4x2_f16_4
+        %271 = OpLabel
+        %272 = OpCompositeExtract %v2half %val 0
+        %273 = OpCompositeExtract %v2half %val 1
+        %274 = OpCompositeExtract %v2half %val 2
+        %275 = OpCompositeExtract %v2half %val 3
+        %276 = OpCompositeConstruct %mat4v2half %272 %273 %274 %275
+               OpReturnValue %276
+               OpFunctionEnd
+%conv_arr2_mat4x2_f16 = OpFunction %_arr_mat4v2half_uint_2 None %277
+      %val_0 = OpFunctionParameter %_arr_mat4x2_f16_4_uint_2
+        %281 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_mat4v2half_uint_2 Function %284
+          %i = OpVariable %_ptr_Function_uint Function %287
+%var_for_index = OpVariable %_ptr_Function__arr_mat4x2_f16_4_uint_2 Function %300
+               OpBranch %288
+        %288 = OpLabel
+               OpLoopMerge %289 %290 None
+               OpBranch %291
+        %291 = OpLabel
+        %293 = OpLoad %uint %i
+        %294 = OpULessThan %bool %293 %uint_2
+        %292 = OpLogicalNot %bool %294
+               OpSelectionMerge %296 None
+               OpBranchConditional %292 %297 %296
+        %297 = OpLabel
+               OpBranch %289
+        %296 = OpLabel
+               OpStore %var_for_index %val_0
+        %301 = OpLoad %uint %i
+        %303 = OpAccessChain %_ptr_Function_mat4v2half %arr %301
+        %305 = OpLoad %uint %i
+        %307 = OpAccessChain %_ptr_Function_mat4x2_f16_4 %var_for_index %305
+        %308 = OpLoad %mat4x2_f16_4 %307
+        %304 = OpFunctionCall %mat4v2half %conv_mat4x2_f16 %308
+               OpStore %303 %304
+               OpBranch %290
+        %290 = OpLabel
+        %309 = OpLoad %uint %i
+        %311 = OpIAdd %uint %309 %uint_1
+               OpStore %i %311
+               OpBranch %288
+        %289 = OpLabel
+        %312 = OpLoad %_arr_mat4v2half_uint_2 %arr
+               OpReturnValue %312
+               OpFunctionEnd
+       %main = OpFunction %void None %313
+        %316 = OpLabel
+        %318 = OpAccessChain %_ptr_Uniform_float %ub %uint_0 %uint_0
+        %319 = OpLoad %float %318
+        %321 = OpAccessChain %_ptr_Uniform_int %ub %uint_0 %uint_1
+        %322 = OpLoad %int %321
+        %324 = OpAccessChain %_ptr_Uniform_uint %ub %uint_0 %uint_2
+        %325 = OpLoad %uint %324
+        %328 = OpAccessChain %_ptr_Uniform_half %ub %uint_0 %uint_3
+        %329 = OpLoad %half %328
+        %330 = OpAccessChain %_ptr_Uniform_v2float %ub %uint_0 %uint_4
+        %331 = OpLoad %v2float %330
+        %334 = OpAccessChain %_ptr_Uniform_v2int %ub %uint_0 %uint_5
+        %335 = OpLoad %v2int %334
+        %338 = OpAccessChain %_ptr_Uniform_v2uint %ub %uint_0 %uint_6
+        %339 = OpLoad %v2uint %338
+        %341 = OpAccessChain %_ptr_Uniform_v2half %ub %uint_0 %uint_7
+        %342 = OpLoad %v2half %341
+        %345 = OpAccessChain %_ptr_Uniform_v3float %ub %uint_0 %uint_8
+        %346 = OpLoad %v3float %345
+        %349 = OpAccessChain %_ptr_Uniform_v3int %ub %uint_0 %uint_9
+        %350 = OpLoad %v3int %349
+        %353 = OpAccessChain %_ptr_Uniform_v3uint %ub %uint_0 %uint_10
+        %354 = OpLoad %v3uint %353
+        %356 = OpAccessChain %_ptr_Uniform_v3half %ub %uint_0 %uint_11
+        %357 = OpLoad %v3half %356
+        %360 = OpAccessChain %_ptr_Uniform_v4float %ub %uint_0 %uint_12
+        %361 = OpLoad %v4float %360
+        %364 = OpAccessChain %_ptr_Uniform_v4int %ub %uint_0 %uint_13
+        %365 = OpLoad %v4int %364
+        %368 = OpAccessChain %_ptr_Uniform_v4uint %ub %uint_0 %uint_14
+        %369 = OpLoad %v4uint %368
+        %371 = OpAccessChain %_ptr_Uniform_v4half %ub %uint_0 %uint_15
+        %372 = OpLoad %v4half %371
+        %373 = OpFunctionCall %mat2v2float %load_ub_inner_mat2x2_f32
+        %376 = OpAccessChain %_ptr_Uniform_mat2v3float %ub %uint_0 %uint_18
+        %377 = OpLoad %mat2v3float %376
+        %380 = OpAccessChain %_ptr_Uniform_mat2v4float %ub %uint_0 %uint_19
+        %381 = OpLoad %mat2v4float %380
+        %382 = OpFunctionCall %mat3v2float %load_ub_inner_mat3x2_f32
+        %385 = OpAccessChain %_ptr_Uniform_mat3v3float %ub %uint_0 %uint_23
+        %386 = OpLoad %mat3v3float %385
+        %389 = OpAccessChain %_ptr_Uniform_mat3v4float %ub %uint_0 %uint_24
+        %390 = OpLoad %mat3v4float %389
+        %391 = OpFunctionCall %mat4v2float %load_ub_inner_mat4x2_f32
+        %394 = OpAccessChain %_ptr_Uniform_mat4v3float %ub %uint_0 %uint_29
+        %395 = OpLoad %mat4v3float %394
+        %398 = OpAccessChain %_ptr_Uniform_mat4v4float %ub %uint_0 %uint_30
+        %399 = OpLoad %mat4v4float %398
+        %400 = OpFunctionCall %mat2v2half %load_ub_inner_mat2x2_f16
+        %401 = OpFunctionCall %mat2v3half %load_ub_inner_mat2x3_f16
+        %402 = OpFunctionCall %mat2v4half %load_ub_inner_mat2x4_f16
+        %403 = OpFunctionCall %mat3v2half %load_ub_inner_mat3x2_f16
+        %404 = OpFunctionCall %mat3v3half %load_ub_inner_mat3x3_f16
+        %405 = OpFunctionCall %mat3v4half %load_ub_inner_mat3x4_f16
+        %406 = OpFunctionCall %mat4v2half %load_ub_inner_mat4x2_f16
+        %407 = OpFunctionCall %mat4v3half %load_ub_inner_mat4x3_f16
+        %408 = OpFunctionCall %mat4v4half %load_ub_inner_mat4x4_f16
+        %411 = OpAccessChain %_ptr_Uniform__arr_v3float_uint_2 %ub %uint_0 %uint_58
+        %412 = OpLoad %_arr_v3float_uint_2 %411
+        %416 = OpAccessChain %_ptr_Uniform__arr_mat4x2_f16_4_uint_2 %ub %uint_0 %uint_59
+        %417 = OpLoad %_arr_mat4x2_f16_4_uint_2 %416
+        %413 = OpFunctionCall %_arr_mat4v2half_uint_2 %conv_arr2_mat4x2_f16 %417
+        %420 = OpAccessChain %_ptr_Uniform_Inner %ub %uint_0 %uint_60
+        %421 = OpLoad %Inner %420
+        %424 = OpAccessChain %_ptr_Uniform__arr_Inner_uint_4 %ub %uint_0 %uint_61
+        %425 = OpLoad %_arr_Inner_uint_4 %424
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/static_index/read_f16.wgsl.expected.wgsl b/test/tint/buffer/uniform/static_index/read_f16.wgsl.expected.wgsl
new file mode 100644
index 0000000..808033c
--- /dev/null
+++ b/test/tint/buffer/uniform/static_index/read_f16.wgsl.expected.wgsl

@@ -0,0 +1,96 @@
+enable f16;
+
+struct Inner {
+  scalar_i32 : i32,
+  scalar_f32 : f32,
+  @size(8)
+  scalar_f16 : f16,
+}
+
+struct S {
+  scalar_f32 : f32,
+  scalar_i32 : i32,
+  scalar_u32 : u32,
+  scalar_f16 : f16,
+  vec2_f32 : vec2<f32>,
+  vec2_i32 : vec2<i32>,
+  vec2_u32 : vec2<u32>,
+  vec2_f16 : vec2<f16>,
+  vec3_f32 : vec3<f32>,
+  vec3_i32 : vec3<i32>,
+  vec3_u32 : vec3<u32>,
+  vec3_f16 : vec3<f16>,
+  vec4_f32 : vec4<f32>,
+  vec4_i32 : vec4<i32>,
+  vec4_u32 : vec4<u32>,
+  vec4_f16 : vec4<f16>,
+  mat2x2_f32 : mat2x2<f32>,
+  mat2x3_f32 : mat2x3<f32>,
+  mat2x4_f32 : mat2x4<f32>,
+  mat3x2_f32 : mat3x2<f32>,
+  mat3x3_f32 : mat3x3<f32>,
+  mat3x4_f32 : mat3x4<f32>,
+  mat4x2_f32 : mat4x2<f32>,
+  mat4x3_f32 : mat4x3<f32>,
+  mat4x4_f32 : mat4x4<f32>,
+  mat2x2_f16 : mat2x2<f16>,
+  mat2x3_f16 : mat2x3<f16>,
+  mat2x4_f16 : mat2x4<f16>,
+  mat3x2_f16 : mat3x2<f16>,
+  mat3x3_f16 : mat3x3<f16>,
+  mat3x4_f16 : mat3x4<f16>,
+  mat4x2_f16 : mat4x2<f16>,
+  mat4x3_f16 : mat4x3<f16>,
+  mat4x4_f16 : mat4x4<f16>,
+  @align(16)
+  arr2_vec3_f32 : array<vec3<f32>, 2>,
+  arr2_mat4x2_f16 : array<mat4x2<f16>, 2>,
+  @align(16)
+  struct_inner : Inner,
+  @align(16)
+  array_struct_inner : array<Inner, 4>,
+}
+
+@binding(0) @group(0) var<uniform> ub : S;
+
+@compute @workgroup_size(1)
+fn main() {
+  let scalar_f32 = ub.scalar_f32;
+  let scalar_i32 = ub.scalar_i32;
+  let scalar_u32 = ub.scalar_u32;
+  let scalar_f16 = ub.scalar_f16;
+  let vec2_f32 = ub.vec2_f32;
+  let vec2_i32 = ub.vec2_i32;
+  let vec2_u32 = ub.vec2_u32;
+  let vec2_f16 = ub.vec2_f16;
+  let vec3_f32 = ub.vec3_f32;
+  let vec3_i32 = ub.vec3_i32;
+  let vec3_u32 = ub.vec3_u32;
+  let vec3_f16 = ub.vec3_f16;
+  let vec4_f32 = ub.vec4_f32;
+  let vec4_i32 = ub.vec4_i32;
+  let vec4_u32 = ub.vec4_u32;
+  let vec4_f16 = ub.vec4_f16;
+  let mat2x2_f32 = ub.mat2x2_f32;
+  let mat2x3_f32 = ub.mat2x3_f32;
+  let mat2x4_f32 = ub.mat2x4_f32;
+  let mat3x2_f32 = ub.mat3x2_f32;
+  let mat3x3_f32 = ub.mat3x3_f32;
+  let mat3x4_f32 = ub.mat3x4_f32;
+  let mat4x2_f32 = ub.mat4x2_f32;
+  let mat4x3_f32 = ub.mat4x3_f32;
+  let mat4x4_f32 = ub.mat4x4_f32;
+  let mat2x2_f16 = ub.mat2x2_f16;
+  let mat2x3_f16 = ub.mat2x3_f16;
+  let mat2x4_f16 = ub.mat2x4_f16;
+  let mat3x2_f16 = ub.mat3x2_f16;
+  let mat3x3_f16 = ub.mat3x3_f16;
+  let mat3x4_f16 = ub.mat3x4_f16;
+  let mat4x2_f16 = ub.mat4x2_f16;
+  let mat4x3_f16 = ub.mat4x3_f16;
+  let mat4x4_f16 = ub.mat4x4_f16;
+  let arr2_vec3_f32 = ub.arr2_vec3_f32;
+  let arr2_mat4x2_f16 = ub.arr2_mat4x2_f16;
+  let struct_inner = ub.struct_inner;
+  let array_struct_inner = ub.array_struct_inner;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x2/to_private.wgsl b/test/tint/buffer/uniform/std140/array/mat2x2/to_private.wgsl
deleted file mode 100644
index 8a421d8..0000000
--- a/test/tint/buffer/uniform/std140/array/mat2x2/to_private.wgsl
+++ /dev/null

@@ -1,16 +0,0 @@
-struct S {
-  before : i32,
-  m : mat2x2<f32>,
-  after : i32,
-}
-
-@group(0) @binding(0) var<uniform> u : array<mat2x2<f32>, 4>;
-var<private> p : array<mat2x2<f32>, 4>;
-
-@compute @workgroup_size(1)
-fn f() {
-    p = u;
-    p[1] = u[2];
-    p[1][0] = u[0][1].yx;
-    p[1][0].x = u[0][1].x;
-}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x2/to_private.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat2x2/to_private.wgsl.expected.glsl
deleted file mode 100644
index 2118347..0000000
--- a/test/tint/buffer/uniform/std140/array/mat2x2/to_private.wgsl.expected.glsl
+++ /dev/null

@@ -1,44 +0,0 @@
-#version 310 es
-
-struct mat2x2_f32 {
-  vec2 col0;
-  vec2 col1;
-};
-
-struct S {
-  int before;
-  mat2 m;
-  int after;
-};
-
-layout(binding = 0, std140) uniform u_block_std140_ubo {
-  mat2x2_f32 inner[4];
-} u;
-
-mat2 p[4] = mat2[4](mat2(0.0f, 0.0f, 0.0f, 0.0f), mat2(0.0f, 0.0f, 0.0f, 0.0f), mat2(0.0f, 0.0f, 0.0f, 0.0f), mat2(0.0f, 0.0f, 0.0f, 0.0f));
-mat2 conv_mat2x2_f32(mat2x2_f32 val) {
-  return mat2(val.col0, val.col1);
-}
-
-mat2[4] conv_arr4_mat2x2_f32(mat2x2_f32 val[4]) {
-  mat2 arr[4] = mat2[4](mat2(0.0f, 0.0f, 0.0f, 0.0f), mat2(0.0f, 0.0f, 0.0f, 0.0f), mat2(0.0f, 0.0f, 0.0f, 0.0f), mat2(0.0f, 0.0f, 0.0f, 0.0f));
-  {
-    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
-      arr[i] = conv_mat2x2_f32(val[i]);
-    }
-  }
-  return arr;
-}
-
-void f() {
-  p = conv_arr4_mat2x2_f32(u.inner);
-  p[1] = conv_mat2x2_f32(u.inner[2u]);
-  p[1][0] = u.inner[0u].col1.yx;
-  p[1][0].x = u.inner[0u].col1[0u];
-}
-
-layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
-void main() {
-  f();
-  return;
-}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x2/to_private.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat2x2/to_private.wgsl.expected.msl
deleted file mode 100644
index ec3ed6d..0000000
--- a/test/tint/buffer/uniform/std140/array/mat2x2/to_private.wgsl.expected.msl
+++ /dev/null

@@ -1,31 +0,0 @@
-#include <metal_stdlib>
-
-using namespace metal;
-
-template<typename T, size_t N>
-struct tint_array {
-    const constant T& operator[](size_t i) const constant { return elements[i]; }
-    device T& operator[](size_t i) device { return elements[i]; }
-    const device T& operator[](size_t i) const device { return elements[i]; }
-    thread T& operator[](size_t i) thread { return elements[i]; }
-    const thread T& operator[](size_t i) const thread { return elements[i]; }
-    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
-    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
-    T elements[N];
-};
-
-struct S {
-  int before;
-  float2x2 m;
-  int after;
-};
-
-kernel void f(const constant tint_array<float2x2, 4>* tint_symbol_1 [[buffer(0)]]) {
-  thread tint_array<float2x2, 4> tint_symbol = {};
-  tint_symbol = *(tint_symbol_1);
-  tint_symbol[1] = (*(tint_symbol_1))[2];
-  tint_symbol[1][0] = float2((*(tint_symbol_1))[0][1]).yx;
-  tint_symbol[1][0][0] = (*(tint_symbol_1))[0][1][0];
-  return;
-}
-

diff --git a/test/tint/buffer/uniform/std140/array/mat2x2/to_private.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat2x2/to_private.wgsl.expected.wgsl
deleted file mode 100644
index 732f5d6..0000000
--- a/test/tint/buffer/uniform/std140/array/mat2x2/to_private.wgsl.expected.wgsl
+++ /dev/null

@@ -1,17 +0,0 @@
-struct S {
-  before : i32,
-  m : mat2x2<f32>,
-  after : i32,
-}
-
-@group(0) @binding(0) var<uniform> u : array<mat2x2<f32>, 4>;
-
-var<private> p : array<mat2x2<f32>, 4>;
-
-@compute @workgroup_size(1)
-fn f() {
-  p = u;
-  p[1] = u[2];
-  p[1][0] = u[0][1].yx;
-  p[1][0].x = u[0][1].x;
-}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x2/to_storage.wgsl b/test/tint/buffer/uniform/std140/array/mat2x2/to_storage.wgsl
deleted file mode 100644
index 3a40144..0000000
--- a/test/tint/buffer/uniform/std140/array/mat2x2/to_storage.wgsl
+++ /dev/null

@@ -1,16 +0,0 @@
-struct S {
-  before : i32,
-  m : mat2x2<f32>,
-  after : i32,
-}
-
-@group(0) @binding(0) var<uniform> u : array<mat2x2<f32>, 4>;
-@group(0) @binding(1) var<storage, read_write> s : array<mat2x2<f32>, 4>;
-
-@compute @workgroup_size(1)
-fn f() {
-    s = u;
-    s[1] = u[2];
-    s[1][0] = u[0][1].yx;
-    s[1][0].x = u[0][1].x;
-}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x2/to_storage.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat2x2/to_storage.wgsl.expected.glsl
deleted file mode 100644
index c6be613..0000000
--- a/test/tint/buffer/uniform/std140/array/mat2x2/to_storage.wgsl.expected.glsl
+++ /dev/null

@@ -1,47 +0,0 @@
-#version 310 es
-
-struct mat2x2_f32 {
-  vec2 col0;
-  vec2 col1;
-};
-
-struct S {
-  int before;
-  mat2 m;
-  int after;
-};
-
-layout(binding = 0, std140) uniform u_block_std140_ubo {
-  mat2x2_f32 inner[4];
-} u;
-
-layout(binding = 1, std430) buffer u_block_ssbo {
-  mat2 inner[4];
-} s;
-
-mat2 conv_mat2x2_f32(mat2x2_f32 val) {
-  return mat2(val.col0, val.col1);
-}
-
-mat2[4] conv_arr4_mat2x2_f32(mat2x2_f32 val[4]) {
-  mat2 arr[4] = mat2[4](mat2(0.0f, 0.0f, 0.0f, 0.0f), mat2(0.0f, 0.0f, 0.0f, 0.0f), mat2(0.0f, 0.0f, 0.0f, 0.0f), mat2(0.0f, 0.0f, 0.0f, 0.0f));
-  {
-    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
-      arr[i] = conv_mat2x2_f32(val[i]);
-    }
-  }
-  return arr;
-}
-
-void f() {
-  s.inner = conv_arr4_mat2x2_f32(u.inner);
-  s.inner[1] = conv_mat2x2_f32(u.inner[2u]);
-  s.inner[1][0] = u.inner[0u].col1.yx;
-  s.inner[1][0].x = u.inner[0u].col1[0u];
-}
-
-layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
-void main() {
-  f();
-  return;
-}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x2/to_storage.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat2x2/to_storage.wgsl.expected.msl
deleted file mode 100644
index 892ef66..0000000
--- a/test/tint/buffer/uniform/std140/array/mat2x2/to_storage.wgsl.expected.msl
+++ /dev/null

@@ -1,30 +0,0 @@
-#include <metal_stdlib>
-
-using namespace metal;
-
-template<typename T, size_t N>
-struct tint_array {
-    const constant T& operator[](size_t i) const constant { return elements[i]; }
-    device T& operator[](size_t i) device { return elements[i]; }
-    const device T& operator[](size_t i) const device { return elements[i]; }
-    thread T& operator[](size_t i) thread { return elements[i]; }
-    const thread T& operator[](size_t i) const thread { return elements[i]; }
-    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
-    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
-    T elements[N];
-};
-
-struct S {
-  int before;
-  float2x2 m;
-  int after;
-};
-
-kernel void f(device tint_array<float2x2, 4>* tint_symbol [[buffer(1)]], const constant tint_array<float2x2, 4>* tint_symbol_1 [[buffer(0)]]) {
-  *(tint_symbol) = *(tint_symbol_1);
-  (*(tint_symbol))[1] = (*(tint_symbol_1))[2];
-  (*(tint_symbol))[1][0] = float2((*(tint_symbol_1))[0][1]).yx;
-  (*(tint_symbol))[1][0][0] = (*(tint_symbol_1))[0][1][0];
-  return;
-}
-

diff --git a/test/tint/buffer/uniform/std140/array/mat2x2/to_storage.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat2x2/to_storage.wgsl.expected.wgsl
deleted file mode 100644
index 1173503..0000000
--- a/test/tint/buffer/uniform/std140/array/mat2x2/to_storage.wgsl.expected.wgsl
+++ /dev/null

@@ -1,17 +0,0 @@
-struct S {
-  before : i32,
-  m : mat2x2<f32>,
-  after : i32,
-}
-
-@group(0) @binding(0) var<uniform> u : array<mat2x2<f32>, 4>;
-
-@group(0) @binding(1) var<storage, read_write> s : array<mat2x2<f32>, 4>;
-
-@compute @workgroup_size(1)
-fn f() {
-  s = u;
-  s[1] = u[2];
-  s[1][0] = u[0][1].yx;
-  s[1][0].x = u[0][1].x;
-}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x2/to_workgroup.wgsl b/test/tint/buffer/uniform/std140/array/mat2x2/to_workgroup.wgsl
deleted file mode 100644
index 08ce8f6..0000000
--- a/test/tint/buffer/uniform/std140/array/mat2x2/to_workgroup.wgsl
+++ /dev/null

@@ -1,16 +0,0 @@
-struct S {
-  before : i32,
-  m : mat2x2<f32>,
-  after : i32,
-}
-
-@group(0) @binding(0) var<uniform> u : array<mat2x2<f32>, 4>;
-var<workgroup> w : array<mat2x2<f32>, 4>;
-
-@compute @workgroup_size(1)
-fn f() {
-    w = u;
-    w[1] = u[2];
-    w[1][0] = u[0][1].yx;
-    w[1][0].x = u[0][1].x;
-}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x2/to_workgroup.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat2x2/to_workgroup.wgsl.expected.glsl
deleted file mode 100644
index e459ce3..0000000
--- a/test/tint/buffer/uniform/std140/array/mat2x2/to_workgroup.wgsl.expected.glsl
+++ /dev/null

@@ -1,51 +0,0 @@
-#version 310 es
-
-struct mat2x2_f32 {
-  vec2 col0;
-  vec2 col1;
-};
-
-struct S {
-  int before;
-  mat2 m;
-  int after;
-};
-
-layout(binding = 0, std140) uniform u_block_std140_ubo {
-  mat2x2_f32 inner[4];
-} u;
-
-shared mat2 w[4];
-mat2 conv_mat2x2_f32(mat2x2_f32 val) {
-  return mat2(val.col0, val.col1);
-}
-
-mat2[4] conv_arr4_mat2x2_f32(mat2x2_f32 val[4]) {
-  mat2 arr[4] = mat2[4](mat2(0.0f, 0.0f, 0.0f, 0.0f), mat2(0.0f, 0.0f, 0.0f, 0.0f), mat2(0.0f, 0.0f, 0.0f, 0.0f), mat2(0.0f, 0.0f, 0.0f, 0.0f));
-  {
-    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
-      arr[i] = conv_mat2x2_f32(val[i]);
-    }
-  }
-  return arr;
-}
-
-void f(uint local_invocation_index) {
-  {
-    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
-      uint i = idx;
-      w[i] = mat2(vec2(0.0f), vec2(0.0f));
-    }
-  }
-  barrier();
-  w = conv_arr4_mat2x2_f32(u.inner);
-  w[1] = conv_mat2x2_f32(u.inner[2u]);
-  w[1][0] = u.inner[0u].col1.yx;
-  w[1][0].x = u.inner[0u].col1[0u];
-}
-
-layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
-void main() {
-  f(gl_LocalInvocationIndex);
-  return;
-}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x2/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat2x2/to_workgroup.wgsl.expected.msl
deleted file mode 100644
index 4f8574d..0000000
--- a/test/tint/buffer/uniform/std140/array/mat2x2/to_workgroup.wgsl.expected.msl
+++ /dev/null

@@ -1,44 +0,0 @@
-#include <metal_stdlib>
-
-using namespace metal;
-
-template<typename T, size_t N>
-struct tint_array {
-    const constant T& operator[](size_t i) const constant { return elements[i]; }
-    device T& operator[](size_t i) device { return elements[i]; }
-    const device T& operator[](size_t i) const device { return elements[i]; }
-    thread T& operator[](size_t i) thread { return elements[i]; }
-    const thread T& operator[](size_t i) const thread { return elements[i]; }
-    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
-    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
-    T elements[N];
-};
-
-struct tint_symbol_5 {
-  tint_array<float2x2, 4> w;
-};
-
-struct S {
-  int before;
-  float2x2 m;
-  int after;
-};
-
-void f_inner(uint local_invocation_index, threadgroup tint_array<float2x2, 4>* const tint_symbol, const constant tint_array<float2x2, 4>* const tint_symbol_1) {
-  for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
-    uint const i = idx;
-    (*(tint_symbol))[i] = float2x2(float2(0.0f), float2(0.0f));
-  }
-  threadgroup_barrier(mem_flags::mem_threadgroup);
-  *(tint_symbol) = *(tint_symbol_1);
-  (*(tint_symbol))[1] = (*(tint_symbol_1))[2];
-  (*(tint_symbol))[1][0] = float2((*(tint_symbol_1))[0][1]).yx;
-  (*(tint_symbol))[1][0][0] = (*(tint_symbol_1))[0][1][0];
-}
-
-kernel void f(const constant tint_array<float2x2, 4>* tint_symbol_4 [[buffer(0)]], threadgroup tint_symbol_5* tint_symbol_3 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
-  threadgroup tint_array<float2x2, 4>* const tint_symbol_2 = &((*(tint_symbol_3)).w);
-  f_inner(local_invocation_index, tint_symbol_2, tint_symbol_4);
-  return;
-}
-

diff --git a/test/tint/buffer/uniform/std140/array/mat2x2/to_workgroup.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat2x2/to_workgroup.wgsl.expected.wgsl
deleted file mode 100644
index fdb82d0..0000000
--- a/test/tint/buffer/uniform/std140/array/mat2x2/to_workgroup.wgsl.expected.wgsl
+++ /dev/null

@@ -1,17 +0,0 @@
-struct S {
-  before : i32,
-  m : mat2x2<f32>,
-  after : i32,
-}
-
-@group(0) @binding(0) var<uniform> u : array<mat2x2<f32>, 4>;
-
-var<workgroup> w : array<mat2x2<f32>, 4>;
-
-@compute @workgroup_size(1)
-fn f() {
-  w = u;
-  w[1] = u[2];
-  w[1][0] = u[0][1].yx;
-  w[1][0].x = u[0][1].x;
-}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x2/dynamic_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/array/mat2x2_f32/dynamic_index_via_ptr.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/array/mat2x2/dynamic_index_via_ptr.wgsl
rename to test/tint/buffer/uniform/std140/array/mat2x2_f32/dynamic_index_via_ptr.wgsl


diff --git a/test/tint/buffer/uniform/std140/array/mat2x2/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x2_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/array/mat2x2/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
rename to test/tint/buffer/uniform/std140/array/mat2x2_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/array/mat2x2/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x2_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/array/mat2x2/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
rename to test/tint/buffer/uniform/std140/array/mat2x2_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/array/mat2x2/dynamic_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat2x2_f32/dynamic_index_via_ptr.wgsl.expected.glsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/array/mat2x2/dynamic_index_via_ptr.wgsl.expected.glsl
rename to test/tint/buffer/uniform/std140/array/mat2x2_f32/dynamic_index_via_ptr.wgsl.expected.glsl


diff --git a/test/tint/buffer/uniform/std140/array/mat2x2/dynamic_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat2x2_f32/dynamic_index_via_ptr.wgsl.expected.msl
similarity index 100%
rename from test/tint/buffer/uniform/std140/array/mat2x2/dynamic_index_via_ptr.wgsl.expected.msl
rename to test/tint/buffer/uniform/std140/array/mat2x2_f32/dynamic_index_via_ptr.wgsl.expected.msl


diff --git a/test/tint/buffer/uniform/std140/array/mat2x2/dynamic_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat2x2_f32/dynamic_index_via_ptr.wgsl.expected.spvasm
similarity index 100%
rename from test/tint/buffer/uniform/std140/array/mat2x2/dynamic_index_via_ptr.wgsl.expected.spvasm
rename to test/tint/buffer/uniform/std140/array/mat2x2_f32/dynamic_index_via_ptr.wgsl.expected.spvasm


diff --git a/test/tint/buffer/uniform/std140/array/mat2x2/dynamic_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat2x2_f32/dynamic_index_via_ptr.wgsl.expected.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/array/mat2x2/dynamic_index_via_ptr.wgsl.expected.wgsl
rename to test/tint/buffer/uniform/std140/array/mat2x2_f32/dynamic_index_via_ptr.wgsl.expected.wgsl


diff --git a/test/tint/buffer/uniform/std140/array/mat2x2/static_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/array/mat2x2_f32/static_index_via_ptr.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/array/mat2x2/static_index_via_ptr.wgsl
rename to test/tint/buffer/uniform/std140/array/mat2x2_f32/static_index_via_ptr.wgsl


diff --git a/test/tint/buffer/uniform/std140/array/mat2x2/static_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x2_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/array/mat2x2/static_index_via_ptr.wgsl.expected.dxc.hlsl
rename to test/tint/buffer/uniform/std140/array/mat2x2_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/array/mat2x2/static_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x2_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/array/mat2x2/static_index_via_ptr.wgsl.expected.fxc.hlsl
rename to test/tint/buffer/uniform/std140/array/mat2x2_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/array/mat2x2/static_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat2x2_f32/static_index_via_ptr.wgsl.expected.glsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/array/mat2x2/static_index_via_ptr.wgsl.expected.glsl
rename to test/tint/buffer/uniform/std140/array/mat2x2_f32/static_index_via_ptr.wgsl.expected.glsl


diff --git a/test/tint/buffer/uniform/std140/array/mat2x2/static_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat2x2_f32/static_index_via_ptr.wgsl.expected.msl
similarity index 100%
rename from test/tint/buffer/uniform/std140/array/mat2x2/static_index_via_ptr.wgsl.expected.msl
rename to test/tint/buffer/uniform/std140/array/mat2x2_f32/static_index_via_ptr.wgsl.expected.msl


diff --git a/test/tint/buffer/uniform/std140/array/mat2x2/static_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat2x2_f32/static_index_via_ptr.wgsl.expected.spvasm
similarity index 100%
rename from test/tint/buffer/uniform/std140/array/mat2x2/static_index_via_ptr.wgsl.expected.spvasm
rename to test/tint/buffer/uniform/std140/array/mat2x2_f32/static_index_via_ptr.wgsl.expected.spvasm


diff --git a/test/tint/buffer/uniform/std140/array/mat2x2/static_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat2x2_f32/static_index_via_ptr.wgsl.expected.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/array/mat2x2/static_index_via_ptr.wgsl.expected.wgsl
rename to test/tint/buffer/uniform/std140/array/mat2x2_f32/static_index_via_ptr.wgsl.expected.wgsl


diff --git a/test/tint/buffer/uniform/std140/array/mat2x2/to_builtin.wgsl b/test/tint/buffer/uniform/std140/array/mat2x2_f32/to_builtin.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/array/mat2x2/to_builtin.wgsl
rename to test/tint/buffer/uniform/std140/array/mat2x2_f32/to_builtin.wgsl


diff --git a/test/tint/buffer/uniform/std140/array/mat2x2/to_builtin.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x2_f32/to_builtin.wgsl.expected.dxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/array/mat2x2/to_builtin.wgsl.expected.dxc.hlsl
rename to test/tint/buffer/uniform/std140/array/mat2x2_f32/to_builtin.wgsl.expected.dxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/array/mat2x2/to_builtin.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x2_f32/to_builtin.wgsl.expected.fxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/array/mat2x2/to_builtin.wgsl.expected.fxc.hlsl
rename to test/tint/buffer/uniform/std140/array/mat2x2_f32/to_builtin.wgsl.expected.fxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/array/mat2x2/to_builtin.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat2x2_f32/to_builtin.wgsl.expected.glsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/array/mat2x2/to_builtin.wgsl.expected.glsl
rename to test/tint/buffer/uniform/std140/array/mat2x2_f32/to_builtin.wgsl.expected.glsl


diff --git a/test/tint/buffer/uniform/std140/array/mat2x2/to_builtin.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat2x2_f32/to_builtin.wgsl.expected.msl
similarity index 100%
rename from test/tint/buffer/uniform/std140/array/mat2x2/to_builtin.wgsl.expected.msl
rename to test/tint/buffer/uniform/std140/array/mat2x2_f32/to_builtin.wgsl.expected.msl


diff --git a/test/tint/buffer/uniform/std140/array/mat2x2/to_builtin.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat2x2_f32/to_builtin.wgsl.expected.spvasm
similarity index 100%
rename from test/tint/buffer/uniform/std140/array/mat2x2/to_builtin.wgsl.expected.spvasm
rename to test/tint/buffer/uniform/std140/array/mat2x2_f32/to_builtin.wgsl.expected.spvasm


diff --git a/test/tint/buffer/uniform/std140/array/mat2x2/to_builtin.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat2x2_f32/to_builtin.wgsl.expected.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/array/mat2x2/to_builtin.wgsl.expected.wgsl
rename to test/tint/buffer/uniform/std140/array/mat2x2_f32/to_builtin.wgsl.expected.wgsl


diff --git a/test/tint/buffer/uniform/std140/array/mat2x2/to_fn.wgsl b/test/tint/buffer/uniform/std140/array/mat2x2_f32/to_fn.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/array/mat2x2/to_fn.wgsl
rename to test/tint/buffer/uniform/std140/array/mat2x2_f32/to_fn.wgsl


diff --git a/test/tint/buffer/uniform/std140/array/mat2x2/to_fn.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x2_f32/to_fn.wgsl.expected.dxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/array/mat2x2/to_fn.wgsl.expected.dxc.hlsl
rename to test/tint/buffer/uniform/std140/array/mat2x2_f32/to_fn.wgsl.expected.dxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/array/mat2x2/to_fn.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x2_f32/to_fn.wgsl.expected.fxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/array/mat2x2/to_fn.wgsl.expected.fxc.hlsl
rename to test/tint/buffer/uniform/std140/array/mat2x2_f32/to_fn.wgsl.expected.fxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/array/mat2x2/to_fn.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat2x2_f32/to_fn.wgsl.expected.glsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/array/mat2x2/to_fn.wgsl.expected.glsl
rename to test/tint/buffer/uniform/std140/array/mat2x2_f32/to_fn.wgsl.expected.glsl


diff --git a/test/tint/buffer/uniform/std140/array/mat2x2/to_fn.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat2x2_f32/to_fn.wgsl.expected.msl
similarity index 100%
rename from test/tint/buffer/uniform/std140/array/mat2x2/to_fn.wgsl.expected.msl
rename to test/tint/buffer/uniform/std140/array/mat2x2_f32/to_fn.wgsl.expected.msl


diff --git a/test/tint/buffer/uniform/std140/array/mat2x2/to_fn.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat2x2_f32/to_fn.wgsl.expected.spvasm
similarity index 100%
rename from test/tint/buffer/uniform/std140/array/mat2x2/to_fn.wgsl.expected.spvasm
rename to test/tint/buffer/uniform/std140/array/mat2x2_f32/to_fn.wgsl.expected.spvasm


diff --git a/test/tint/buffer/uniform/std140/array/mat2x2/to_fn.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat2x2_f32/to_fn.wgsl.expected.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/array/mat2x2/to_fn.wgsl.expected.wgsl
rename to test/tint/buffer/uniform/std140/array/mat2x2_f32/to_fn.wgsl.expected.wgsl


diff --git a/test/tint/buffer/uniform/std140/array/mat2x2_f32/to_private.wgsl b/test/tint/buffer/uniform/std140/array/mat2x2_f32/to_private.wgsl
new file mode 100644
index 0000000..29bc615
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x2_f32/to_private.wgsl

@@ -0,0 +1,10 @@
+@group(0) @binding(0) var<uniform> u : array<mat2x2<f32>, 4>;
+var<private> p : array<mat2x2<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    p = u;
+    p[1] = u[2];
+    p[1][0] = u[0][1].yx;
+    p[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x2/to_private.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x2_f32/to_private.wgsl.expected.dxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/array/mat2x2/to_private.wgsl.expected.dxc.hlsl
rename to test/tint/buffer/uniform/std140/array/mat2x2_f32/to_private.wgsl.expected.dxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/array/mat2x2/to_private.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x2_f32/to_private.wgsl.expected.fxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/array/mat2x2/to_private.wgsl.expected.fxc.hlsl
rename to test/tint/buffer/uniform/std140/array/mat2x2_f32/to_private.wgsl.expected.fxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/array/mat2x2_f32/to_private.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat2x2_f32/to_private.wgsl.expected.glsl
new file mode 100644
index 0000000..15386d0
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x2_f32/to_private.wgsl.expected.glsl

@@ -0,0 +1,38 @@
+#version 310 es
+
+struct mat2x2_f32 {
+  vec2 col0;
+  vec2 col1;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  mat2x2_f32 inner[4];
+} u;
+
+mat2 p[4] = mat2[4](mat2(0.0f, 0.0f, 0.0f, 0.0f), mat2(0.0f, 0.0f, 0.0f, 0.0f), mat2(0.0f, 0.0f, 0.0f, 0.0f), mat2(0.0f, 0.0f, 0.0f, 0.0f));
+mat2 conv_mat2x2_f32(mat2x2_f32 val) {
+  return mat2(val.col0, val.col1);
+}
+
+mat2[4] conv_arr4_mat2x2_f32(mat2x2_f32 val[4]) {
+  mat2 arr[4] = mat2[4](mat2(0.0f, 0.0f, 0.0f, 0.0f), mat2(0.0f, 0.0f, 0.0f, 0.0f), mat2(0.0f, 0.0f, 0.0f, 0.0f), mat2(0.0f, 0.0f, 0.0f, 0.0f));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_mat2x2_f32(val[i]);
+    }
+  }
+  return arr;
+}
+
+void f() {
+  p = conv_arr4_mat2x2_f32(u.inner);
+  p[1] = conv_mat2x2_f32(u.inner[2u]);
+  p[1][0] = u.inner[0u].col1.yx;
+  p[1][0].x = u.inner[0u].col1[0u];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x2_f32/to_private.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat2x2_f32/to_private.wgsl.expected.msl
new file mode 100644
index 0000000..c9084b3
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x2_f32/to_private.wgsl.expected.msl

@@ -0,0 +1,25 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+kernel void f(const constant tint_array<float2x2, 4>* tint_symbol_1 [[buffer(0)]]) {
+  thread tint_array<float2x2, 4> tint_symbol = {};
+  tint_symbol = *(tint_symbol_1);
+  tint_symbol[1] = (*(tint_symbol_1))[2];
+  tint_symbol[1][0] = float2((*(tint_symbol_1))[0][1]).yx;
+  tint_symbol[1][0][0] = (*(tint_symbol_1))[0][1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat2x2/to_private.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat2x2_f32/to_private.wgsl.expected.spvasm
similarity index 100%
rename from test/tint/buffer/uniform/std140/array/mat2x2/to_private.wgsl.expected.spvasm
rename to test/tint/buffer/uniform/std140/array/mat2x2_f32/to_private.wgsl.expected.spvasm


diff --git a/test/tint/buffer/uniform/std140/array/mat2x2_f32/to_private.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat2x2_f32/to_private.wgsl.expected.wgsl
new file mode 100644
index 0000000..9636d9a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x2_f32/to_private.wgsl.expected.wgsl

@@ -0,0 +1,11 @@
+@group(0) @binding(0) var<uniform> u : array<mat2x2<f32>, 4>;
+
+var<private> p : array<mat2x2<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  p = u;
+  p[1] = u[2];
+  p[1][0] = u[0][1].yx;
+  p[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x2_f32/to_storage.wgsl b/test/tint/buffer/uniform/std140/array/mat2x2_f32/to_storage.wgsl
new file mode 100644
index 0000000..3243429
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x2_f32/to_storage.wgsl

@@ -0,0 +1,10 @@
+@group(0) @binding(0) var<uniform> u : array<mat2x2<f32>, 4>;
+@group(0) @binding(1) var<storage, read_write> s : array<mat2x2<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    s = u;
+    s[1] = u[2];
+    s[1][0] = u[0][1].yx;
+    s[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x2/to_storage.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x2_f32/to_storage.wgsl.expected.dxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/array/mat2x2/to_storage.wgsl.expected.dxc.hlsl
rename to test/tint/buffer/uniform/std140/array/mat2x2_f32/to_storage.wgsl.expected.dxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/array/mat2x2/to_storage.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x2_f32/to_storage.wgsl.expected.fxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/array/mat2x2/to_storage.wgsl.expected.fxc.hlsl
rename to test/tint/buffer/uniform/std140/array/mat2x2_f32/to_storage.wgsl.expected.fxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/array/mat2x2_f32/to_storage.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat2x2_f32/to_storage.wgsl.expected.glsl
new file mode 100644
index 0000000..059d58a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x2_f32/to_storage.wgsl.expected.glsl

@@ -0,0 +1,41 @@
+#version 310 es
+
+struct mat2x2_f32 {
+  vec2 col0;
+  vec2 col1;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  mat2x2_f32 inner[4];
+} u;
+
+layout(binding = 1, std430) buffer u_block_ssbo {
+  mat2 inner[4];
+} s;
+
+mat2 conv_mat2x2_f32(mat2x2_f32 val) {
+  return mat2(val.col0, val.col1);
+}
+
+mat2[4] conv_arr4_mat2x2_f32(mat2x2_f32 val[4]) {
+  mat2 arr[4] = mat2[4](mat2(0.0f, 0.0f, 0.0f, 0.0f), mat2(0.0f, 0.0f, 0.0f, 0.0f), mat2(0.0f, 0.0f, 0.0f, 0.0f), mat2(0.0f, 0.0f, 0.0f, 0.0f));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_mat2x2_f32(val[i]);
+    }
+  }
+  return arr;
+}
+
+void f() {
+  s.inner = conv_arr4_mat2x2_f32(u.inner);
+  s.inner[1] = conv_mat2x2_f32(u.inner[2u]);
+  s.inner[1][0] = u.inner[0u].col1.yx;
+  s.inner[1][0].x = u.inner[0u].col1[0u];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x2_f32/to_storage.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat2x2_f32/to_storage.wgsl.expected.msl
new file mode 100644
index 0000000..2a08172
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x2_f32/to_storage.wgsl.expected.msl

@@ -0,0 +1,24 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+kernel void f(device tint_array<float2x2, 4>* tint_symbol [[buffer(1)]], const constant tint_array<float2x2, 4>* tint_symbol_1 [[buffer(0)]]) {
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[2];
+  (*(tint_symbol))[1][0] = float2((*(tint_symbol_1))[0][1]).yx;
+  (*(tint_symbol))[1][0][0] = (*(tint_symbol_1))[0][1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat2x2/to_storage.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat2x2_f32/to_storage.wgsl.expected.spvasm
similarity index 100%
rename from test/tint/buffer/uniform/std140/array/mat2x2/to_storage.wgsl.expected.spvasm
rename to test/tint/buffer/uniform/std140/array/mat2x2_f32/to_storage.wgsl.expected.spvasm


diff --git a/test/tint/buffer/uniform/std140/array/mat2x2_f32/to_storage.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat2x2_f32/to_storage.wgsl.expected.wgsl
new file mode 100644
index 0000000..b19ff20
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x2_f32/to_storage.wgsl.expected.wgsl

@@ -0,0 +1,11 @@
+@group(0) @binding(0) var<uniform> u : array<mat2x2<f32>, 4>;
+
+@group(0) @binding(1) var<storage, read_write> s : array<mat2x2<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  s = u;
+  s[1] = u[2];
+  s[1][0] = u[0][1].yx;
+  s[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x2_f32/to_workgroup.wgsl b/test/tint/buffer/uniform/std140/array/mat2x2_f32/to_workgroup.wgsl
new file mode 100644
index 0000000..02839e7
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x2_f32/to_workgroup.wgsl

@@ -0,0 +1,10 @@
+@group(0) @binding(0) var<uniform> u : array<mat2x2<f32>, 4>;
+var<workgroup> w : array<mat2x2<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    w = u;
+    w[1] = u[2];
+    w[1][0] = u[0][1].yx;
+    w[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x2/to_workgroup.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x2_f32/to_workgroup.wgsl.expected.dxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/array/mat2x2/to_workgroup.wgsl.expected.dxc.hlsl
rename to test/tint/buffer/uniform/std140/array/mat2x2_f32/to_workgroup.wgsl.expected.dxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/array/mat2x2/to_workgroup.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x2_f32/to_workgroup.wgsl.expected.fxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/array/mat2x2/to_workgroup.wgsl.expected.fxc.hlsl
rename to test/tint/buffer/uniform/std140/array/mat2x2_f32/to_workgroup.wgsl.expected.fxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/array/mat2x2_f32/to_workgroup.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat2x2_f32/to_workgroup.wgsl.expected.glsl
new file mode 100644
index 0000000..9108de5
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x2_f32/to_workgroup.wgsl.expected.glsl

@@ -0,0 +1,45 @@
+#version 310 es
+
+struct mat2x2_f32 {
+  vec2 col0;
+  vec2 col1;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  mat2x2_f32 inner[4];
+} u;
+
+shared mat2 w[4];
+mat2 conv_mat2x2_f32(mat2x2_f32 val) {
+  return mat2(val.col0, val.col1);
+}
+
+mat2[4] conv_arr4_mat2x2_f32(mat2x2_f32 val[4]) {
+  mat2 arr[4] = mat2[4](mat2(0.0f, 0.0f, 0.0f, 0.0f), mat2(0.0f, 0.0f, 0.0f, 0.0f), mat2(0.0f, 0.0f, 0.0f, 0.0f), mat2(0.0f, 0.0f, 0.0f, 0.0f));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_mat2x2_f32(val[i]);
+    }
+  }
+  return arr;
+}
+
+void f(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      uint i = idx;
+      w[i] = mat2(vec2(0.0f), vec2(0.0f));
+    }
+  }
+  barrier();
+  w = conv_arr4_mat2x2_f32(u.inner);
+  w[1] = conv_mat2x2_f32(u.inner[2u]);
+  w[1][0] = u.inner[0u].col1.yx;
+  w[1][0].x = u.inner[0u].col1[0u];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f(gl_LocalInvocationIndex);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x2_f32/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat2x2_f32/to_workgroup.wgsl.expected.msl
new file mode 100644
index 0000000..355d60e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x2_f32/to_workgroup.wgsl.expected.msl

@@ -0,0 +1,38 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct tint_symbol_5 {
+  tint_array<float2x2, 4> w;
+};
+
+void f_inner(uint local_invocation_index, threadgroup tint_array<float2x2, 4>* const tint_symbol, const constant tint_array<float2x2, 4>* const tint_symbol_1) {
+  for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+    uint const i = idx;
+    (*(tint_symbol))[i] = float2x2(float2(0.0f), float2(0.0f));
+  }
+  threadgroup_barrier(mem_flags::mem_threadgroup);
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[2];
+  (*(tint_symbol))[1][0] = float2((*(tint_symbol_1))[0][1]).yx;
+  (*(tint_symbol))[1][0][0] = (*(tint_symbol_1))[0][1][0];
+}
+
+kernel void f(const constant tint_array<float2x2, 4>* tint_symbol_4 [[buffer(0)]], threadgroup tint_symbol_5* tint_symbol_3 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
+  threadgroup tint_array<float2x2, 4>* const tint_symbol_2 = &((*(tint_symbol_3)).w);
+  f_inner(local_invocation_index, tint_symbol_2, tint_symbol_4);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat2x2/to_workgroup.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat2x2_f32/to_workgroup.wgsl.expected.spvasm
similarity index 100%
rename from test/tint/buffer/uniform/std140/array/mat2x2/to_workgroup.wgsl.expected.spvasm
rename to test/tint/buffer/uniform/std140/array/mat2x2_f32/to_workgroup.wgsl.expected.spvasm


diff --git a/test/tint/buffer/uniform/std140/array/mat2x2_f32/to_workgroup.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat2x2_f32/to_workgroup.wgsl.expected.wgsl
new file mode 100644
index 0000000..2aaff97
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x2_f32/to_workgroup.wgsl.expected.wgsl

@@ -0,0 +1,11 @@
+@group(0) @binding(0) var<uniform> u : array<mat2x2<f32>, 4>;
+
+var<workgroup> w : array<mat2x2<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  w = u;
+  w[1] = u[2];
+  w[1][0] = u[0][1].yx;
+  w[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f16/dynamic_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/array/mat2x3_f16/dynamic_index_via_ptr.wgsl
new file mode 100644
index 0000000..d113381
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f16/dynamic_index_via_ptr.wgsl

@@ -0,0 +1,17 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : array<mat2x3<f16>, 4>;
+
+var<private> counter = 0;
+fn i() -> i32 { counter++; return counter; }
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a       = &a;
+  let p_a_i     = &((*p_a)[i()]);
+  let p_a_i_i   = &((*p_a_i)[i()]);
+
+  let l_a       : array<mat2x3<f16>, 4> = *p_a;
+  let l_a_i     : mat2x3<f16>           = *p_a_i;
+  let l_a_i_i   : vec3<f16>             = *p_a_i_i;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x3_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..43a8682
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,49 @@
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[4];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+matrix<float16_t, 2, 3> tint_symbol_1(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  return matrix<float16_t, 2, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]));
+}
+
+typedef matrix<float16_t, 2, 3> tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[4], uint offset) {
+  matrix<float16_t, 2, 3> arr[4] = (matrix<float16_t, 2, 3>[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 16u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_a_i_save = i();
+  const int p_a_i_i_save = i();
+  const matrix<float16_t, 2, 3> l_a[4] = tint_symbol(a, 0u);
+  const matrix<float16_t, 2, 3> l_a_i = tint_symbol_1(a, (16u * uint(p_a_i_save)));
+  const uint scalar_offset_2 = (((16u * uint(p_a_i_save)) + (8u * uint(p_a_i_i_save)))) / 4;
+  uint4 ubo_load_5 = a[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const vector<float16_t, 3> l_a_i_i = vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x3_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..245be9a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,54 @@
+SKIP: FAILED
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[4];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+matrix<float16_t, 2, 3> tint_symbol_1(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  return matrix<float16_t, 2, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]));
+}
+
+typedef matrix<float16_t, 2, 3> tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[4], uint offset) {
+  matrix<float16_t, 2, 3> arr[4] = (matrix<float16_t, 2, 3>[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 16u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_a_i_save = i();
+  const int p_a_i_i_save = i();
+  const matrix<float16_t, 2, 3> l_a[4] = tint_symbol(a, 0u);
+  const matrix<float16_t, 2, 3> l_a_i = tint_symbol_1(a, (16u * uint(p_a_i_save)));
+  const uint scalar_offset_2 = (((16u * uint(p_a_i_save)) + (8u * uint(p_a_i_i_save)))) / 4;
+  uint4 ubo_load_5 = a[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const vector<float16_t, 3> l_a_i_i = vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001F4D5AC4540(11,8-16): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f16/dynamic_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat2x3_f16/dynamic_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..75fe088
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f16/dynamic_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,65 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct mat2x3_f16 {
+  f16vec3 col0;
+  f16vec3 col1;
+};
+
+layout(binding = 0, std140) uniform a_block_std140_ubo {
+  mat2x3_f16 inner[4];
+} a;
+
+int counter = 0;
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+f16mat2x3 conv_mat2x3_f16(mat2x3_f16 val) {
+  return f16mat2x3(val.col0, val.col1);
+}
+
+f16mat2x3[4] conv_arr4_mat2x3_f16(mat2x3_f16 val[4]) {
+  f16mat2x3 arr[4] = f16mat2x3[4](f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_mat2x3_f16(val[i]);
+    }
+  }
+  return arr;
+}
+
+f16vec3 load_a_inner_p0_p1(uint p0, uint p1) {
+  switch(p1) {
+    case 0u: {
+      return a.inner[p0].col0;
+      break;
+    }
+    case 1u: {
+      return a.inner[p0].col1;
+      break;
+    }
+    default: {
+      return f16vec3(0.0hf);
+      break;
+    }
+  }
+}
+
+void f() {
+  f16mat2x3 p_a[4] = conv_arr4_mat2x3_f16(a.inner);
+  int tint_symbol = i();
+  f16mat2x3 p_a_i = conv_mat2x3_f16(a.inner[tint_symbol]);
+  int tint_symbol_1 = i();
+  f16vec3 p_a_i_i = load_a_inner_p0_p1(uint(tint_symbol), uint(tint_symbol_1));
+  f16mat2x3 l_a[4] = conv_arr4_mat2x3_f16(a.inner);
+  f16mat2x3 l_a_i = conv_mat2x3_f16(a.inner[tint_symbol]);
+  f16vec3 l_a_i_i = load_a_inner_p0_p1(uint(tint_symbol), uint(tint_symbol_1));
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f16/dynamic_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat2x3_f16/dynamic_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..51600fc
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f16/dynamic_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,33 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+int i() {
+  thread int tint_symbol_2 = 0;
+  tint_symbol_2 = as_type<int>((as_type<uint>(tint_symbol_2) + as_type<uint>(1)));
+  return tint_symbol_2;
+}
+
+kernel void f(const constant tint_array<half2x3, 4>* tint_symbol_3 [[buffer(0)]]) {
+  int const tint_symbol = i();
+  int const p_a_i_save = tint_symbol;
+  int const tint_symbol_1 = i();
+  int const p_a_i_i_save = tint_symbol_1;
+  tint_array<half2x3, 4> const l_a = *(tint_symbol_3);
+  half2x3 const l_a_i = (*(tint_symbol_3))[p_a_i_save];
+  half3 const l_a_i_i = (*(tint_symbol_3))[p_a_i_save][p_a_i_i_save];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f16/dynamic_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat2x3_f16/dynamic_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..1834758
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f16/dynamic_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,165 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 98
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %a_block_std140 "a_block_std140"
+               OpMemberName %a_block_std140 0 "inner"
+               OpName %mat2x3_f16 "mat2x3_f16"
+               OpMemberName %mat2x3_f16 0 "col0"
+               OpMemberName %mat2x3_f16 1 "col1"
+               OpName %a "a"
+               OpName %counter "counter"
+               OpName %i "i"
+               OpName %conv_mat2x3_f16 "conv_mat2x3_f16"
+               OpName %val "val"
+               OpName %conv_arr4_mat2x3_f16 "conv_arr4_mat2x3_f16"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i_0 "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_a_inner_p0_p1 "load_a_inner_p0_p1"
+               OpName %p0 "p0"
+               OpName %p1 "p1"
+               OpName %f "f"
+               OpDecorate %a_block_std140 Block
+               OpMemberDecorate %a_block_std140 0 Offset 0
+               OpMemberDecorate %mat2x3_f16 0 Offset 0
+               OpMemberDecorate %mat2x3_f16 1 Offset 8
+               OpDecorate %_arr_mat2x3_f16_uint_4 ArrayStride 16
+               OpDecorate %a NonWritable
+               OpDecorate %a DescriptorSet 0
+               OpDecorate %a Binding 0
+               OpDecorate %_arr_mat2v3half_uint_4 ArrayStride 16
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+ %mat2x3_f16 = OpTypeStruct %v3half %v3half
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat2x3_f16_uint_4 = OpTypeArray %mat2x3_f16 %uint_4
+%a_block_std140 = OpTypeStruct %_arr_mat2x3_f16_uint_4
+%_ptr_Uniform_a_block_std140 = OpTypePointer Uniform %a_block_std140
+          %a = OpVariable %_ptr_Uniform_a_block_std140 Uniform
+        %int = OpTypeInt 32 1
+         %11 = OpConstantNull %int
+%_ptr_Private_int = OpTypePointer Private %int
+    %counter = OpVariable %_ptr_Private_int Private %11
+         %14 = OpTypeFunction %int
+      %int_1 = OpConstant %int 1
+ %mat2v3half = OpTypeMatrix %v3half 2
+         %21 = OpTypeFunction %mat2v3half %mat2x3_f16
+%_arr_mat2v3half_uint_4 = OpTypeArray %mat2v3half %uint_4
+         %29 = OpTypeFunction %_arr_mat2v3half_uint_4 %_arr_mat2x3_f16_uint_4
+%_ptr_Function__arr_mat2v3half_uint_4 = OpTypePointer Function %_arr_mat2v3half_uint_4
+         %36 = OpConstantNull %_arr_mat2v3half_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %39 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_mat2x3_f16_uint_4 = OpTypePointer Function %_arr_mat2x3_f16_uint_4
+         %52 = OpConstantNull %_arr_mat2x3_f16_uint_4
+%_ptr_Function_mat2v3half = OpTypePointer Function %mat2v3half
+%_ptr_Function_mat2x3_f16 = OpTypePointer Function %mat2x3_f16
+     %uint_1 = OpConstant %uint 1
+         %65 = OpTypeFunction %v3half %uint %uint
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+         %80 = OpConstantNull %v3half
+       %void = OpTypeVoid
+         %81 = OpTypeFunction %void
+%_ptr_Uniform__arr_mat2x3_f16_uint_4 = OpTypePointer Uniform %_arr_mat2x3_f16_uint_4
+%_ptr_Uniform_mat2x3_f16 = OpTypePointer Uniform %mat2x3_f16
+          %i = OpFunction %int None %14
+         %16 = OpLabel
+         %17 = OpLoad %int %counter
+         %19 = OpIAdd %int %17 %int_1
+               OpStore %counter %19
+         %20 = OpLoad %int %counter
+               OpReturnValue %20
+               OpFunctionEnd
+%conv_mat2x3_f16 = OpFunction %mat2v3half None %21
+        %val = OpFunctionParameter %mat2x3_f16
+         %25 = OpLabel
+         %26 = OpCompositeExtract %v3half %val 0
+         %27 = OpCompositeExtract %v3half %val 1
+         %28 = OpCompositeConstruct %mat2v3half %26 %27
+               OpReturnValue %28
+               OpFunctionEnd
+%conv_arr4_mat2x3_f16 = OpFunction %_arr_mat2v3half_uint_4 None %29
+      %val_0 = OpFunctionParameter %_arr_mat2x3_f16_uint_4
+         %33 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_mat2v3half_uint_4 Function %36
+        %i_0 = OpVariable %_ptr_Function_uint Function %39
+%var_for_index = OpVariable %_ptr_Function__arr_mat2x3_f16_uint_4 Function %52
+               OpBranch %40
+         %40 = OpLabel
+               OpLoopMerge %41 %42 None
+               OpBranch %43
+         %43 = OpLabel
+         %45 = OpLoad %uint %i_0
+         %46 = OpULessThan %bool %45 %uint_4
+         %44 = OpLogicalNot %bool %46
+               OpSelectionMerge %48 None
+               OpBranchConditional %44 %49 %48
+         %49 = OpLabel
+               OpBranch %41
+         %48 = OpLabel
+               OpStore %var_for_index %val_0
+         %53 = OpLoad %uint %i_0
+         %55 = OpAccessChain %_ptr_Function_mat2v3half %arr %53
+         %57 = OpLoad %uint %i_0
+         %59 = OpAccessChain %_ptr_Function_mat2x3_f16 %var_for_index %57
+         %60 = OpLoad %mat2x3_f16 %59
+         %56 = OpFunctionCall %mat2v3half %conv_mat2x3_f16 %60
+               OpStore %55 %56
+               OpBranch %42
+         %42 = OpLabel
+         %61 = OpLoad %uint %i_0
+         %63 = OpIAdd %uint %61 %uint_1
+               OpStore %i_0 %63
+               OpBranch %40
+         %41 = OpLabel
+         %64 = OpLoad %_arr_mat2v3half_uint_4 %arr
+               OpReturnValue %64
+               OpFunctionEnd
+%load_a_inner_p0_p1 = OpFunction %v3half None %65
+         %p0 = OpFunctionParameter %uint
+         %p1 = OpFunctionParameter %uint
+         %69 = OpLabel
+               OpSelectionMerge %70 None
+               OpSwitch %p1 %71 0 %72 1 %73
+         %72 = OpLabel
+         %76 = OpAccessChain %_ptr_Uniform_v3half %a %uint_0 %p0 %uint_0
+         %77 = OpLoad %v3half %76
+               OpReturnValue %77
+         %73 = OpLabel
+         %78 = OpAccessChain %_ptr_Uniform_v3half %a %uint_0 %p0 %uint_1
+         %79 = OpLoad %v3half %78
+               OpReturnValue %79
+         %71 = OpLabel
+               OpReturnValue %80
+         %70 = OpLabel
+               OpReturnValue %80
+               OpFunctionEnd
+          %f = OpFunction %void None %81
+         %84 = OpLabel
+         %85 = OpFunctionCall %int %i
+         %86 = OpFunctionCall %int %i
+         %89 = OpAccessChain %_ptr_Uniform__arr_mat2x3_f16_uint_4 %a %uint_0
+         %90 = OpLoad %_arr_mat2x3_f16_uint_4 %89
+         %87 = OpFunctionCall %_arr_mat2v3half_uint_4 %conv_arr4_mat2x3_f16 %90
+         %93 = OpAccessChain %_ptr_Uniform_mat2x3_f16 %a %uint_0 %85
+         %94 = OpLoad %mat2x3_f16 %93
+         %91 = OpFunctionCall %mat2v3half %conv_mat2x3_f16 %94
+         %96 = OpBitcast %uint %85
+         %97 = OpBitcast %uint %86
+         %95 = OpFunctionCall %v3half %load_a_inner_p0_p1 %96 %97
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f16/dynamic_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat2x3_f16/dynamic_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..ce82043
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f16/dynamic_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,20 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : array<mat2x3<f16>, 4>;
+
+var<private> counter = 0;
+
+fn i() -> i32 {
+  counter++;
+  return counter;
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &(a);
+  let p_a_i = &((*(p_a))[i()]);
+  let p_a_i_i = &((*(p_a_i))[i()]);
+  let l_a : array<mat2x3<f16>, 4> = *(p_a);
+  let l_a_i : mat2x3<f16> = *(p_a_i);
+  let l_a_i_i : vec3<f16> = *(p_a_i_i);
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f16/static_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/array/mat2x3_f16/static_index_via_ptr.wgsl
new file mode 100644
index 0000000..2cd3d2e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f16/static_index_via_ptr.wgsl

@@ -0,0 +1,14 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : array<mat2x3<f16>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a       = &a;
+  let p_a_2     = &((*p_a)[2]);
+  let p_a_2_1   = &((*p_a_2)[1]);
+
+  let l_a       : array<mat2x3<f16>, 4> = *p_a;
+  let l_a_i     : mat2x3<f16>           = *p_a_2;
+  let l_a_i_i   : vec3<f16>             = *p_a_2_1;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x3_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..6a66652
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,39 @@
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[4];
+};
+
+matrix<float16_t, 2, 3> tint_symbol_1(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  return matrix<float16_t, 2, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]));
+}
+
+typedef matrix<float16_t, 2, 3> tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[4], uint offset) {
+  matrix<float16_t, 2, 3> arr[4] = (matrix<float16_t, 2, 3>[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 16u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 2, 3> l_a[4] = tint_symbol(a, 0u);
+  const matrix<float16_t, 2, 3> l_a_i = tint_symbol_1(a, 32u);
+  uint2 ubo_load_4 = a[2].zw;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const vector<float16_t, 3> l_a_i_i = vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x3_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..a57e6de
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,44 @@
+SKIP: FAILED
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[4];
+};
+
+matrix<float16_t, 2, 3> tint_symbol_1(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  return matrix<float16_t, 2, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]));
+}
+
+typedef matrix<float16_t, 2, 3> tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[4], uint offset) {
+  matrix<float16_t, 2, 3> arr[4] = (matrix<float16_t, 2, 3>[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 16u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 2, 3> l_a[4] = tint_symbol(a, 0u);
+  const matrix<float16_t, 2, 3> l_a_i = tint_symbol_1(a, 32u);
+  uint2 ubo_load_4 = a[2].zw;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const vector<float16_t, 3> l_a_i_i = vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000016DBC7DD4E0(5,8-16): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f16/static_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat2x3_f16/static_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..4d2072d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f16/static_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,40 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct mat2x3_f16 {
+  f16vec3 col0;
+  f16vec3 col1;
+};
+
+layout(binding = 0, std140) uniform a_block_std140_ubo {
+  mat2x3_f16 inner[4];
+} a;
+
+f16mat2x3 conv_mat2x3_f16(mat2x3_f16 val) {
+  return f16mat2x3(val.col0, val.col1);
+}
+
+f16mat2x3[4] conv_arr4_mat2x3_f16(mat2x3_f16 val[4]) {
+  f16mat2x3 arr[4] = f16mat2x3[4](f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_mat2x3_f16(val[i]);
+    }
+  }
+  return arr;
+}
+
+void f() {
+  f16mat2x3 p_a[4] = conv_arr4_mat2x3_f16(a.inner);
+  f16mat2x3 p_a_2 = conv_mat2x3_f16(a.inner[2u]);
+  f16vec3 p_a_2_1 = a.inner[2u].col1;
+  f16mat2x3 l_a[4] = conv_arr4_mat2x3_f16(a.inner);
+  f16mat2x3 l_a_i = conv_mat2x3_f16(a.inner[2u]);
+  f16vec3 l_a_i_i = a.inner[2u].col1;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f16/static_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat2x3_f16/static_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..c864412
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f16/static_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,23 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+kernel void f(const constant tint_array<half2x3, 4>* tint_symbol [[buffer(0)]]) {
+  tint_array<half2x3, 4> const l_a = *(tint_symbol);
+  half2x3 const l_a_i = (*(tint_symbol))[2];
+  half3 const l_a_i_i = (*(tint_symbol))[2][1];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f16/static_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat2x3_f16/static_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..dbc9807
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f16/static_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,123 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 71
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %a_block_std140 "a_block_std140"
+               OpMemberName %a_block_std140 0 "inner"
+               OpName %mat2x3_f16 "mat2x3_f16"
+               OpMemberName %mat2x3_f16 0 "col0"
+               OpMemberName %mat2x3_f16 1 "col1"
+               OpName %a "a"
+               OpName %conv_mat2x3_f16 "conv_mat2x3_f16"
+               OpName %val "val"
+               OpName %conv_arr4_mat2x3_f16 "conv_arr4_mat2x3_f16"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %f "f"
+               OpDecorate %a_block_std140 Block
+               OpMemberDecorate %a_block_std140 0 Offset 0
+               OpMemberDecorate %mat2x3_f16 0 Offset 0
+               OpMemberDecorate %mat2x3_f16 1 Offset 8
+               OpDecorate %_arr_mat2x3_f16_uint_4 ArrayStride 16
+               OpDecorate %a NonWritable
+               OpDecorate %a DescriptorSet 0
+               OpDecorate %a Binding 0
+               OpDecorate %_arr_mat2v3half_uint_4 ArrayStride 16
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+ %mat2x3_f16 = OpTypeStruct %v3half %v3half
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat2x3_f16_uint_4 = OpTypeArray %mat2x3_f16 %uint_4
+%a_block_std140 = OpTypeStruct %_arr_mat2x3_f16_uint_4
+%_ptr_Uniform_a_block_std140 = OpTypePointer Uniform %a_block_std140
+          %a = OpVariable %_ptr_Uniform_a_block_std140 Uniform
+ %mat2v3half = OpTypeMatrix %v3half 2
+         %10 = OpTypeFunction %mat2v3half %mat2x3_f16
+%_arr_mat2v3half_uint_4 = OpTypeArray %mat2v3half %uint_4
+         %18 = OpTypeFunction %_arr_mat2v3half_uint_4 %_arr_mat2x3_f16_uint_4
+%_ptr_Function__arr_mat2v3half_uint_4 = OpTypePointer Function %_arr_mat2v3half_uint_4
+         %25 = OpConstantNull %_arr_mat2v3half_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %28 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_mat2x3_f16_uint_4 = OpTypePointer Function %_arr_mat2x3_f16_uint_4
+         %41 = OpConstantNull %_arr_mat2x3_f16_uint_4
+%_ptr_Function_mat2v3half = OpTypePointer Function %mat2v3half
+%_ptr_Function_mat2x3_f16 = OpTypePointer Function %mat2x3_f16
+     %uint_1 = OpConstant %uint 1
+       %void = OpTypeVoid
+         %54 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_mat2x3_f16_uint_4 = OpTypePointer Uniform %_arr_mat2x3_f16_uint_4
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_mat2x3_f16 = OpTypePointer Uniform %mat2x3_f16
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+%conv_mat2x3_f16 = OpFunction %mat2v3half None %10
+        %val = OpFunctionParameter %mat2x3_f16
+         %14 = OpLabel
+         %15 = OpCompositeExtract %v3half %val 0
+         %16 = OpCompositeExtract %v3half %val 1
+         %17 = OpCompositeConstruct %mat2v3half %15 %16
+               OpReturnValue %17
+               OpFunctionEnd
+%conv_arr4_mat2x3_f16 = OpFunction %_arr_mat2v3half_uint_4 None %18
+      %val_0 = OpFunctionParameter %_arr_mat2x3_f16_uint_4
+         %22 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_mat2v3half_uint_4 Function %25
+          %i = OpVariable %_ptr_Function_uint Function %28
+%var_for_index = OpVariable %_ptr_Function__arr_mat2x3_f16_uint_4 Function %41
+               OpBranch %29
+         %29 = OpLabel
+               OpLoopMerge %30 %31 None
+               OpBranch %32
+         %32 = OpLabel
+         %34 = OpLoad %uint %i
+         %35 = OpULessThan %bool %34 %uint_4
+         %33 = OpLogicalNot %bool %35
+               OpSelectionMerge %37 None
+               OpBranchConditional %33 %38 %37
+         %38 = OpLabel
+               OpBranch %30
+         %37 = OpLabel
+               OpStore %var_for_index %val_0
+         %42 = OpLoad %uint %i
+         %44 = OpAccessChain %_ptr_Function_mat2v3half %arr %42
+         %46 = OpLoad %uint %i
+         %48 = OpAccessChain %_ptr_Function_mat2x3_f16 %var_for_index %46
+         %49 = OpLoad %mat2x3_f16 %48
+         %45 = OpFunctionCall %mat2v3half %conv_mat2x3_f16 %49
+               OpStore %44 %45
+               OpBranch %31
+         %31 = OpLabel
+         %50 = OpLoad %uint %i
+         %52 = OpIAdd %uint %50 %uint_1
+               OpStore %i %52
+               OpBranch %29
+         %30 = OpLabel
+         %53 = OpLoad %_arr_mat2v3half_uint_4 %arr
+               OpReturnValue %53
+               OpFunctionEnd
+          %f = OpFunction %void None %54
+         %57 = OpLabel
+         %61 = OpAccessChain %_ptr_Uniform__arr_mat2x3_f16_uint_4 %a %uint_0
+         %62 = OpLoad %_arr_mat2x3_f16_uint_4 %61
+         %58 = OpFunctionCall %_arr_mat2v3half_uint_4 %conv_arr4_mat2x3_f16 %62
+         %66 = OpAccessChain %_ptr_Uniform_mat2x3_f16 %a %uint_0 %uint_2
+         %67 = OpLoad %mat2x3_f16 %66
+         %63 = OpFunctionCall %mat2v3half %conv_mat2x3_f16 %67
+         %69 = OpAccessChain %_ptr_Uniform_v3half %a %uint_0 %uint_2 %uint_1
+         %70 = OpLoad %v3half %69
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f16/static_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat2x3_f16/static_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..89154db
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f16/static_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,13 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : array<mat2x3<f16>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &(a);
+  let p_a_2 = &((*(p_a))[2]);
+  let p_a_2_1 = &((*(p_a_2))[1]);
+  let l_a : array<mat2x3<f16>, 4> = *(p_a);
+  let l_a_i : mat2x3<f16> = *(p_a_2);
+  let l_a_i_i : vec3<f16> = *(p_a_2_1);
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_builtin.wgsl b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_builtin.wgsl
new file mode 100644
index 0000000..b40fdcf
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_builtin.wgsl

@@ -0,0 +1,10 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : array<mat2x3<f16>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    let t = transpose(u[2]);
+    let l = length(u[0][1].zxy);
+    let a = abs(u[0][1].zxy.x);
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_builtin.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_builtin.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..277e378
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_builtin.wgsl.expected.dxc.hlsl

@@ -0,0 +1,31 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[4];
+};
+
+matrix<float16_t, 2, 3> tint_symbol(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  return matrix<float16_t, 2, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 3, 2> t = transpose(tint_symbol(u, 32u));
+  uint2 ubo_load_4 = u[0].zw;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const float16_t l = length(vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]).zxy);
+  uint2 ubo_load_5 = u[0].zw;
+  vector<float16_t, 2> ubo_load_5_xz = vector<float16_t, 2>(f16tof32(ubo_load_5 & 0xFFFF));
+  float16_t ubo_load_5_y = f16tof32(ubo_load_5[0] >> 16);
+  const float16_t a = abs(vector<float16_t, 3>(ubo_load_5_xz[0], ubo_load_5_y, ubo_load_5_xz[1]).zxy.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_builtin.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_builtin.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..d47ca3e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_builtin.wgsl.expected.fxc.hlsl

@@ -0,0 +1,36 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[4];
+};
+
+matrix<float16_t, 2, 3> tint_symbol(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  return matrix<float16_t, 2, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 3, 2> t = transpose(tint_symbol(u, 32u));
+  uint2 ubo_load_4 = u[0].zw;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const float16_t l = length(vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]).zxy);
+  uint2 ubo_load_5 = u[0].zw;
+  vector<float16_t, 2> ubo_load_5_xz = vector<float16_t, 2>(f16tof32(ubo_load_5 & 0xFFFF));
+  float16_t ubo_load_5_y = f16tof32(ubo_load_5[0] >> 16);
+  const float16_t a = abs(vector<float16_t, 3>(ubo_load_5_xz[0], ubo_load_5_y, ubo_load_5_xz[1]).zxy.x);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001B4475A47D0(5,8-16): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_builtin.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_builtin.wgsl.expected.glsl
new file mode 100644
index 0000000..f13e21d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_builtin.wgsl.expected.glsl

@@ -0,0 +1,27 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct mat2x3_f16 {
+  f16vec3 col0;
+  f16vec3 col1;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  mat2x3_f16 inner[4];
+} u;
+
+f16mat2x3 conv_mat2x3_f16(mat2x3_f16 val) {
+  return f16mat2x3(val.col0, val.col1);
+}
+
+void f() {
+  f16mat3x2 t = transpose(conv_mat2x3_f16(u.inner[2u]));
+  float16_t l = length(u.inner[0u].col1.zxy);
+  float16_t a = abs(u.inner[0u].col1.zxy[0u]);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_builtin.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_builtin.wgsl.expected.msl
new file mode 100644
index 0000000..205944f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_builtin.wgsl.expected.msl

@@ -0,0 +1,23 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+kernel void f(const constant tint_array<half2x3, 4>* tint_symbol [[buffer(0)]]) {
+  half3x2 const t = transpose((*(tint_symbol))[2]);
+  half const l = length(half3((*(tint_symbol))[0][1]).zxy);
+  half const a = fabs(half3((*(tint_symbol))[0][1]).zxy[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_builtin.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_builtin.wgsl.expected.spvasm
new file mode 100644
index 0000000..ec52e3a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_builtin.wgsl.expected.spvasm

@@ -0,0 +1,77 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 44
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+         %32 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %mat2x3_f16 "mat2x3_f16"
+               OpMemberName %mat2x3_f16 0 "col0"
+               OpMemberName %mat2x3_f16 1 "col1"
+               OpName %u "u"
+               OpName %conv_mat2x3_f16 "conv_mat2x3_f16"
+               OpName %val "val"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %mat2x3_f16 0 Offset 0
+               OpMemberDecorate %mat2x3_f16 1 Offset 8
+               OpDecorate %_arr_mat2x3_f16_uint_4 ArrayStride 16
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+ %mat2x3_f16 = OpTypeStruct %v3half %v3half
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat2x3_f16_uint_4 = OpTypeArray %mat2x3_f16 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_mat2x3_f16_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat2v3half = OpTypeMatrix %v3half 2
+         %10 = OpTypeFunction %mat2v3half %mat2x3_f16
+       %void = OpTypeVoid
+         %18 = OpTypeFunction %void
+     %v2half = OpTypeVector %half 2
+ %mat3v2half = OpTypeMatrix %v2half 3
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_mat2x3_f16 = OpTypePointer Uniform %mat2x3_f16
+         %33 = OpConstantNull %uint
+     %uint_1 = OpConstant %uint 1
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+%conv_mat2x3_f16 = OpFunction %mat2v3half None %10
+        %val = OpFunctionParameter %mat2x3_f16
+         %14 = OpLabel
+         %15 = OpCompositeExtract %v3half %val 0
+         %16 = OpCompositeExtract %v3half %val 1
+         %17 = OpCompositeConstruct %mat2v3half %15 %16
+               OpReturnValue %17
+               OpFunctionEnd
+          %f = OpFunction %void None %18
+         %21 = OpLabel
+         %29 = OpAccessChain %_ptr_Uniform_mat2x3_f16 %u %uint_0 %uint_2
+         %30 = OpLoad %mat2x3_f16 %29
+         %25 = OpFunctionCall %mat2v3half %conv_mat2x3_f16 %30
+         %22 = OpTranspose %mat3v2half %25
+         %36 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0 %33 %uint_1
+         %37 = OpLoad %v3half %36
+         %38 = OpVectorShuffle %v3half %37 %37 2 0 1
+         %31 = OpExtInst %half %32 Length %38
+         %40 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0 %33 %uint_1
+         %41 = OpLoad %v3half %40
+         %42 = OpVectorShuffle %v3half %41 %41 2 0 1
+         %43 = OpCompositeExtract %half %42 0
+         %39 = OpExtInst %half %32 FAbs %43
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_builtin.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_builtin.wgsl.expected.wgsl
new file mode 100644
index 0000000..8b29ec4
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_builtin.wgsl.expected.wgsl

@@ -0,0 +1,10 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : array<mat2x3<f16>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let t = transpose(u[2]);
+  let l = length(u[0][1].zxy);
+  let a = abs(u[0][1].zxy.x);
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_fn.wgsl b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_fn.wgsl
new file mode 100644
index 0000000..a2106b1
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_fn.wgsl

@@ -0,0 +1,16 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : array<mat2x3<f16>, 4>;
+
+fn a(a : array<mat2x3<f16>, 4>) {}
+fn b(m : mat2x3<f16>) {}
+fn c(v : vec3<f16>) {}
+fn d(f : f16) {}
+
+@compute @workgroup_size(1)
+fn f() {
+    a(u);
+    b(u[1]);
+    c(u[1][0].zxy);
+    d(u[1][0].zxy.x);
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_fn.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_fn.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..19bfcc6
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_fn.wgsl.expected.dxc.hlsl

@@ -0,0 +1,55 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[4];
+};
+
+void a(matrix<float16_t, 2, 3> a_1[4]) {
+}
+
+void b(matrix<float16_t, 2, 3> m) {
+}
+
+void c(vector<float16_t, 3> v) {
+}
+
+void d(float16_t f_1) {
+}
+
+matrix<float16_t, 2, 3> tint_symbol_1(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  return matrix<float16_t, 2, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]));
+}
+
+typedef matrix<float16_t, 2, 3> tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[4], uint offset) {
+  matrix<float16_t, 2, 3> arr[4] = (matrix<float16_t, 2, 3>[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 16u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(tint_symbol_1(u, 16u));
+  uint2 ubo_load_4 = u[1].xy;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  c(vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]).zxy);
+  uint2 ubo_load_5 = u[1].xy;
+  vector<float16_t, 2> ubo_load_5_xz = vector<float16_t, 2>(f16tof32(ubo_load_5 & 0xFFFF));
+  float16_t ubo_load_5_y = f16tof32(ubo_load_5[0] >> 16);
+  d(vector<float16_t, 3>(ubo_load_5_xz[0], ubo_load_5_y, ubo_load_5_xz[1]).zxy.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_fn.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_fn.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..c1210ac
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_fn.wgsl.expected.fxc.hlsl

@@ -0,0 +1,63 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[4];
+};
+
+void a(matrix<float16_t, 2, 3> a_1[4]) {
+}
+
+void b(matrix<float16_t, 2, 3> m) {
+}
+
+void c(vector<float16_t, 3> v) {
+}
+
+void d(float16_t f_1) {
+}
+
+matrix<float16_t, 2, 3> tint_symbol_1(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  return matrix<float16_t, 2, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]));
+}
+
+typedef matrix<float16_t, 2, 3> tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[4], uint offset) {
+  matrix<float16_t, 2, 3> arr[4] = (matrix<float16_t, 2, 3>[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 16u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(tint_symbol_1(u, 16u));
+  uint2 ubo_load_4 = u[1].xy;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  c(vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]).zxy);
+  uint2 ubo_load_5 = u[1].xy;
+  vector<float16_t, 2> ubo_load_5_xz = vector<float16_t, 2>(f16tof32(ubo_load_5 & 0xFFFF));
+  float16_t ubo_load_5_y = f16tof32(ubo_load_5[0] >> 16);
+  d(vector<float16_t, 3>(ubo_load_5_xz[0], ubo_load_5_y, ubo_load_5_xz[1]).zxy.x);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000002290980B6C0(5,15-23): error X3000: syntax error: unexpected token 'float16_t'
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000002290980B6C0(8,15-23): error X3000: syntax error: unexpected token 'float16_t'
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000002290980B6C0(11,15-23): error X3000: syntax error: unexpected token 'float16_t'
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000002290980B6C0(14,8-16): error X3000: unrecognized identifier 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_fn.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_fn.wgsl.expected.glsl
new file mode 100644
index 0000000..e385c2b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_fn.wgsl.expected.glsl

@@ -0,0 +1,50 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct mat2x3_f16 {
+  f16vec3 col0;
+  f16vec3 col1;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  mat2x3_f16 inner[4];
+} u;
+
+void a(f16mat2x3 a_1[4]) {
+}
+
+void b(f16mat2x3 m) {
+}
+
+void c(f16vec3 v) {
+}
+
+void d(float16_t f_1) {
+}
+
+f16mat2x3 conv_mat2x3_f16(mat2x3_f16 val) {
+  return f16mat2x3(val.col0, val.col1);
+}
+
+f16mat2x3[4] conv_arr4_mat2x3_f16(mat2x3_f16 val[4]) {
+  f16mat2x3 arr[4] = f16mat2x3[4](f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_mat2x3_f16(val[i]);
+    }
+  }
+  return arr;
+}
+
+void f() {
+  a(conv_arr4_mat2x3_f16(u.inner));
+  b(conv_mat2x3_f16(u.inner[1u]));
+  c(u.inner[1u].col0.zxy);
+  d(u.inner[1u].col0.zxy[0u]);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_fn.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_fn.wgsl.expected.msl
new file mode 100644
index 0000000..a1cefb5
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_fn.wgsl.expected.msl

@@ -0,0 +1,36 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+void a(tint_array<half2x3, 4> a_1) {
+}
+
+void b(half2x3 m) {
+}
+
+void c(half3 v) {
+}
+
+void d(half f_1) {
+}
+
+kernel void f(const constant tint_array<half2x3, 4>* tint_symbol [[buffer(0)]]) {
+  a(*(tint_symbol));
+  b((*(tint_symbol))[1]);
+  c(half3((*(tint_symbol))[1][0]).zxy);
+  d(half3((*(tint_symbol))[1][0]).zxy[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_fn.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_fn.wgsl.expected.spvasm
new file mode 100644
index 0000000..bbd5798
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_fn.wgsl.expected.spvasm

@@ -0,0 +1,163 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 95
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %mat2x3_f16 "mat2x3_f16"
+               OpMemberName %mat2x3_f16 0 "col0"
+               OpMemberName %mat2x3_f16 1 "col1"
+               OpName %u "u"
+               OpName %a "a"
+               OpName %a_1 "a_1"
+               OpName %b "b"
+               OpName %m "m"
+               OpName %c "c"
+               OpName %v "v"
+               OpName %d "d"
+               OpName %f_1 "f_1"
+               OpName %conv_mat2x3_f16 "conv_mat2x3_f16"
+               OpName %val "val"
+               OpName %conv_arr4_mat2x3_f16 "conv_arr4_mat2x3_f16"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %mat2x3_f16 0 Offset 0
+               OpMemberDecorate %mat2x3_f16 1 Offset 8
+               OpDecorate %_arr_mat2x3_f16_uint_4 ArrayStride 16
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpDecorate %_arr_mat2v3half_uint_4 ArrayStride 16
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+ %mat2x3_f16 = OpTypeStruct %v3half %v3half
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat2x3_f16_uint_4 = OpTypeArray %mat2x3_f16 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_mat2x3_f16_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+       %void = OpTypeVoid
+ %mat2v3half = OpTypeMatrix %v3half 2
+%_arr_mat2v3half_uint_4 = OpTypeArray %mat2v3half %uint_4
+         %10 = OpTypeFunction %void %_arr_mat2v3half_uint_4
+         %17 = OpTypeFunction %void %mat2v3half
+         %21 = OpTypeFunction %void %v3half
+         %25 = OpTypeFunction %void %half
+         %29 = OpTypeFunction %mat2v3half %mat2x3_f16
+         %36 = OpTypeFunction %_arr_mat2v3half_uint_4 %_arr_mat2x3_f16_uint_4
+%_ptr_Function__arr_mat2v3half_uint_4 = OpTypePointer Function %_arr_mat2v3half_uint_4
+         %42 = OpConstantNull %_arr_mat2v3half_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %45 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_mat2x3_f16_uint_4 = OpTypePointer Function %_arr_mat2x3_f16_uint_4
+         %58 = OpConstantNull %_arr_mat2x3_f16_uint_4
+%_ptr_Function_mat2v3half = OpTypePointer Function %mat2v3half
+%_ptr_Function_mat2x3_f16 = OpTypePointer Function %mat2x3_f16
+     %uint_1 = OpConstant %uint 1
+         %71 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_mat2x3_f16_uint_4 = OpTypePointer Uniform %_arr_mat2x3_f16_uint_4
+%_ptr_Uniform_mat2x3_f16 = OpTypePointer Uniform %mat2x3_f16
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+          %a = OpFunction %void None %10
+        %a_1 = OpFunctionParameter %_arr_mat2v3half_uint_4
+         %16 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %b = OpFunction %void None %17
+          %m = OpFunctionParameter %mat2v3half
+         %20 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %c = OpFunction %void None %21
+          %v = OpFunctionParameter %v3half
+         %24 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %d = OpFunction %void None %25
+        %f_1 = OpFunctionParameter %half
+         %28 = OpLabel
+               OpReturn
+               OpFunctionEnd
+%conv_mat2x3_f16 = OpFunction %mat2v3half None %29
+        %val = OpFunctionParameter %mat2x3_f16
+         %32 = OpLabel
+         %33 = OpCompositeExtract %v3half %val 0
+         %34 = OpCompositeExtract %v3half %val 1
+         %35 = OpCompositeConstruct %mat2v3half %33 %34
+               OpReturnValue %35
+               OpFunctionEnd
+%conv_arr4_mat2x3_f16 = OpFunction %_arr_mat2v3half_uint_4 None %36
+      %val_0 = OpFunctionParameter %_arr_mat2x3_f16_uint_4
+         %39 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_mat2v3half_uint_4 Function %42
+          %i = OpVariable %_ptr_Function_uint Function %45
+%var_for_index = OpVariable %_ptr_Function__arr_mat2x3_f16_uint_4 Function %58
+               OpBranch %46
+         %46 = OpLabel
+               OpLoopMerge %47 %48 None
+               OpBranch %49
+         %49 = OpLabel
+         %51 = OpLoad %uint %i
+         %52 = OpULessThan %bool %51 %uint_4
+         %50 = OpLogicalNot %bool %52
+               OpSelectionMerge %54 None
+               OpBranchConditional %50 %55 %54
+         %55 = OpLabel
+               OpBranch %47
+         %54 = OpLabel
+               OpStore %var_for_index %val_0
+         %59 = OpLoad %uint %i
+         %61 = OpAccessChain %_ptr_Function_mat2v3half %arr %59
+         %63 = OpLoad %uint %i
+         %65 = OpAccessChain %_ptr_Function_mat2x3_f16 %var_for_index %63
+         %66 = OpLoad %mat2x3_f16 %65
+         %62 = OpFunctionCall %mat2v3half %conv_mat2x3_f16 %66
+               OpStore %61 %62
+               OpBranch %48
+         %48 = OpLabel
+         %67 = OpLoad %uint %i
+         %69 = OpIAdd %uint %67 %uint_1
+               OpStore %i %69
+               OpBranch %46
+         %47 = OpLabel
+         %70 = OpLoad %_arr_mat2v3half_uint_4 %arr
+               OpReturnValue %70
+               OpFunctionEnd
+          %f = OpFunction %void None %71
+         %73 = OpLabel
+         %78 = OpAccessChain %_ptr_Uniform__arr_mat2x3_f16_uint_4 %u %uint_0
+         %79 = OpLoad %_arr_mat2x3_f16_uint_4 %78
+         %75 = OpFunctionCall %_arr_mat2v3half_uint_4 %conv_arr4_mat2x3_f16 %79
+         %74 = OpFunctionCall %void %a %75
+         %83 = OpAccessChain %_ptr_Uniform_mat2x3_f16 %u %uint_0 %uint_1
+         %84 = OpLoad %mat2x3_f16 %83
+         %81 = OpFunctionCall %mat2v3half %conv_mat2x3_f16 %84
+         %80 = OpFunctionCall %void %b %81
+         %87 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0 %uint_1 %uint_0
+         %88 = OpLoad %v3half %87
+         %89 = OpVectorShuffle %v3half %88 %88 2 0 1
+         %85 = OpFunctionCall %void %c %89
+         %91 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0 %uint_1 %uint_0
+         %92 = OpLoad %v3half %91
+         %93 = OpVectorShuffle %v3half %92 %92 2 0 1
+         %94 = OpCompositeExtract %half %93 0
+         %90 = OpFunctionCall %void %d %94
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_fn.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_fn.wgsl.expected.wgsl
new file mode 100644
index 0000000..d0132c8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_fn.wgsl.expected.wgsl

@@ -0,0 +1,23 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : array<mat2x3<f16>, 4>;
+
+fn a(a : array<mat2x3<f16>, 4>) {
+}
+
+fn b(m : mat2x3<f16>) {
+}
+
+fn c(v : vec3<f16>) {
+}
+
+fn d(f : f16) {
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  a(u);
+  b(u[1]);
+  c(u[1][0].zxy);
+  d(u[1][0].zxy.x);
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_private.wgsl b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_private.wgsl
new file mode 100644
index 0000000..ce113f8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_private.wgsl

@@ -0,0 +1,12 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : array<mat2x3<f16>, 4>;
+var<private> p : array<mat2x3<f16>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    p = u;
+    p[1] = u[2];
+    p[1][0] = u[0][1].zxy;
+    p[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_private.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_private.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..9fbd299
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_private.wgsl.expected.dxc.hlsl

@@ -0,0 +1,41 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[4];
+};
+static matrix<float16_t, 2, 3> p[4] = (matrix<float16_t, 2, 3>[4])0;
+
+matrix<float16_t, 2, 3> tint_symbol_1(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  return matrix<float16_t, 2, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]));
+}
+
+typedef matrix<float16_t, 2, 3> tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[4], uint offset) {
+  matrix<float16_t, 2, 3> arr[4] = (matrix<float16_t, 2, 3>[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 16u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = tint_symbol_1(u, 32u);
+  uint2 ubo_load_4 = u[0].zw;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  p[1][0] = vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]).zxy;
+  p[1][0].x = float16_t(f16tof32(((u[0].z) & 0xFFFF)));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_private.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_private.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..a5d49fe
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_private.wgsl.expected.fxc.hlsl

@@ -0,0 +1,46 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[4];
+};
+static matrix<float16_t, 2, 3> p[4] = (matrix<float16_t, 2, 3>[4])0;
+
+matrix<float16_t, 2, 3> tint_symbol_1(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  return matrix<float16_t, 2, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]));
+}
+
+typedef matrix<float16_t, 2, 3> tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[4], uint offset) {
+  matrix<float16_t, 2, 3> arr[4] = (matrix<float16_t, 2, 3>[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 16u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = tint_symbol_1(u, 32u);
+  uint2 ubo_load_4 = u[0].zw;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  p[1][0] = vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]).zxy;
+  p[1][0].x = float16_t(f16tof32(((u[0].z) & 0xFFFF)));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001C62197B570(4,15-23): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_private.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_private.wgsl.expected.glsl
new file mode 100644
index 0000000..dde921b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_private.wgsl.expected.glsl

@@ -0,0 +1,39 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct mat2x3_f16 {
+  f16vec3 col0;
+  f16vec3 col1;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  mat2x3_f16 inner[4];
+} u;
+
+f16mat2x3 p[4] = f16mat2x3[4](f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf));
+f16mat2x3 conv_mat2x3_f16(mat2x3_f16 val) {
+  return f16mat2x3(val.col0, val.col1);
+}
+
+f16mat2x3[4] conv_arr4_mat2x3_f16(mat2x3_f16 val[4]) {
+  f16mat2x3 arr[4] = f16mat2x3[4](f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_mat2x3_f16(val[i]);
+    }
+  }
+  return arr;
+}
+
+void f() {
+  p = conv_arr4_mat2x3_f16(u.inner);
+  p[1] = conv_mat2x3_f16(u.inner[2u]);
+  p[1][0] = u.inner[0u].col1.zxy;
+  p[1][0].x = u.inner[0u].col1[0u];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_private.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_private.wgsl.expected.msl
new file mode 100644
index 0000000..f406191
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_private.wgsl.expected.msl

@@ -0,0 +1,25 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+kernel void f(const constant tint_array<half2x3, 4>* tint_symbol_1 [[buffer(0)]]) {
+  thread tint_array<half2x3, 4> tint_symbol = {};
+  tint_symbol = *(tint_symbol_1);
+  tint_symbol[1] = (*(tint_symbol_1))[2];
+  tint_symbol[1][0] = half3((*(tint_symbol_1))[0][1]).zxy;
+  tint_symbol[1][0][0] = (*(tint_symbol_1))[0][1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_private.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_private.wgsl.expected.spvasm
new file mode 100644
index 0000000..eab324b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_private.wgsl.expected.spvasm

@@ -0,0 +1,143 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 86
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %mat2x3_f16 "mat2x3_f16"
+               OpMemberName %mat2x3_f16 0 "col0"
+               OpMemberName %mat2x3_f16 1 "col1"
+               OpName %u "u"
+               OpName %p "p"
+               OpName %conv_mat2x3_f16 "conv_mat2x3_f16"
+               OpName %val "val"
+               OpName %conv_arr4_mat2x3_f16 "conv_arr4_mat2x3_f16"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %mat2x3_f16 0 Offset 0
+               OpMemberDecorate %mat2x3_f16 1 Offset 8
+               OpDecorate %_arr_mat2x3_f16_uint_4 ArrayStride 16
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpDecorate %_arr_mat2v3half_uint_4 ArrayStride 16
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+ %mat2x3_f16 = OpTypeStruct %v3half %v3half
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat2x3_f16_uint_4 = OpTypeArray %mat2x3_f16 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_mat2x3_f16_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat2v3half = OpTypeMatrix %v3half 2
+%_arr_mat2v3half_uint_4 = OpTypeArray %mat2v3half %uint_4
+%_ptr_Private__arr_mat2v3half_uint_4 = OpTypePointer Private %_arr_mat2v3half_uint_4
+         %14 = OpConstantNull %_arr_mat2v3half_uint_4
+          %p = OpVariable %_ptr_Private__arr_mat2v3half_uint_4 Private %14
+         %15 = OpTypeFunction %mat2v3half %mat2x3_f16
+         %22 = OpTypeFunction %_arr_mat2v3half_uint_4 %_arr_mat2x3_f16_uint_4
+%_ptr_Function__arr_mat2v3half_uint_4 = OpTypePointer Function %_arr_mat2v3half_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %30 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_mat2x3_f16_uint_4 = OpTypePointer Function %_arr_mat2x3_f16_uint_4
+         %43 = OpConstantNull %_arr_mat2x3_f16_uint_4
+%_ptr_Function_mat2v3half = OpTypePointer Function %mat2v3half
+%_ptr_Function_mat2x3_f16 = OpTypePointer Function %mat2x3_f16
+     %uint_1 = OpConstant %uint 1
+       %void = OpTypeVoid
+         %56 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_mat2x3_f16_uint_4 = OpTypePointer Uniform %_arr_mat2x3_f16_uint_4
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Private_mat2v3half = OpTypePointer Private %mat2v3half
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_mat2x3_f16 = OpTypePointer Uniform %mat2x3_f16
+         %74 = OpConstantNull %int
+%_ptr_Private_v3half = OpTypePointer Private %v3half
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+%_ptr_Private_half = OpTypePointer Private %half
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+%conv_mat2x3_f16 = OpFunction %mat2v3half None %15
+        %val = OpFunctionParameter %mat2x3_f16
+         %18 = OpLabel
+         %19 = OpCompositeExtract %v3half %val 0
+         %20 = OpCompositeExtract %v3half %val 1
+         %21 = OpCompositeConstruct %mat2v3half %19 %20
+               OpReturnValue %21
+               OpFunctionEnd
+%conv_arr4_mat2x3_f16 = OpFunction %_arr_mat2v3half_uint_4 None %22
+      %val_0 = OpFunctionParameter %_arr_mat2x3_f16_uint_4
+         %25 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_mat2v3half_uint_4 Function %14
+          %i = OpVariable %_ptr_Function_uint Function %30
+%var_for_index = OpVariable %_ptr_Function__arr_mat2x3_f16_uint_4 Function %43
+               OpBranch %31
+         %31 = OpLabel
+               OpLoopMerge %32 %33 None
+               OpBranch %34
+         %34 = OpLabel
+         %36 = OpLoad %uint %i
+         %37 = OpULessThan %bool %36 %uint_4
+         %35 = OpLogicalNot %bool %37
+               OpSelectionMerge %39 None
+               OpBranchConditional %35 %40 %39
+         %40 = OpLabel
+               OpBranch %32
+         %39 = OpLabel
+               OpStore %var_for_index %val_0
+         %44 = OpLoad %uint %i
+         %46 = OpAccessChain %_ptr_Function_mat2v3half %arr %44
+         %48 = OpLoad %uint %i
+         %50 = OpAccessChain %_ptr_Function_mat2x3_f16 %var_for_index %48
+         %51 = OpLoad %mat2x3_f16 %50
+         %47 = OpFunctionCall %mat2v3half %conv_mat2x3_f16 %51
+               OpStore %46 %47
+               OpBranch %33
+         %33 = OpLabel
+         %52 = OpLoad %uint %i
+         %54 = OpIAdd %uint %52 %uint_1
+               OpStore %i %54
+               OpBranch %31
+         %32 = OpLabel
+         %55 = OpLoad %_arr_mat2v3half_uint_4 %arr
+               OpReturnValue %55
+               OpFunctionEnd
+          %f = OpFunction %void None %56
+         %59 = OpLabel
+         %63 = OpAccessChain %_ptr_Uniform__arr_mat2x3_f16_uint_4 %u %uint_0
+         %64 = OpLoad %_arr_mat2x3_f16_uint_4 %63
+         %60 = OpFunctionCall %_arr_mat2v3half_uint_4 %conv_arr4_mat2x3_f16 %64
+               OpStore %p %60
+         %68 = OpAccessChain %_ptr_Private_mat2v3half %p %int_1
+         %72 = OpAccessChain %_ptr_Uniform_mat2x3_f16 %u %uint_0 %uint_2
+         %73 = OpLoad %mat2x3_f16 %72
+         %69 = OpFunctionCall %mat2v3half %conv_mat2x3_f16 %73
+               OpStore %68 %69
+         %76 = OpAccessChain %_ptr_Private_v3half %p %int_1 %74
+         %78 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0 %30 %uint_1
+         %79 = OpLoad %v3half %78
+         %80 = OpVectorShuffle %v3half %79 %79 2 0 1
+               OpStore %76 %80
+         %82 = OpAccessChain %_ptr_Private_half %p %int_1 %74 %uint_0
+         %84 = OpAccessChain %_ptr_Uniform_half %u %uint_0 %30 %uint_1 %30
+         %85 = OpLoad %half %84
+               OpStore %82 %85
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_private.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_private.wgsl.expected.wgsl
new file mode 100644
index 0000000..33edad7
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_private.wgsl.expected.wgsl

@@ -0,0 +1,13 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : array<mat2x3<f16>, 4>;
+
+var<private> p : array<mat2x3<f16>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  p = u;
+  p[1] = u[2];
+  p[1][0] = u[0][1].zxy;
+  p[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_storage.wgsl b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_storage.wgsl
new file mode 100644
index 0000000..adc8b56
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_storage.wgsl

@@ -0,0 +1,12 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : array<mat2x3<f16>, 4>;
+@group(0) @binding(1) var<storage, read_write> s : array<mat2x3<f16>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    s = u;
+    s[1] = u[2];
+    s[1][0] = u[0][1].zxy;
+    s[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_storage.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_storage.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..26f9723
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_storage.wgsl.expected.dxc.hlsl

@@ -0,0 +1,55 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[4];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 2, 3> value) {
+  buffer.Store<vector<float16_t, 3> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 8u), value[1u]);
+}
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 2, 3> value[4]) {
+  matrix<float16_t, 2, 3> array[4] = value;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      tint_symbol_1(buffer, (offset + (i * 16u)), array[i]);
+    }
+  }
+}
+
+matrix<float16_t, 2, 3> tint_symbol_4(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  return matrix<float16_t, 2, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]));
+}
+
+typedef matrix<float16_t, 2, 3> tint_symbol_3_ret[4];
+tint_symbol_3_ret tint_symbol_3(uint4 buffer[4], uint offset) {
+  matrix<float16_t, 2, 3> arr[4] = (matrix<float16_t, 2, 3>[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_4(buffer, (offset + (i_1 * 16u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_3(u, 0u));
+  tint_symbol_1(s, 16u, tint_symbol_4(u, 32u));
+  uint2 ubo_load_4 = u[0].zw;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  s.Store<vector<float16_t, 3> >(16u, vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]).zxy);
+  s.Store<float16_t>(16u, float16_t(f16tof32(((u[0].z) & 0xFFFF))));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_storage.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_storage.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..da9a817
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_storage.wgsl.expected.fxc.hlsl

@@ -0,0 +1,61 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[4];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 2, 3> value) {
+  buffer.Store<vector<float16_t, 3> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 8u), value[1u]);
+}
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 2, 3> value[4]) {
+  matrix<float16_t, 2, 3> array[4] = value;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      tint_symbol_1(buffer, (offset + (i * 16u)), array[i]);
+    }
+  }
+}
+
+matrix<float16_t, 2, 3> tint_symbol_4(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  return matrix<float16_t, 2, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]));
+}
+
+typedef matrix<float16_t, 2, 3> tint_symbol_3_ret[4];
+tint_symbol_3_ret tint_symbol_3(uint4 buffer[4], uint offset) {
+  matrix<float16_t, 2, 3> arr[4] = (matrix<float16_t, 2, 3>[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_4(buffer, (offset + (i_1 * 16u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_3(u, 0u));
+  tint_symbol_1(s, 16u, tint_symbol_4(u, 32u));
+  uint2 ubo_load_4 = u[0].zw;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  s.Store<vector<float16_t, 3> >(16u, vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]).zxy);
+  s.Store<float16_t>(16u, float16_t(f16tof32(((u[0].z) & 0xFFFF))));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001AB590A03F0(6,68-76): error X3000: syntax error: unexpected token 'float16_t'
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001AB590A03F0(7,3-14): error X3018: invalid subscript 'Store'
+

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_storage.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_storage.wgsl.expected.glsl
new file mode 100644
index 0000000..48da1c9
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_storage.wgsl.expected.glsl

@@ -0,0 +1,42 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct mat2x3_f16 {
+  f16vec3 col0;
+  f16vec3 col1;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  mat2x3_f16 inner[4];
+} u;
+
+layout(binding = 1, std430) buffer u_block_ssbo {
+  f16mat2x3 inner[4];
+} s;
+
+f16mat2x3 conv_mat2x3_f16(mat2x3_f16 val) {
+  return f16mat2x3(val.col0, val.col1);
+}
+
+f16mat2x3[4] conv_arr4_mat2x3_f16(mat2x3_f16 val[4]) {
+  f16mat2x3 arr[4] = f16mat2x3[4](f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_mat2x3_f16(val[i]);
+    }
+  }
+  return arr;
+}
+
+void f() {
+  s.inner = conv_arr4_mat2x3_f16(u.inner);
+  s.inner[1] = conv_mat2x3_f16(u.inner[2u]);
+  s.inner[1][0] = u.inner[0u].col1.zxy;
+  s.inner[1][0].x = u.inner[0u].col1[0u];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_storage.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_storage.wgsl.expected.msl
new file mode 100644
index 0000000..edcc1d9
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_storage.wgsl.expected.msl

@@ -0,0 +1,24 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+kernel void f(device tint_array<half2x3, 4>* tint_symbol [[buffer(1)]], const constant tint_array<half2x3, 4>* tint_symbol_1 [[buffer(0)]]) {
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[2];
+  (*(tint_symbol))[1][0] = half3((*(tint_symbol_1))[0][1]).zxy;
+  (*(tint_symbol))[1][0][0] = (*(tint_symbol_1))[0][1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_storage.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_storage.wgsl.expected.spvasm
new file mode 100644
index 0000000..8d6eba2
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_storage.wgsl.expected.spvasm

@@ -0,0 +1,154 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 89
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %mat2x3_f16 "mat2x3_f16"
+               OpMemberName %mat2x3_f16 0 "col0"
+               OpMemberName %mat2x3_f16 1 "col1"
+               OpName %u "u"
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %s "s"
+               OpName %conv_mat2x3_f16 "conv_mat2x3_f16"
+               OpName %val "val"
+               OpName %conv_arr4_mat2x3_f16 "conv_arr4_mat2x3_f16"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %mat2x3_f16 0 Offset 0
+               OpMemberDecorate %mat2x3_f16 1 Offset 8
+               OpDecorate %_arr_mat2x3_f16_uint_4 ArrayStride 16
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 8
+               OpDecorate %_arr_mat2v3half_uint_4 ArrayStride 16
+               OpDecorate %s DescriptorSet 0
+               OpDecorate %s Binding 1
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+ %mat2x3_f16 = OpTypeStruct %v3half %v3half
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat2x3_f16_uint_4 = OpTypeArray %mat2x3_f16 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_mat2x3_f16_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat2v3half = OpTypeMatrix %v3half 2
+%_arr_mat2v3half_uint_4 = OpTypeArray %mat2v3half %uint_4
+    %u_block = OpTypeStruct %_arr_mat2v3half_uint_4
+%_ptr_StorageBuffer_u_block = OpTypePointer StorageBuffer %u_block
+          %s = OpVariable %_ptr_StorageBuffer_u_block StorageBuffer
+         %15 = OpTypeFunction %mat2v3half %mat2x3_f16
+         %22 = OpTypeFunction %_arr_mat2v3half_uint_4 %_arr_mat2x3_f16_uint_4
+%_ptr_Function__arr_mat2v3half_uint_4 = OpTypePointer Function %_arr_mat2v3half_uint_4
+         %28 = OpConstantNull %_arr_mat2v3half_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %31 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_mat2x3_f16_uint_4 = OpTypePointer Function %_arr_mat2x3_f16_uint_4
+         %44 = OpConstantNull %_arr_mat2x3_f16_uint_4
+%_ptr_Function_mat2v3half = OpTypePointer Function %mat2v3half
+%_ptr_Function_mat2x3_f16 = OpTypePointer Function %mat2x3_f16
+     %uint_1 = OpConstant %uint 1
+       %void = OpTypeVoid
+         %57 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_StorageBuffer__arr_mat2v3half_uint_4 = OpTypePointer StorageBuffer %_arr_mat2v3half_uint_4
+%_ptr_Uniform__arr_mat2x3_f16_uint_4 = OpTypePointer Uniform %_arr_mat2x3_f16_uint_4
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_StorageBuffer_mat2v3half = OpTypePointer StorageBuffer %mat2v3half
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_mat2x3_f16 = OpTypePointer Uniform %mat2x3_f16
+         %77 = OpConstantNull %int
+%_ptr_StorageBuffer_v3half = OpTypePointer StorageBuffer %v3half
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+%_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+%conv_mat2x3_f16 = OpFunction %mat2v3half None %15
+        %val = OpFunctionParameter %mat2x3_f16
+         %18 = OpLabel
+         %19 = OpCompositeExtract %v3half %val 0
+         %20 = OpCompositeExtract %v3half %val 1
+         %21 = OpCompositeConstruct %mat2v3half %19 %20
+               OpReturnValue %21
+               OpFunctionEnd
+%conv_arr4_mat2x3_f16 = OpFunction %_arr_mat2v3half_uint_4 None %22
+      %val_0 = OpFunctionParameter %_arr_mat2x3_f16_uint_4
+         %25 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_mat2v3half_uint_4 Function %28
+          %i = OpVariable %_ptr_Function_uint Function %31
+%var_for_index = OpVariable %_ptr_Function__arr_mat2x3_f16_uint_4 Function %44
+               OpBranch %32
+         %32 = OpLabel
+               OpLoopMerge %33 %34 None
+               OpBranch %35
+         %35 = OpLabel
+         %37 = OpLoad %uint %i
+         %38 = OpULessThan %bool %37 %uint_4
+         %36 = OpLogicalNot %bool %38
+               OpSelectionMerge %40 None
+               OpBranchConditional %36 %41 %40
+         %41 = OpLabel
+               OpBranch %33
+         %40 = OpLabel
+               OpStore %var_for_index %val_0
+         %45 = OpLoad %uint %i
+         %47 = OpAccessChain %_ptr_Function_mat2v3half %arr %45
+         %49 = OpLoad %uint %i
+         %51 = OpAccessChain %_ptr_Function_mat2x3_f16 %var_for_index %49
+         %52 = OpLoad %mat2x3_f16 %51
+         %48 = OpFunctionCall %mat2v3half %conv_mat2x3_f16 %52
+               OpStore %47 %48
+               OpBranch %34
+         %34 = OpLabel
+         %53 = OpLoad %uint %i
+         %55 = OpIAdd %uint %53 %uint_1
+               OpStore %i %55
+               OpBranch %32
+         %33 = OpLabel
+         %56 = OpLoad %_arr_mat2v3half_uint_4 %arr
+               OpReturnValue %56
+               OpFunctionEnd
+          %f = OpFunction %void None %57
+         %60 = OpLabel
+         %63 = OpAccessChain %_ptr_StorageBuffer__arr_mat2v3half_uint_4 %s %uint_0
+         %66 = OpAccessChain %_ptr_Uniform__arr_mat2x3_f16_uint_4 %u %uint_0
+         %67 = OpLoad %_arr_mat2x3_f16_uint_4 %66
+         %64 = OpFunctionCall %_arr_mat2v3half_uint_4 %conv_arr4_mat2x3_f16 %67
+               OpStore %63 %64
+         %71 = OpAccessChain %_ptr_StorageBuffer_mat2v3half %s %uint_0 %int_1
+         %75 = OpAccessChain %_ptr_Uniform_mat2x3_f16 %u %uint_0 %uint_2
+         %76 = OpLoad %mat2x3_f16 %75
+         %72 = OpFunctionCall %mat2v3half %conv_mat2x3_f16 %76
+               OpStore %71 %72
+         %79 = OpAccessChain %_ptr_StorageBuffer_v3half %s %uint_0 %int_1 %77
+         %81 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0 %31 %uint_1
+         %82 = OpLoad %v3half %81
+         %83 = OpVectorShuffle %v3half %82 %82 2 0 1
+               OpStore %79 %83
+         %85 = OpAccessChain %_ptr_StorageBuffer_half %s %uint_0 %int_1 %77 %uint_0
+         %87 = OpAccessChain %_ptr_Uniform_half %u %uint_0 %31 %uint_1 %31
+         %88 = OpLoad %half %87
+               OpStore %85 %88
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_storage.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_storage.wgsl.expected.wgsl
new file mode 100644
index 0000000..a29c5d3
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_storage.wgsl.expected.wgsl

@@ -0,0 +1,13 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : array<mat2x3<f16>, 4>;
+
+@group(0) @binding(1) var<storage, read_write> s : array<mat2x3<f16>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  s = u;
+  s[1] = u[2];
+  s[1][0] = u[0][1].zxy;
+  s[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_workgroup.wgsl b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_workgroup.wgsl
new file mode 100644
index 0000000..dc18eed
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_workgroup.wgsl

@@ -0,0 +1,12 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : array<mat2x3<f16>, 4>;
+var<workgroup> w : array<mat2x3<f16>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    w = u;
+    w[1] = u[2];
+    w[1][0] = u[0][1].zxy;
+    w[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_workgroup.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_workgroup.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..4f956e9
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_workgroup.wgsl.expected.dxc.hlsl

@@ -0,0 +1,56 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[4];
+};
+groupshared matrix<float16_t, 2, 3> w[4];
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+matrix<float16_t, 2, 3> tint_symbol_3(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  return matrix<float16_t, 2, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]));
+}
+
+typedef matrix<float16_t, 2, 3> tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[4], uint offset) {
+  matrix<float16_t, 2, 3> arr[4] = (matrix<float16_t, 2, 3>[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 16u)));
+    }
+  }
+  return arr;
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      const uint i = idx;
+      w[i] = matrix<float16_t, 2, 3>((float16_t(0.0h)).xxx, (float16_t(0.0h)).xxx);
+    }
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = tint_symbol_3(u, 32u);
+  uint2 ubo_load_4 = u[0].zw;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  w[1][0] = vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]).zxy;
+  w[1][0].x = float16_t(f16tof32(((u[0].z) & 0xFFFF)));
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_workgroup.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_workgroup.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..b3d2fa5
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_workgroup.wgsl.expected.fxc.hlsl

@@ -0,0 +1,61 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[4];
+};
+groupshared matrix<float16_t, 2, 3> w[4];
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+matrix<float16_t, 2, 3> tint_symbol_3(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  return matrix<float16_t, 2, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]));
+}
+
+typedef matrix<float16_t, 2, 3> tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[4], uint offset) {
+  matrix<float16_t, 2, 3> arr[4] = (matrix<float16_t, 2, 3>[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 16u)));
+    }
+  }
+  return arr;
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      const uint i = idx;
+      w[i] = matrix<float16_t, 2, 3>((float16_t(0.0h)).xxx, (float16_t(0.0h)).xxx);
+    }
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = tint_symbol_3(u, 32u);
+  uint2 ubo_load_4 = u[0].zw;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  w[1][0] = vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]).zxy;
+  w[1][0].x = float16_t(f16tof32(((u[0].z) & 0xFFFF)));
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000019397EFCDD0(4,20-28): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_workgroup.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_workgroup.wgsl.expected.glsl
new file mode 100644
index 0000000..36138b0
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_workgroup.wgsl.expected.glsl

@@ -0,0 +1,46 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct mat2x3_f16 {
+  f16vec3 col0;
+  f16vec3 col1;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  mat2x3_f16 inner[4];
+} u;
+
+shared f16mat2x3 w[4];
+f16mat2x3 conv_mat2x3_f16(mat2x3_f16 val) {
+  return f16mat2x3(val.col0, val.col1);
+}
+
+f16mat2x3[4] conv_arr4_mat2x3_f16(mat2x3_f16 val[4]) {
+  f16mat2x3 arr[4] = f16mat2x3[4](f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_mat2x3_f16(val[i]);
+    }
+  }
+  return arr;
+}
+
+void f(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      uint i = idx;
+      w[i] = f16mat2x3(f16vec3(0.0hf), f16vec3(0.0hf));
+    }
+  }
+  barrier();
+  w = conv_arr4_mat2x3_f16(u.inner);
+  w[1] = conv_mat2x3_f16(u.inner[2u]);
+  w[1][0] = u.inner[0u].col1.zxy;
+  w[1][0].x = u.inner[0u].col1[0u];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f(gl_LocalInvocationIndex);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_workgroup.wgsl.expected.msl
new file mode 100644
index 0000000..ac44a93
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_workgroup.wgsl.expected.msl

@@ -0,0 +1,38 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct tint_symbol_5 {
+  tint_array<half2x3, 4> w;
+};
+
+void f_inner(uint local_invocation_index, threadgroup tint_array<half2x3, 4>* const tint_symbol, const constant tint_array<half2x3, 4>* const tint_symbol_1) {
+  for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+    uint const i = idx;
+    (*(tint_symbol))[i] = half2x3(half3(0.0h), half3(0.0h));
+  }
+  threadgroup_barrier(mem_flags::mem_threadgroup);
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[2];
+  (*(tint_symbol))[1][0] = half3((*(tint_symbol_1))[0][1]).zxy;
+  (*(tint_symbol))[1][0][0] = (*(tint_symbol_1))[0][1][0];
+}
+
+kernel void f(const constant tint_array<half2x3, 4>* tint_symbol_4 [[buffer(0)]], threadgroup tint_symbol_5* tint_symbol_3 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
+  threadgroup tint_array<half2x3, 4>* const tint_symbol_2 = &((*(tint_symbol_3)).w);
+  f_inner(local_invocation_index, tint_symbol_2, tint_symbol_4);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_workgroup.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_workgroup.wgsl.expected.spvasm
new file mode 100644
index 0000000..3cdc7db
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_workgroup.wgsl.expected.spvasm

@@ -0,0 +1,186 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 111
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f" %local_invocation_index_1
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %local_invocation_index_1 "local_invocation_index_1"
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %mat2x3_f16 "mat2x3_f16"
+               OpMemberName %mat2x3_f16 0 "col0"
+               OpMemberName %mat2x3_f16 1 "col1"
+               OpName %u "u"
+               OpName %w "w"
+               OpName %conv_mat2x3_f16 "conv_mat2x3_f16"
+               OpName %val "val"
+               OpName %conv_arr4_mat2x3_f16 "conv_arr4_mat2x3_f16"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %f_inner "f_inner"
+               OpName %local_invocation_index "local_invocation_index"
+               OpName %idx "idx"
+               OpName %f "f"
+               OpDecorate %local_invocation_index_1 BuiltIn LocalInvocationIndex
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %mat2x3_f16 0 Offset 0
+               OpMemberDecorate %mat2x3_f16 1 Offset 8
+               OpDecorate %_arr_mat2x3_f16_uint_4 ArrayStride 16
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpDecorate %_arr_mat2v3half_uint_4 ArrayStride 16
+       %uint = OpTypeInt 32 0
+%_ptr_Input_uint = OpTypePointer Input %uint
+%local_invocation_index_1 = OpVariable %_ptr_Input_uint Input
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+ %mat2x3_f16 = OpTypeStruct %v3half %v3half
+     %uint_4 = OpConstant %uint 4
+%_arr_mat2x3_f16_uint_4 = OpTypeArray %mat2x3_f16 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_mat2x3_f16_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat2v3half = OpTypeMatrix %v3half 2
+%_arr_mat2v3half_uint_4 = OpTypeArray %mat2v3half %uint_4
+%_ptr_Workgroup__arr_mat2v3half_uint_4 = OpTypePointer Workgroup %_arr_mat2v3half_uint_4
+          %w = OpVariable %_ptr_Workgroup__arr_mat2v3half_uint_4 Workgroup
+         %16 = OpTypeFunction %mat2v3half %mat2x3_f16
+         %23 = OpTypeFunction %_arr_mat2v3half_uint_4 %_arr_mat2x3_f16_uint_4
+%_ptr_Function__arr_mat2v3half_uint_4 = OpTypePointer Function %_arr_mat2v3half_uint_4
+         %29 = OpConstantNull %_arr_mat2v3half_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %32 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_mat2x3_f16_uint_4 = OpTypePointer Function %_arr_mat2x3_f16_uint_4
+         %45 = OpConstantNull %_arr_mat2x3_f16_uint_4
+%_ptr_Function_mat2v3half = OpTypePointer Function %mat2v3half
+%_ptr_Function_mat2x3_f16 = OpTypePointer Function %mat2x3_f16
+     %uint_1 = OpConstant %uint 1
+       %void = OpTypeVoid
+         %58 = OpTypeFunction %void %uint
+%_ptr_Workgroup_mat2v3half = OpTypePointer Workgroup %mat2v3half
+         %76 = OpConstantNull %mat2v3half
+     %uint_2 = OpConstant %uint 2
+   %uint_264 = OpConstant %uint 264
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_mat2x3_f16_uint_4 = OpTypePointer Uniform %_arr_mat2x3_f16_uint_4
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Uniform_mat2x3_f16 = OpTypePointer Uniform %mat2x3_f16
+         %94 = OpConstantNull %int
+%_ptr_Workgroup_v3half = OpTypePointer Workgroup %v3half
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+%_ptr_Workgroup_half = OpTypePointer Workgroup %half
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+        %106 = OpTypeFunction %void
+%conv_mat2x3_f16 = OpFunction %mat2v3half None %16
+        %val = OpFunctionParameter %mat2x3_f16
+         %19 = OpLabel
+         %20 = OpCompositeExtract %v3half %val 0
+         %21 = OpCompositeExtract %v3half %val 1
+         %22 = OpCompositeConstruct %mat2v3half %20 %21
+               OpReturnValue %22
+               OpFunctionEnd
+%conv_arr4_mat2x3_f16 = OpFunction %_arr_mat2v3half_uint_4 None %23
+      %val_0 = OpFunctionParameter %_arr_mat2x3_f16_uint_4
+         %26 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_mat2v3half_uint_4 Function %29
+          %i = OpVariable %_ptr_Function_uint Function %32
+%var_for_index = OpVariable %_ptr_Function__arr_mat2x3_f16_uint_4 Function %45
+               OpBranch %33
+         %33 = OpLabel
+               OpLoopMerge %34 %35 None
+               OpBranch %36
+         %36 = OpLabel
+         %38 = OpLoad %uint %i
+         %39 = OpULessThan %bool %38 %uint_4
+         %37 = OpLogicalNot %bool %39
+               OpSelectionMerge %41 None
+               OpBranchConditional %37 %42 %41
+         %42 = OpLabel
+               OpBranch %34
+         %41 = OpLabel
+               OpStore %var_for_index %val_0
+         %46 = OpLoad %uint %i
+         %48 = OpAccessChain %_ptr_Function_mat2v3half %arr %46
+         %50 = OpLoad %uint %i
+         %52 = OpAccessChain %_ptr_Function_mat2x3_f16 %var_for_index %50
+         %53 = OpLoad %mat2x3_f16 %52
+         %49 = OpFunctionCall %mat2v3half %conv_mat2x3_f16 %53
+               OpStore %48 %49
+               OpBranch %35
+         %35 = OpLabel
+         %54 = OpLoad %uint %i
+         %56 = OpIAdd %uint %54 %uint_1
+               OpStore %i %56
+               OpBranch %33
+         %34 = OpLabel
+         %57 = OpLoad %_arr_mat2v3half_uint_4 %arr
+               OpReturnValue %57
+               OpFunctionEnd
+    %f_inner = OpFunction %void None %58
+%local_invocation_index = OpFunctionParameter %uint
+         %62 = OpLabel
+        %idx = OpVariable %_ptr_Function_uint Function %32
+               OpStore %idx %local_invocation_index
+               OpBranch %64
+         %64 = OpLabel
+               OpLoopMerge %65 %66 None
+               OpBranch %67
+         %67 = OpLabel
+         %69 = OpLoad %uint %idx
+         %70 = OpULessThan %bool %69 %uint_4
+         %68 = OpLogicalNot %bool %70
+               OpSelectionMerge %71 None
+               OpBranchConditional %68 %72 %71
+         %72 = OpLabel
+               OpBranch %65
+         %71 = OpLabel
+         %73 = OpLoad %uint %idx
+         %75 = OpAccessChain %_ptr_Workgroup_mat2v3half %w %73
+               OpStore %75 %76
+               OpBranch %66
+         %66 = OpLabel
+         %77 = OpLoad %uint %idx
+         %78 = OpIAdd %uint %77 %uint_1
+               OpStore %idx %78
+               OpBranch %64
+         %65 = OpLabel
+               OpControlBarrier %uint_2 %uint_2 %uint_264
+         %85 = OpAccessChain %_ptr_Uniform__arr_mat2x3_f16_uint_4 %u %uint_0
+         %86 = OpLoad %_arr_mat2x3_f16_uint_4 %85
+         %82 = OpFunctionCall %_arr_mat2v3half_uint_4 %conv_arr4_mat2x3_f16 %86
+               OpStore %w %82
+         %89 = OpAccessChain %_ptr_Workgroup_mat2v3half %w %int_1
+         %92 = OpAccessChain %_ptr_Uniform_mat2x3_f16 %u %uint_0 %uint_2
+         %93 = OpLoad %mat2x3_f16 %92
+         %90 = OpFunctionCall %mat2v3half %conv_mat2x3_f16 %93
+               OpStore %89 %90
+         %96 = OpAccessChain %_ptr_Workgroup_v3half %w %int_1 %94
+         %98 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0 %32 %uint_1
+         %99 = OpLoad %v3half %98
+        %100 = OpVectorShuffle %v3half %99 %99 2 0 1
+               OpStore %96 %100
+        %102 = OpAccessChain %_ptr_Workgroup_half %w %int_1 %94 %uint_0
+        %104 = OpAccessChain %_ptr_Uniform_half %u %uint_0 %32 %uint_1 %32
+        %105 = OpLoad %half %104
+               OpStore %102 %105
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %106
+        %108 = OpLabel
+        %110 = OpLoad %uint %local_invocation_index_1
+        %109 = OpFunctionCall %void %f_inner %110
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_workgroup.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_workgroup.wgsl.expected.wgsl
new file mode 100644
index 0000000..0b15098
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_workgroup.wgsl.expected.wgsl

@@ -0,0 +1,13 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : array<mat2x3<f16>, 4>;
+
+var<workgroup> w : array<mat2x3<f16>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  w = u;
+  w[1] = u[2];
+  w[1][0] = u[0][1].zxy;
+  w[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f32/dynamic_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/array/mat2x3_f32/dynamic_index_via_ptr.wgsl
new file mode 100644
index 0000000..f50aa0c
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f32/dynamic_index_via_ptr.wgsl

@@ -0,0 +1,15 @@
+@group(0) @binding(0) var<uniform> a : array<mat2x3<f32>, 4>;
+
+var<private> counter = 0;
+fn i() -> i32 { counter++; return counter; }
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a       = &a;
+  let p_a_i     = &((*p_a)[i()]);
+  let p_a_i_i   = &((*p_a_i)[i()]);
+
+  let l_a       : array<mat2x3<f32>, 4> = *p_a;
+  let l_a_i     : mat2x3<f32>           = *p_a_i;
+  let l_a_i_i   : vec3<f32>             = *p_a_i_i;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x3_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..0533b18
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,37 @@
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[8];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+float2x3 tint_symbol_1(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz));
+}
+
+typedef float2x3 tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[8], uint offset) {
+  float2x3 arr[4] = (float2x3[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 32u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_a_i_save = i();
+  const int p_a_i_i_save = i();
+  const float2x3 l_a[4] = tint_symbol(a, 0u);
+  const float2x3 l_a_i = tint_symbol_1(a, (32u * uint(p_a_i_save)));
+  const uint scalar_offset_2 = (((32u * uint(p_a_i_save)) + (16u * uint(p_a_i_i_save)))) / 4;
+  const float3 l_a_i_i = asfloat(a[scalar_offset_2 / 4].xyz);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x3_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..0533b18
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,37 @@
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[8];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+float2x3 tint_symbol_1(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz));
+}
+
+typedef float2x3 tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[8], uint offset) {
+  float2x3 arr[4] = (float2x3[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 32u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_a_i_save = i();
+  const int p_a_i_i_save = i();
+  const float2x3 l_a[4] = tint_symbol(a, 0u);
+  const float2x3 l_a_i = tint_symbol_1(a, (32u * uint(p_a_i_save)));
+  const uint scalar_offset_2 = (((32u * uint(p_a_i_save)) + (16u * uint(p_a_i_i_save)))) / 4;
+  const float3 l_a_i_i = asfloat(a[scalar_offset_2 / 4].xyz);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f32/dynamic_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat2x3_f32/dynamic_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..7bdae86
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f32/dynamic_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,27 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform a_block_ubo {
+  mat2x3 inner[4];
+} a;
+
+int counter = 0;
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+void f() {
+  int tint_symbol = i();
+  int p_a_i_save = tint_symbol;
+  int tint_symbol_1 = i();
+  int p_a_i_i_save = tint_symbol_1;
+  mat2x3 l_a[4] = a.inner;
+  mat2x3 l_a_i = a.inner[p_a_i_save];
+  vec3 l_a_i_i = a.inner[p_a_i_save][p_a_i_i_save];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f32/dynamic_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat2x3_f32/dynamic_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..ba4070a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f32/dynamic_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,33 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+int i() {
+  thread int tint_symbol_2 = 0;
+  tint_symbol_2 = as_type<int>((as_type<uint>(tint_symbol_2) + as_type<uint>(1)));
+  return tint_symbol_2;
+}
+
+kernel void f(const constant tint_array<float2x3, 4>* tint_symbol_3 [[buffer(0)]]) {
+  int const tint_symbol = i();
+  int const p_a_i_save = tint_symbol;
+  int const tint_symbol_1 = i();
+  int const p_a_i_i_save = tint_symbol_1;
+  tint_array<float2x3, 4> const l_a = *(tint_symbol_3);
+  float2x3 const l_a_i = (*(tint_symbol_3))[p_a_i_save];
+  float3 const l_a_i_i = (*(tint_symbol_3))[p_a_i_save][p_a_i_i_save];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f32/dynamic_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat2x3_f32/dynamic_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..53e42b0
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f32/dynamic_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,64 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 37
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %a_block "a_block"
+               OpMemberName %a_block 0 "inner"
+               OpName %a "a"
+               OpName %counter "counter"
+               OpName %i "i"
+               OpName %f "f"
+               OpDecorate %a_block Block
+               OpMemberDecorate %a_block 0 Offset 0
+               OpMemberDecorate %a_block 0 ColMajor
+               OpMemberDecorate %a_block 0 MatrixStride 16
+               OpDecorate %_arr_mat2v3float_uint_4 ArrayStride 32
+               OpDecorate %a NonWritable
+               OpDecorate %a DescriptorSet 0
+               OpDecorate %a Binding 0
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat2v3float = OpTypeMatrix %v3float 2
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat2v3float_uint_4 = OpTypeArray %mat2v3float %uint_4
+    %a_block = OpTypeStruct %_arr_mat2v3float_uint_4
+%_ptr_Uniform_a_block = OpTypePointer Uniform %a_block
+          %a = OpVariable %_ptr_Uniform_a_block Uniform
+        %int = OpTypeInt 32 1
+         %11 = OpConstantNull %int
+%_ptr_Private_int = OpTypePointer Private %int
+    %counter = OpVariable %_ptr_Private_int Private %11
+         %14 = OpTypeFunction %int
+      %int_1 = OpConstant %int 1
+       %void = OpTypeVoid
+         %21 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_mat2v3float_uint_4 = OpTypePointer Uniform %_arr_mat2v3float_uint_4
+%_ptr_Uniform_mat2v3float = OpTypePointer Uniform %mat2v3float
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+          %i = OpFunction %int None %14
+         %16 = OpLabel
+         %17 = OpLoad %int %counter
+         %19 = OpIAdd %int %17 %int_1
+               OpStore %counter %19
+         %20 = OpLoad %int %counter
+               OpReturnValue %20
+               OpFunctionEnd
+          %f = OpFunction %void None %21
+         %24 = OpLabel
+         %25 = OpFunctionCall %int %i
+         %26 = OpFunctionCall %int %i
+         %29 = OpAccessChain %_ptr_Uniform__arr_mat2v3float_uint_4 %a %uint_0
+         %30 = OpLoad %_arr_mat2v3float_uint_4 %29
+         %32 = OpAccessChain %_ptr_Uniform_mat2v3float %a %uint_0 %25
+         %33 = OpLoad %mat2v3float %32
+         %35 = OpAccessChain %_ptr_Uniform_v3float %a %uint_0 %25 %26
+         %36 = OpLoad %v3float %35
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f32/dynamic_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat2x3_f32/dynamic_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..afba90a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f32/dynamic_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,18 @@
+@group(0) @binding(0) var<uniform> a : array<mat2x3<f32>, 4>;
+
+var<private> counter = 0;
+
+fn i() -> i32 {
+  counter++;
+  return counter;
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &(a);
+  let p_a_i = &((*(p_a))[i()]);
+  let p_a_i_i = &((*(p_a_i))[i()]);
+  let l_a : array<mat2x3<f32>, 4> = *(p_a);
+  let l_a_i : mat2x3<f32> = *(p_a_i);
+  let l_a_i_i : vec3<f32> = *(p_a_i_i);
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f32/static_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/array/mat2x3_f32/static_index_via_ptr.wgsl
new file mode 100644
index 0000000..8e1e4087
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f32/static_index_via_ptr.wgsl

@@ -0,0 +1,12 @@
+@group(0) @binding(0) var<uniform> a : array<mat2x3<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a       = &a;
+  let p_a_2     = &((*p_a)[2]);
+  let p_a_2_1   = &((*p_a_2)[1]);
+
+  let l_a       : array<mat2x3<f32>, 4> = *p_a;
+  let l_a_i     : mat2x3<f32>           = *p_a_2;
+  let l_a_i_i   : vec3<f32>             = *p_a_2_1;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x3_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..a6dcbe4
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,28 @@
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[8];
+};
+
+float2x3 tint_symbol_1(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz));
+}
+
+typedef float2x3 tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[8], uint offset) {
+  float2x3 arr[4] = (float2x3[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 32u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float2x3 l_a[4] = tint_symbol(a, 0u);
+  const float2x3 l_a_i = tint_symbol_1(a, 64u);
+  const float3 l_a_i_i = asfloat(a[5].xyz);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x3_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..a6dcbe4
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,28 @@
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[8];
+};
+
+float2x3 tint_symbol_1(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz));
+}
+
+typedef float2x3 tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[8], uint offset) {
+  float2x3 arr[4] = (float2x3[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 32u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float2x3 l_a[4] = tint_symbol(a, 0u);
+  const float2x3 l_a_i = tint_symbol_1(a, 64u);
+  const float3 l_a_i_i = asfloat(a[5].xyz);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f32/static_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat2x3_f32/static_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..5cd53bf
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f32/static_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,17 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform a_block_ubo {
+  mat2x3 inner[4];
+} a;
+
+void f() {
+  mat2x3 l_a[4] = a.inner;
+  mat2x3 l_a_i = a.inner[2];
+  vec3 l_a_i_i = a.inner[2][1];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f32/static_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat2x3_f32/static_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..87f2879
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f32/static_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,23 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+kernel void f(const constant tint_array<float2x3, 4>* tint_symbol [[buffer(0)]]) {
+  tint_array<float2x3, 4> const l_a = *(tint_symbol);
+  float2x3 const l_a_i = (*(tint_symbol))[2];
+  float3 const l_a_i_i = (*(tint_symbol))[2][1];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f32/static_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat2x3_f32/static_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..c6340b4
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f32/static_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,49 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 27
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %a_block "a_block"
+               OpMemberName %a_block 0 "inner"
+               OpName %a "a"
+               OpName %f "f"
+               OpDecorate %a_block Block
+               OpMemberDecorate %a_block 0 Offset 0
+               OpMemberDecorate %a_block 0 ColMajor
+               OpMemberDecorate %a_block 0 MatrixStride 16
+               OpDecorate %_arr_mat2v3float_uint_4 ArrayStride 32
+               OpDecorate %a NonWritable
+               OpDecorate %a DescriptorSet 0
+               OpDecorate %a Binding 0
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat2v3float = OpTypeMatrix %v3float 2
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat2v3float_uint_4 = OpTypeArray %mat2v3float %uint_4
+    %a_block = OpTypeStruct %_arr_mat2v3float_uint_4
+%_ptr_Uniform_a_block = OpTypePointer Uniform %a_block
+          %a = OpVariable %_ptr_Uniform_a_block Uniform
+       %void = OpTypeVoid
+         %10 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_mat2v3float_uint_4 = OpTypePointer Uniform %_arr_mat2v3float_uint_4
+        %int = OpTypeInt 32 1
+      %int_2 = OpConstant %int 2
+%_ptr_Uniform_mat2v3float = OpTypePointer Uniform %mat2v3float
+      %int_1 = OpConstant %int 1
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+          %f = OpFunction %void None %10
+         %13 = OpLabel
+         %16 = OpAccessChain %_ptr_Uniform__arr_mat2v3float_uint_4 %a %uint_0
+         %17 = OpLoad %_arr_mat2v3float_uint_4 %16
+         %21 = OpAccessChain %_ptr_Uniform_mat2v3float %a %uint_0 %int_2
+         %22 = OpLoad %mat2v3float %21
+         %25 = OpAccessChain %_ptr_Uniform_v3float %a %uint_0 %int_2 %int_1
+         %26 = OpLoad %v3float %25
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f32/static_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat2x3_f32/static_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..44250e7
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f32/static_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,11 @@
+@group(0) @binding(0) var<uniform> a : array<mat2x3<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &(a);
+  let p_a_2 = &((*(p_a))[2]);
+  let p_a_2_1 = &((*(p_a_2))[1]);
+  let l_a : array<mat2x3<f32>, 4> = *(p_a);
+  let l_a_i : mat2x3<f32> = *(p_a_2);
+  let l_a_i_i : vec3<f32> = *(p_a_2_1);
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_builtin.wgsl b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_builtin.wgsl
new file mode 100644
index 0000000..3aeddf2
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_builtin.wgsl

@@ -0,0 +1,8 @@
+@group(0) @binding(0) var<uniform> u : array<mat2x3<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    let t = transpose(u[2]);
+    let l = length(u[0][1].zxy);
+    let a = abs(u[0][1].zxy.x);
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_builtin.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_builtin.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..3ba88af
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_builtin.wgsl.expected.dxc.hlsl

@@ -0,0 +1,17 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[8];
+};
+
+float2x3 tint_symbol(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float3x2 t = transpose(tint_symbol(u, 64u));
+  const float l = length(asfloat(u[1].xyz).zxy);
+  const float a = abs(asfloat(u[1].xyz).zxy.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_builtin.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_builtin.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..3ba88af
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_builtin.wgsl.expected.fxc.hlsl

@@ -0,0 +1,17 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[8];
+};
+
+float2x3 tint_symbol(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float3x2 t = transpose(tint_symbol(u, 64u));
+  const float l = length(asfloat(u[1].xyz).zxy);
+  const float a = abs(asfloat(u[1].xyz).zxy.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_builtin.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_builtin.wgsl.expected.glsl
new file mode 100644
index 0000000..a3ae889
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_builtin.wgsl.expected.glsl

@@ -0,0 +1,17 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat2x3 inner[4];
+} u;
+
+void f() {
+  mat3x2 t = transpose(u.inner[2]);
+  float l = length(u.inner[0][1].zxy);
+  float a = abs(u.inner[0][1].zxy.x);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_builtin.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_builtin.wgsl.expected.msl
new file mode 100644
index 0000000..afe9bcc
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_builtin.wgsl.expected.msl

@@ -0,0 +1,23 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+kernel void f(const constant tint_array<float2x3, 4>* tint_symbol [[buffer(0)]]) {
+  float3x2 const t = transpose((*(tint_symbol))[2]);
+  float const l = length(float3((*(tint_symbol))[0][1]).zxy);
+  float const a = fabs(float3((*(tint_symbol))[0][1]).zxy[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_builtin.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_builtin.wgsl.expected.spvasm
new file mode 100644
index 0000000..b70cf36
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_builtin.wgsl.expected.spvasm

@@ -0,0 +1,58 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 36
+; Schema: 0
+               OpCapability Shader
+         %24 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %_arr_mat2v3float_uint_4 ArrayStride 32
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat2v3float = OpTypeMatrix %v3float 2
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat2v3float_uint_4 = OpTypeArray %mat2v3float %uint_4
+    %u_block = OpTypeStruct %_arr_mat2v3float_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+       %void = OpTypeVoid
+         %10 = OpTypeFunction %void
+    %v2float = OpTypeVector %float 2
+%mat3v2float = OpTypeMatrix %v2float 3
+     %uint_0 = OpConstant %uint 0
+        %int = OpTypeInt 32 1
+      %int_2 = OpConstant %int 2
+%_ptr_Uniform_mat2v3float = OpTypePointer Uniform %mat2v3float
+         %25 = OpConstantNull %int
+      %int_1 = OpConstant %int 1
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+          %f = OpFunction %void None %10
+         %13 = OpLabel
+         %21 = OpAccessChain %_ptr_Uniform_mat2v3float %u %uint_0 %int_2
+         %22 = OpLoad %mat2v3float %21
+         %14 = OpTranspose %mat3v2float %22
+         %28 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %25 %int_1
+         %29 = OpLoad %v3float %28
+         %30 = OpVectorShuffle %v3float %29 %29 2 0 1
+         %23 = OpExtInst %float %24 Length %30
+         %32 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %25 %int_1
+         %33 = OpLoad %v3float %32
+         %34 = OpVectorShuffle %v3float %33 %33 2 0 1
+         %35 = OpCompositeExtract %float %34 0
+         %31 = OpExtInst %float %24 FAbs %35
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_builtin.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_builtin.wgsl.expected.wgsl
new file mode 100644
index 0000000..e33a24c
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_builtin.wgsl.expected.wgsl

@@ -0,0 +1,8 @@
+@group(0) @binding(0) var<uniform> u : array<mat2x3<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let t = transpose(u[2]);
+  let l = length(u[0][1].zxy);
+  let a = abs(u[0][1].zxy.x);
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_fn.wgsl b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_fn.wgsl
new file mode 100644
index 0000000..3bfbd27
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_fn.wgsl

@@ -0,0 +1,14 @@
+@group(0) @binding(0) var<uniform> u : array<mat2x3<f32>, 4>;
+
+fn a(a : array<mat2x3<f32>, 4>) {}
+fn b(m : mat2x3<f32>) {}
+fn c(v : vec3<f32>) {}
+fn d(f : f32) {}
+
+@compute @workgroup_size(1)
+fn f() {
+    a(u);
+    b(u[1]);
+    c(u[1][0].zxy);
+    d(u[1][0].zxy.x);
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_fn.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_fn.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..9c293d7
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_fn.wgsl.expected.dxc.hlsl

@@ -0,0 +1,41 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[8];
+};
+
+void a(float2x3 a_1[4]) {
+}
+
+void b(float2x3 m) {
+}
+
+void c(float3 v) {
+}
+
+void d(float f_1) {
+}
+
+float2x3 tint_symbol_1(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz));
+}
+
+typedef float2x3 tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[8], uint offset) {
+  float2x3 arr[4] = (float2x3[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 32u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(tint_symbol_1(u, 32u));
+  c(asfloat(u[2].xyz).zxy);
+  d(asfloat(u[2].xyz).zxy.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_fn.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_fn.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..9c293d7
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_fn.wgsl.expected.fxc.hlsl

@@ -0,0 +1,41 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[8];
+};
+
+void a(float2x3 a_1[4]) {
+}
+
+void b(float2x3 m) {
+}
+
+void c(float3 v) {
+}
+
+void d(float f_1) {
+}
+
+float2x3 tint_symbol_1(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz));
+}
+
+typedef float2x3 tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[8], uint offset) {
+  float2x3 arr[4] = (float2x3[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 32u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(tint_symbol_1(u, 32u));
+  c(asfloat(u[2].xyz).zxy);
+  d(asfloat(u[2].xyz).zxy.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_fn.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_fn.wgsl.expected.glsl
new file mode 100644
index 0000000..14132c9
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_fn.wgsl.expected.glsl

@@ -0,0 +1,30 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat2x3 inner[4];
+} u;
+
+void a(mat2x3 a_1[4]) {
+}
+
+void b(mat2x3 m) {
+}
+
+void c(vec3 v) {
+}
+
+void d(float f_1) {
+}
+
+void f() {
+  a(u.inner);
+  b(u.inner[1]);
+  c(u.inner[1][0].zxy);
+  d(u.inner[1][0].zxy.x);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_fn.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_fn.wgsl.expected.msl
new file mode 100644
index 0000000..e15cecf
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_fn.wgsl.expected.msl

@@ -0,0 +1,36 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+void a(tint_array<float2x3, 4> a_1) {
+}
+
+void b(float2x3 m) {
+}
+
+void c(float3 v) {
+}
+
+void d(float f_1) {
+}
+
+kernel void f(const constant tint_array<float2x3, 4>* tint_symbol [[buffer(0)]]) {
+  a(*(tint_symbol));
+  b((*(tint_symbol))[1]);
+  c(float3((*(tint_symbol))[1][0]).zxy);
+  d(float3((*(tint_symbol))[1][0]).zxy[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_fn.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_fn.wgsl.expected.spvasm
new file mode 100644
index 0000000..f109ca4
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_fn.wgsl.expected.spvasm

@@ -0,0 +1,90 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 52
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %a "a"
+               OpName %a_1 "a_1"
+               OpName %b "b"
+               OpName %m "m"
+               OpName %c "c"
+               OpName %v "v"
+               OpName %d "d"
+               OpName %f_1 "f_1"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %_arr_mat2v3float_uint_4 ArrayStride 32
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat2v3float = OpTypeMatrix %v3float 2
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat2v3float_uint_4 = OpTypeArray %mat2v3float %uint_4
+    %u_block = OpTypeStruct %_arr_mat2v3float_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+       %void = OpTypeVoid
+         %10 = OpTypeFunction %void %_arr_mat2v3float_uint_4
+         %15 = OpTypeFunction %void %mat2v3float
+         %19 = OpTypeFunction %void %v3float
+         %23 = OpTypeFunction %void %float
+         %27 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_mat2v3float_uint_4 = OpTypePointer Uniform %_arr_mat2v3float_uint_4
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Uniform_mat2v3float = OpTypePointer Uniform %mat2v3float
+         %42 = OpConstantNull %int
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+          %a = OpFunction %void None %10
+        %a_1 = OpFunctionParameter %_arr_mat2v3float_uint_4
+         %14 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %b = OpFunction %void None %15
+          %m = OpFunctionParameter %mat2v3float
+         %18 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %c = OpFunction %void None %19
+          %v = OpFunctionParameter %v3float
+         %22 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %d = OpFunction %void None %23
+        %f_1 = OpFunctionParameter %float
+         %26 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %27
+         %29 = OpLabel
+         %33 = OpAccessChain %_ptr_Uniform__arr_mat2v3float_uint_4 %u %uint_0
+         %34 = OpLoad %_arr_mat2v3float_uint_4 %33
+         %30 = OpFunctionCall %void %a %34
+         %39 = OpAccessChain %_ptr_Uniform_mat2v3float %u %uint_0 %int_1
+         %40 = OpLoad %mat2v3float %39
+         %35 = OpFunctionCall %void %b %40
+         %44 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %int_1 %42
+         %45 = OpLoad %v3float %44
+         %46 = OpVectorShuffle %v3float %45 %45 2 0 1
+         %41 = OpFunctionCall %void %c %46
+         %48 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %int_1 %42
+         %49 = OpLoad %v3float %48
+         %50 = OpVectorShuffle %v3float %49 %49 2 0 1
+         %51 = OpCompositeExtract %float %50 0
+         %47 = OpFunctionCall %void %d %51
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_fn.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_fn.wgsl.expected.wgsl
new file mode 100644
index 0000000..1ff40e4
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_fn.wgsl.expected.wgsl

@@ -0,0 +1,21 @@
+@group(0) @binding(0) var<uniform> u : array<mat2x3<f32>, 4>;
+
+fn a(a : array<mat2x3<f32>, 4>) {
+}
+
+fn b(m : mat2x3<f32>) {
+}
+
+fn c(v : vec3<f32>) {
+}
+
+fn d(f : f32) {
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  a(u);
+  b(u[1]);
+  c(u[1][0].zxy);
+  d(u[1][0].zxy.x);
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_private.wgsl b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_private.wgsl
new file mode 100644
index 0000000..8918241
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_private.wgsl

@@ -0,0 +1,10 @@
+@group(0) @binding(0) var<uniform> u : array<mat2x3<f32>, 4>;
+var<private> p : array<mat2x3<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    p = u;
+    p[1] = u[2];
+    p[1][0] = u[0][1].zxy;
+    p[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_private.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_private.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..5f9d507
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_private.wgsl.expected.dxc.hlsl

@@ -0,0 +1,30 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[8];
+};
+static float2x3 p[4] = (float2x3[4])0;
+
+float2x3 tint_symbol_1(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz));
+}
+
+typedef float2x3 tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[8], uint offset) {
+  float2x3 arr[4] = (float2x3[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 32u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = tint_symbol_1(u, 64u);
+  p[1][0] = asfloat(u[1].xyz).zxy;
+  p[1][0].x = asfloat(u[1].x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_private.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_private.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..5f9d507
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_private.wgsl.expected.fxc.hlsl

@@ -0,0 +1,30 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[8];
+};
+static float2x3 p[4] = (float2x3[4])0;
+
+float2x3 tint_symbol_1(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz));
+}
+
+typedef float2x3 tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[8], uint offset) {
+  float2x3 arr[4] = (float2x3[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 32u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = tint_symbol_1(u, 64u);
+  p[1][0] = asfloat(u[1].xyz).zxy;
+  p[1][0].x = asfloat(u[1].x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_private.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_private.wgsl.expected.glsl
new file mode 100644
index 0000000..67f7177
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_private.wgsl.expected.glsl

@@ -0,0 +1,19 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat2x3 inner[4];
+} u;
+
+mat2x3 p[4] = mat2x3[4](mat2x3(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), mat2x3(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), mat2x3(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), mat2x3(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f));
+void f() {
+  p = u.inner;
+  p[1] = u.inner[2];
+  p[1][0] = u.inner[0][1].zxy;
+  p[1][0].x = u.inner[0][1].x;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_private.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_private.wgsl.expected.msl
new file mode 100644
index 0000000..4fa5a85
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_private.wgsl.expected.msl

@@ -0,0 +1,25 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+kernel void f(const constant tint_array<float2x3, 4>* tint_symbol_1 [[buffer(0)]]) {
+  thread tint_array<float2x3, 4> tint_symbol = {};
+  tint_symbol = *(tint_symbol_1);
+  tint_symbol[1] = (*(tint_symbol_1))[2];
+  tint_symbol[1][0] = float3((*(tint_symbol_1))[0][1]).zxy;
+  tint_symbol[1][0][0] = (*(tint_symbol_1))[0][1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_private.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_private.wgsl.expected.spvasm
new file mode 100644
index 0000000..ad56275
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_private.wgsl.expected.spvasm

@@ -0,0 +1,68 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 41
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %p "p"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %_arr_mat2v3float_uint_4 ArrayStride 32
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat2v3float = OpTypeMatrix %v3float 2
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat2v3float_uint_4 = OpTypeArray %mat2v3float %uint_4
+    %u_block = OpTypeStruct %_arr_mat2v3float_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%_ptr_Private__arr_mat2v3float_uint_4 = OpTypePointer Private %_arr_mat2v3float_uint_4
+         %12 = OpConstantNull %_arr_mat2v3float_uint_4
+          %p = OpVariable %_ptr_Private__arr_mat2v3float_uint_4 Private %12
+       %void = OpTypeVoid
+         %13 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_mat2v3float_uint_4 = OpTypePointer Uniform %_arr_mat2v3float_uint_4
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Private_mat2v3float = OpTypePointer Private %mat2v3float
+      %int_2 = OpConstant %int 2
+%_ptr_Uniform_mat2v3float = OpTypePointer Uniform %mat2v3float
+         %29 = OpConstantNull %int
+%_ptr_Private_v3float = OpTypePointer Private %v3float
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+%_ptr_Private_float = OpTypePointer Private %float
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+          %f = OpFunction %void None %13
+         %16 = OpLabel
+         %19 = OpAccessChain %_ptr_Uniform__arr_mat2v3float_uint_4 %u %uint_0
+         %20 = OpLoad %_arr_mat2v3float_uint_4 %19
+               OpStore %p %20
+         %24 = OpAccessChain %_ptr_Private_mat2v3float %p %int_1
+         %27 = OpAccessChain %_ptr_Uniform_mat2v3float %u %uint_0 %int_2
+         %28 = OpLoad %mat2v3float %27
+               OpStore %24 %28
+         %31 = OpAccessChain %_ptr_Private_v3float %p %int_1 %29
+         %33 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %29 %int_1
+         %34 = OpLoad %v3float %33
+         %35 = OpVectorShuffle %v3float %34 %34 2 0 1
+               OpStore %31 %35
+         %37 = OpAccessChain %_ptr_Private_float %p %int_1 %29 %uint_0
+         %39 = OpAccessChain %_ptr_Uniform_float %u %uint_0 %29 %int_1 %uint_0
+         %40 = OpLoad %float %39
+               OpStore %37 %40
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_private.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_private.wgsl.expected.wgsl
new file mode 100644
index 0000000..a4230fd
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_private.wgsl.expected.wgsl

@@ -0,0 +1,11 @@
+@group(0) @binding(0) var<uniform> u : array<mat2x3<f32>, 4>;
+
+var<private> p : array<mat2x3<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  p = u;
+  p[1] = u[2];
+  p[1][0] = u[0][1].zxy;
+  p[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_storage.wgsl b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_storage.wgsl
new file mode 100644
index 0000000..d6b940e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_storage.wgsl

@@ -0,0 +1,10 @@
+@group(0) @binding(0) var<uniform> u : array<mat2x3<f32>, 4>;
+@group(0) @binding(1) var<storage, read_write> s : array<mat2x3<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    s = u;
+    s[1] = u[2];
+    s[1][0] = u[0][1].zxy;
+    s[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_storage.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_storage.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..9d1bbf8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_storage.wgsl.expected.dxc.hlsl

@@ -0,0 +1,44 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[8];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, float2x3 value) {
+  buffer.Store3((offset + 0u), asuint(value[0u]));
+  buffer.Store3((offset + 16u), asuint(value[1u]));
+}
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, float2x3 value[4]) {
+  float2x3 array[4] = value;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      tint_symbol_1(buffer, (offset + (i * 32u)), array[i]);
+    }
+  }
+}
+
+float2x3 tint_symbol_4(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz));
+}
+
+typedef float2x3 tint_symbol_3_ret[4];
+tint_symbol_3_ret tint_symbol_3(uint4 buffer[8], uint offset) {
+  float2x3 arr[4] = (float2x3[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_4(buffer, (offset + (i_1 * 32u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_3(u, 0u));
+  tint_symbol_1(s, 32u, tint_symbol_4(u, 64u));
+  s.Store3(32u, asuint(asfloat(u[1].xyz).zxy));
+  s.Store(32u, asuint(asfloat(u[1].x)));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_storage.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_storage.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..9d1bbf8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_storage.wgsl.expected.fxc.hlsl

@@ -0,0 +1,44 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[8];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, float2x3 value) {
+  buffer.Store3((offset + 0u), asuint(value[0u]));
+  buffer.Store3((offset + 16u), asuint(value[1u]));
+}
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, float2x3 value[4]) {
+  float2x3 array[4] = value;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      tint_symbol_1(buffer, (offset + (i * 32u)), array[i]);
+    }
+  }
+}
+
+float2x3 tint_symbol_4(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz));
+}
+
+typedef float2x3 tint_symbol_3_ret[4];
+tint_symbol_3_ret tint_symbol_3(uint4 buffer[8], uint offset) {
+  float2x3 arr[4] = (float2x3[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_4(buffer, (offset + (i_1 * 32u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_3(u, 0u));
+  tint_symbol_1(s, 32u, tint_symbol_4(u, 64u));
+  s.Store3(32u, asuint(asfloat(u[1].xyz).zxy));
+  s.Store(32u, asuint(asfloat(u[1].x)));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_storage.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_storage.wgsl.expected.glsl
new file mode 100644
index 0000000..08b531f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_storage.wgsl.expected.glsl

@@ -0,0 +1,22 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat2x3 inner[4];
+} u;
+
+layout(binding = 1, std430) buffer u_block_ssbo {
+  mat2x3 inner[4];
+} s;
+
+void f() {
+  s.inner = u.inner;
+  s.inner[1] = u.inner[2];
+  s.inner[1][0] = u.inner[0][1].zxy;
+  s.inner[1][0].x = u.inner[0][1].x;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_storage.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_storage.wgsl.expected.msl
new file mode 100644
index 0000000..42f3b1d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_storage.wgsl.expected.msl

@@ -0,0 +1,24 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+kernel void f(device tint_array<float2x3, 4>* tint_symbol [[buffer(1)]], const constant tint_array<float2x3, 4>* tint_symbol_1 [[buffer(0)]]) {
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[2];
+  (*(tint_symbol))[1][0] = float3((*(tint_symbol_1))[0][1]).zxy;
+  (*(tint_symbol))[1][0][0] = (*(tint_symbol_1))[0][1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_storage.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_storage.wgsl.expected.spvasm
new file mode 100644
index 0000000..e7e86aa
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_storage.wgsl.expected.spvasm

@@ -0,0 +1,71 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 42
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %s "s"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %_arr_mat2v3float_uint_4 ArrayStride 32
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpDecorate %s DescriptorSet 0
+               OpDecorate %s Binding 1
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat2v3float = OpTypeMatrix %v3float 2
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat2v3float_uint_4 = OpTypeArray %mat2v3float %uint_4
+    %u_block = OpTypeStruct %_arr_mat2v3float_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%_ptr_StorageBuffer_u_block = OpTypePointer StorageBuffer %u_block
+          %s = OpVariable %_ptr_StorageBuffer_u_block StorageBuffer
+       %void = OpTypeVoid
+         %12 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_StorageBuffer__arr_mat2v3float_uint_4 = OpTypePointer StorageBuffer %_arr_mat2v3float_uint_4
+%_ptr_Uniform__arr_mat2v3float_uint_4 = OpTypePointer Uniform %_arr_mat2v3float_uint_4
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_StorageBuffer_mat2v3float = OpTypePointer StorageBuffer %mat2v3float
+      %int_2 = OpConstant %int 2
+%_ptr_Uniform_mat2v3float = OpTypePointer Uniform %mat2v3float
+         %30 = OpConstantNull %int
+%_ptr_StorageBuffer_v3float = OpTypePointer StorageBuffer %v3float
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+          %f = OpFunction %void None %12
+         %15 = OpLabel
+         %18 = OpAccessChain %_ptr_StorageBuffer__arr_mat2v3float_uint_4 %s %uint_0
+         %20 = OpAccessChain %_ptr_Uniform__arr_mat2v3float_uint_4 %u %uint_0
+         %21 = OpLoad %_arr_mat2v3float_uint_4 %20
+               OpStore %18 %21
+         %25 = OpAccessChain %_ptr_StorageBuffer_mat2v3float %s %uint_0 %int_1
+         %28 = OpAccessChain %_ptr_Uniform_mat2v3float %u %uint_0 %int_2
+         %29 = OpLoad %mat2v3float %28
+               OpStore %25 %29
+         %32 = OpAccessChain %_ptr_StorageBuffer_v3float %s %uint_0 %int_1 %30
+         %34 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %30 %int_1
+         %35 = OpLoad %v3float %34
+         %36 = OpVectorShuffle %v3float %35 %35 2 0 1
+               OpStore %32 %36
+         %38 = OpAccessChain %_ptr_StorageBuffer_float %s %uint_0 %int_1 %30 %uint_0
+         %40 = OpAccessChain %_ptr_Uniform_float %u %uint_0 %30 %int_1 %uint_0
+         %41 = OpLoad %float %40
+               OpStore %38 %41
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_storage.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_storage.wgsl.expected.wgsl
new file mode 100644
index 0000000..2b23d80
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_storage.wgsl.expected.wgsl

@@ -0,0 +1,11 @@
+@group(0) @binding(0) var<uniform> u : array<mat2x3<f32>, 4>;
+
+@group(0) @binding(1) var<storage, read_write> s : array<mat2x3<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  s = u;
+  s[1] = u[2];
+  s[1][0] = u[0][1].zxy;
+  s[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_workgroup.wgsl b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_workgroup.wgsl
new file mode 100644
index 0000000..a79e04a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_workgroup.wgsl

@@ -0,0 +1,10 @@
+@group(0) @binding(0) var<uniform> u : array<mat2x3<f32>, 4>;
+var<workgroup> w : array<mat2x3<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    w = u;
+    w[1] = u[2];
+    w[1][0] = u[0][1].zxy;
+    w[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_workgroup.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_workgroup.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..0b13b88
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_workgroup.wgsl.expected.dxc.hlsl

@@ -0,0 +1,45 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[8];
+};
+groupshared float2x3 w[4];
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+float2x3 tint_symbol_3(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz));
+}
+
+typedef float2x3 tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[8], uint offset) {
+  float2x3 arr[4] = (float2x3[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 32u)));
+    }
+  }
+  return arr;
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      const uint i = idx;
+      w[i] = float2x3((0.0f).xxx, (0.0f).xxx);
+    }
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = tint_symbol_3(u, 64u);
+  w[1][0] = asfloat(u[1].xyz).zxy;
+  w[1][0].x = asfloat(u[1].x);
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_workgroup.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_workgroup.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..0b13b88
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_workgroup.wgsl.expected.fxc.hlsl

@@ -0,0 +1,45 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[8];
+};
+groupshared float2x3 w[4];
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+float2x3 tint_symbol_3(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz));
+}
+
+typedef float2x3 tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[8], uint offset) {
+  float2x3 arr[4] = (float2x3[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 32u)));
+    }
+  }
+  return arr;
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      const uint i = idx;
+      w[i] = float2x3((0.0f).xxx, (0.0f).xxx);
+    }
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = tint_symbol_3(u, 64u);
+  w[1][0] = asfloat(u[1].xyz).zxy;
+  w[1][0].x = asfloat(u[1].x);
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_workgroup.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_workgroup.wgsl.expected.glsl
new file mode 100644
index 0000000..0c18521
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_workgroup.wgsl.expected.glsl

@@ -0,0 +1,26 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat2x3 inner[4];
+} u;
+
+shared mat2x3 w[4];
+void f(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      uint i = idx;
+      w[i] = mat2x3(vec3(0.0f), vec3(0.0f));
+    }
+  }
+  barrier();
+  w = u.inner;
+  w[1] = u.inner[2];
+  w[1][0] = u.inner[0][1].zxy;
+  w[1][0].x = u.inner[0][1].x;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f(gl_LocalInvocationIndex);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_workgroup.wgsl.expected.msl
new file mode 100644
index 0000000..d63ff3d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_workgroup.wgsl.expected.msl

@@ -0,0 +1,38 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct tint_symbol_5 {
+  tint_array<float2x3, 4> w;
+};
+
+void f_inner(uint local_invocation_index, threadgroup tint_array<float2x3, 4>* const tint_symbol, const constant tint_array<float2x3, 4>* const tint_symbol_1) {
+  for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+    uint const i = idx;
+    (*(tint_symbol))[i] = float2x3(float3(0.0f), float3(0.0f));
+  }
+  threadgroup_barrier(mem_flags::mem_threadgroup);
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[2];
+  (*(tint_symbol))[1][0] = float3((*(tint_symbol_1))[0][1]).zxy;
+  (*(tint_symbol))[1][0][0] = (*(tint_symbol_1))[0][1][0];
+}
+
+kernel void f(const constant tint_array<float2x3, 4>* tint_symbol_4 [[buffer(0)]], threadgroup tint_symbol_5* tint_symbol_3 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
+  threadgroup tint_array<float2x3, 4>* const tint_symbol_2 = &((*(tint_symbol_3)).w);
+  f_inner(local_invocation_index, tint_symbol_2, tint_symbol_4);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_workgroup.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_workgroup.wgsl.expected.spvasm
new file mode 100644
index 0000000..98ee0ab
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_workgroup.wgsl.expected.spvasm

@@ -0,0 +1,115 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 70
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f" %local_invocation_index_1
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %local_invocation_index_1 "local_invocation_index_1"
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %w "w"
+               OpName %f_inner "f_inner"
+               OpName %local_invocation_index "local_invocation_index"
+               OpName %idx "idx"
+               OpName %f "f"
+               OpDecorate %local_invocation_index_1 BuiltIn LocalInvocationIndex
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %_arr_mat2v3float_uint_4 ArrayStride 32
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %uint = OpTypeInt 32 0
+%_ptr_Input_uint = OpTypePointer Input %uint
+%local_invocation_index_1 = OpVariable %_ptr_Input_uint Input
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat2v3float = OpTypeMatrix %v3float 2
+     %uint_4 = OpConstant %uint 4
+%_arr_mat2v3float_uint_4 = OpTypeArray %mat2v3float %uint_4
+    %u_block = OpTypeStruct %_arr_mat2v3float_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%_ptr_Workgroup__arr_mat2v3float_uint_4 = OpTypePointer Workgroup %_arr_mat2v3float_uint_4
+          %w = OpVariable %_ptr_Workgroup__arr_mat2v3float_uint_4 Workgroup
+       %void = OpTypeVoid
+         %14 = OpTypeFunction %void %uint
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %21 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Workgroup_mat2v3float = OpTypePointer Workgroup %mat2v3float
+         %35 = OpConstantNull %mat2v3float
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+   %uint_264 = OpConstant %uint 264
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_mat2v3float_uint_4 = OpTypePointer Uniform %_arr_mat2v3float_uint_4
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+      %int_2 = OpConstant %int 2
+%_ptr_Uniform_mat2v3float = OpTypePointer Uniform %mat2v3float
+         %53 = OpConstantNull %int
+%_ptr_Workgroup_v3float = OpTypePointer Workgroup %v3float
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+%_ptr_Workgroup_float = OpTypePointer Workgroup %float
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+         %65 = OpTypeFunction %void
+    %f_inner = OpFunction %void None %14
+%local_invocation_index = OpFunctionParameter %uint
+         %18 = OpLabel
+        %idx = OpVariable %_ptr_Function_uint Function %21
+               OpStore %idx %local_invocation_index
+               OpBranch %22
+         %22 = OpLabel
+               OpLoopMerge %23 %24 None
+               OpBranch %25
+         %25 = OpLabel
+         %27 = OpLoad %uint %idx
+         %28 = OpULessThan %bool %27 %uint_4
+         %26 = OpLogicalNot %bool %28
+               OpSelectionMerge %30 None
+               OpBranchConditional %26 %31 %30
+         %31 = OpLabel
+               OpBranch %23
+         %30 = OpLabel
+         %32 = OpLoad %uint %idx
+         %34 = OpAccessChain %_ptr_Workgroup_mat2v3float %w %32
+               OpStore %34 %35
+               OpBranch %24
+         %24 = OpLabel
+         %36 = OpLoad %uint %idx
+         %38 = OpIAdd %uint %36 %uint_1
+               OpStore %idx %38
+               OpBranch %22
+         %23 = OpLabel
+               OpControlBarrier %uint_2 %uint_2 %uint_264
+         %44 = OpAccessChain %_ptr_Uniform__arr_mat2v3float_uint_4 %u %uint_0
+         %45 = OpLoad %_arr_mat2v3float_uint_4 %44
+               OpStore %w %45
+         %48 = OpAccessChain %_ptr_Workgroup_mat2v3float %w %int_1
+         %51 = OpAccessChain %_ptr_Uniform_mat2v3float %u %uint_0 %int_2
+         %52 = OpLoad %mat2v3float %51
+               OpStore %48 %52
+         %55 = OpAccessChain %_ptr_Workgroup_v3float %w %int_1 %53
+         %57 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %53 %int_1
+         %58 = OpLoad %v3float %57
+         %59 = OpVectorShuffle %v3float %58 %58 2 0 1
+               OpStore %55 %59
+         %61 = OpAccessChain %_ptr_Workgroup_float %w %int_1 %53 %uint_0
+         %63 = OpAccessChain %_ptr_Uniform_float %u %uint_0 %53 %int_1 %uint_0
+         %64 = OpLoad %float %63
+               OpStore %61 %64
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %65
+         %67 = OpLabel
+         %69 = OpLoad %uint %local_invocation_index_1
+         %68 = OpFunctionCall %void %f_inner %69
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_workgroup.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_workgroup.wgsl.expected.wgsl
new file mode 100644
index 0000000..c34e03b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_workgroup.wgsl.expected.wgsl

@@ -0,0 +1,11 @@
+@group(0) @binding(0) var<uniform> u : array<mat2x3<f32>, 4>;
+
+var<workgroup> w : array<mat2x3<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  w = u;
+  w[1] = u[2];
+  w[1][0] = u[0][1].zxy;
+  w[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f16/dynamic_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/array/mat2x4_f16/dynamic_index_via_ptr.wgsl
new file mode 100644
index 0000000..0474ac2
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f16/dynamic_index_via_ptr.wgsl

@@ -0,0 +1,17 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : array<mat2x4<f16>, 4>;
+
+var<private> counter = 0;
+fn i() -> i32 { counter++; return counter; }
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a       = &a;
+  let p_a_i     = &((*p_a)[i()]);
+  let p_a_i_i   = &((*p_a_i)[i()]);
+
+  let l_a       : array<mat2x4<f16>, 4> = *p_a;
+  let l_a_i     : mat2x4<f16>           = *p_a_i;
+  let l_a_i_i   : vec4<f16>             = *p_a_i_i;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x4_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..fe06356
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,49 @@
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[4];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+matrix<float16_t, 2, 4> tint_symbol_1(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  return matrix<float16_t, 2, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]));
+}
+
+typedef matrix<float16_t, 2, 4> tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[4], uint offset) {
+  matrix<float16_t, 2, 4> arr[4] = (matrix<float16_t, 2, 4>[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 16u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_a_i_save = i();
+  const int p_a_i_i_save = i();
+  const matrix<float16_t, 2, 4> l_a[4] = tint_symbol(a, 0u);
+  const matrix<float16_t, 2, 4> l_a_i = tint_symbol_1(a, (16u * uint(p_a_i_save)));
+  const uint scalar_offset_2 = (((16u * uint(p_a_i_save)) + (8u * uint(p_a_i_i_save)))) / 4;
+  uint4 ubo_load_5 = a[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  const vector<float16_t, 4> l_a_i_i = vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x4_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..08e7c35
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,54 @@
+SKIP: FAILED
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[4];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+matrix<float16_t, 2, 4> tint_symbol_1(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  return matrix<float16_t, 2, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]));
+}
+
+typedef matrix<float16_t, 2, 4> tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[4], uint offset) {
+  matrix<float16_t, 2, 4> arr[4] = (matrix<float16_t, 2, 4>[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 16u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_a_i_save = i();
+  const int p_a_i_i_save = i();
+  const matrix<float16_t, 2, 4> l_a[4] = tint_symbol(a, 0u);
+  const matrix<float16_t, 2, 4> l_a_i = tint_symbol_1(a, (16u * uint(p_a_i_save)));
+  const uint scalar_offset_2 = (((16u * uint(p_a_i_save)) + (8u * uint(p_a_i_i_save)))) / 4;
+  uint4 ubo_load_5 = a[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  const vector<float16_t, 4> l_a_i_i = vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000015AD15A89A0(11,8-16): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f16/dynamic_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat2x4_f16/dynamic_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..550a108
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f16/dynamic_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,65 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct mat2x4_f16 {
+  f16vec4 col0;
+  f16vec4 col1;
+};
+
+layout(binding = 0, std140) uniform a_block_std140_ubo {
+  mat2x4_f16 inner[4];
+} a;
+
+int counter = 0;
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+f16mat2x4 conv_mat2x4_f16(mat2x4_f16 val) {
+  return f16mat2x4(val.col0, val.col1);
+}
+
+f16mat2x4[4] conv_arr4_mat2x4_f16(mat2x4_f16 val[4]) {
+  f16mat2x4 arr[4] = f16mat2x4[4](f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_mat2x4_f16(val[i]);
+    }
+  }
+  return arr;
+}
+
+f16vec4 load_a_inner_p0_p1(uint p0, uint p1) {
+  switch(p1) {
+    case 0u: {
+      return a.inner[p0].col0;
+      break;
+    }
+    case 1u: {
+      return a.inner[p0].col1;
+      break;
+    }
+    default: {
+      return f16vec4(0.0hf);
+      break;
+    }
+  }
+}
+
+void f() {
+  f16mat2x4 p_a[4] = conv_arr4_mat2x4_f16(a.inner);
+  int tint_symbol = i();
+  f16mat2x4 p_a_i = conv_mat2x4_f16(a.inner[tint_symbol]);
+  int tint_symbol_1 = i();
+  f16vec4 p_a_i_i = load_a_inner_p0_p1(uint(tint_symbol), uint(tint_symbol_1));
+  f16mat2x4 l_a[4] = conv_arr4_mat2x4_f16(a.inner);
+  f16mat2x4 l_a_i = conv_mat2x4_f16(a.inner[tint_symbol]);
+  f16vec4 l_a_i_i = load_a_inner_p0_p1(uint(tint_symbol), uint(tint_symbol_1));
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f16/dynamic_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat2x4_f16/dynamic_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..7ff60c2
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f16/dynamic_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,33 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+int i() {
+  thread int tint_symbol_2 = 0;
+  tint_symbol_2 = as_type<int>((as_type<uint>(tint_symbol_2) + as_type<uint>(1)));
+  return tint_symbol_2;
+}
+
+kernel void f(const constant tint_array<half2x4, 4>* tint_symbol_3 [[buffer(0)]]) {
+  int const tint_symbol = i();
+  int const p_a_i_save = tint_symbol;
+  int const tint_symbol_1 = i();
+  int const p_a_i_i_save = tint_symbol_1;
+  tint_array<half2x4, 4> const l_a = *(tint_symbol_3);
+  half2x4 const l_a_i = (*(tint_symbol_3))[p_a_i_save];
+  half4 const l_a_i_i = (*(tint_symbol_3))[p_a_i_save][p_a_i_i_save];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f16/dynamic_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat2x4_f16/dynamic_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..9ff46e7
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f16/dynamic_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,165 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 98
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %a_block_std140 "a_block_std140"
+               OpMemberName %a_block_std140 0 "inner"
+               OpName %mat2x4_f16 "mat2x4_f16"
+               OpMemberName %mat2x4_f16 0 "col0"
+               OpMemberName %mat2x4_f16 1 "col1"
+               OpName %a "a"
+               OpName %counter "counter"
+               OpName %i "i"
+               OpName %conv_mat2x4_f16 "conv_mat2x4_f16"
+               OpName %val "val"
+               OpName %conv_arr4_mat2x4_f16 "conv_arr4_mat2x4_f16"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i_0 "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_a_inner_p0_p1 "load_a_inner_p0_p1"
+               OpName %p0 "p0"
+               OpName %p1 "p1"
+               OpName %f "f"
+               OpDecorate %a_block_std140 Block
+               OpMemberDecorate %a_block_std140 0 Offset 0
+               OpMemberDecorate %mat2x4_f16 0 Offset 0
+               OpMemberDecorate %mat2x4_f16 1 Offset 8
+               OpDecorate %_arr_mat2x4_f16_uint_4 ArrayStride 16
+               OpDecorate %a NonWritable
+               OpDecorate %a DescriptorSet 0
+               OpDecorate %a Binding 0
+               OpDecorate %_arr_mat2v4half_uint_4 ArrayStride 16
+       %half = OpTypeFloat 16
+     %v4half = OpTypeVector %half 4
+ %mat2x4_f16 = OpTypeStruct %v4half %v4half
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat2x4_f16_uint_4 = OpTypeArray %mat2x4_f16 %uint_4
+%a_block_std140 = OpTypeStruct %_arr_mat2x4_f16_uint_4
+%_ptr_Uniform_a_block_std140 = OpTypePointer Uniform %a_block_std140
+          %a = OpVariable %_ptr_Uniform_a_block_std140 Uniform
+        %int = OpTypeInt 32 1
+         %11 = OpConstantNull %int
+%_ptr_Private_int = OpTypePointer Private %int
+    %counter = OpVariable %_ptr_Private_int Private %11
+         %14 = OpTypeFunction %int
+      %int_1 = OpConstant %int 1
+ %mat2v4half = OpTypeMatrix %v4half 2
+         %21 = OpTypeFunction %mat2v4half %mat2x4_f16
+%_arr_mat2v4half_uint_4 = OpTypeArray %mat2v4half %uint_4
+         %29 = OpTypeFunction %_arr_mat2v4half_uint_4 %_arr_mat2x4_f16_uint_4
+%_ptr_Function__arr_mat2v4half_uint_4 = OpTypePointer Function %_arr_mat2v4half_uint_4
+         %36 = OpConstantNull %_arr_mat2v4half_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %39 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_mat2x4_f16_uint_4 = OpTypePointer Function %_arr_mat2x4_f16_uint_4
+         %52 = OpConstantNull %_arr_mat2x4_f16_uint_4
+%_ptr_Function_mat2v4half = OpTypePointer Function %mat2v4half
+%_ptr_Function_mat2x4_f16 = OpTypePointer Function %mat2x4_f16
+     %uint_1 = OpConstant %uint 1
+         %65 = OpTypeFunction %v4half %uint %uint
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half
+         %80 = OpConstantNull %v4half
+       %void = OpTypeVoid
+         %81 = OpTypeFunction %void
+%_ptr_Uniform__arr_mat2x4_f16_uint_4 = OpTypePointer Uniform %_arr_mat2x4_f16_uint_4
+%_ptr_Uniform_mat2x4_f16 = OpTypePointer Uniform %mat2x4_f16
+          %i = OpFunction %int None %14
+         %16 = OpLabel
+         %17 = OpLoad %int %counter
+         %19 = OpIAdd %int %17 %int_1
+               OpStore %counter %19
+         %20 = OpLoad %int %counter
+               OpReturnValue %20
+               OpFunctionEnd
+%conv_mat2x4_f16 = OpFunction %mat2v4half None %21
+        %val = OpFunctionParameter %mat2x4_f16
+         %25 = OpLabel
+         %26 = OpCompositeExtract %v4half %val 0
+         %27 = OpCompositeExtract %v4half %val 1
+         %28 = OpCompositeConstruct %mat2v4half %26 %27
+               OpReturnValue %28
+               OpFunctionEnd
+%conv_arr4_mat2x4_f16 = OpFunction %_arr_mat2v4half_uint_4 None %29
+      %val_0 = OpFunctionParameter %_arr_mat2x4_f16_uint_4
+         %33 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_mat2v4half_uint_4 Function %36
+        %i_0 = OpVariable %_ptr_Function_uint Function %39
+%var_for_index = OpVariable %_ptr_Function__arr_mat2x4_f16_uint_4 Function %52
+               OpBranch %40
+         %40 = OpLabel
+               OpLoopMerge %41 %42 None
+               OpBranch %43
+         %43 = OpLabel
+         %45 = OpLoad %uint %i_0
+         %46 = OpULessThan %bool %45 %uint_4
+         %44 = OpLogicalNot %bool %46
+               OpSelectionMerge %48 None
+               OpBranchConditional %44 %49 %48
+         %49 = OpLabel
+               OpBranch %41
+         %48 = OpLabel
+               OpStore %var_for_index %val_0
+         %53 = OpLoad %uint %i_0
+         %55 = OpAccessChain %_ptr_Function_mat2v4half %arr %53
+         %57 = OpLoad %uint %i_0
+         %59 = OpAccessChain %_ptr_Function_mat2x4_f16 %var_for_index %57
+         %60 = OpLoad %mat2x4_f16 %59
+         %56 = OpFunctionCall %mat2v4half %conv_mat2x4_f16 %60
+               OpStore %55 %56
+               OpBranch %42
+         %42 = OpLabel
+         %61 = OpLoad %uint %i_0
+         %63 = OpIAdd %uint %61 %uint_1
+               OpStore %i_0 %63
+               OpBranch %40
+         %41 = OpLabel
+         %64 = OpLoad %_arr_mat2v4half_uint_4 %arr
+               OpReturnValue %64
+               OpFunctionEnd
+%load_a_inner_p0_p1 = OpFunction %v4half None %65
+         %p0 = OpFunctionParameter %uint
+         %p1 = OpFunctionParameter %uint
+         %69 = OpLabel
+               OpSelectionMerge %70 None
+               OpSwitch %p1 %71 0 %72 1 %73
+         %72 = OpLabel
+         %76 = OpAccessChain %_ptr_Uniform_v4half %a %uint_0 %p0 %uint_0
+         %77 = OpLoad %v4half %76
+               OpReturnValue %77
+         %73 = OpLabel
+         %78 = OpAccessChain %_ptr_Uniform_v4half %a %uint_0 %p0 %uint_1
+         %79 = OpLoad %v4half %78
+               OpReturnValue %79
+         %71 = OpLabel
+               OpReturnValue %80
+         %70 = OpLabel
+               OpReturnValue %80
+               OpFunctionEnd
+          %f = OpFunction %void None %81
+         %84 = OpLabel
+         %85 = OpFunctionCall %int %i
+         %86 = OpFunctionCall %int %i
+         %89 = OpAccessChain %_ptr_Uniform__arr_mat2x4_f16_uint_4 %a %uint_0
+         %90 = OpLoad %_arr_mat2x4_f16_uint_4 %89
+         %87 = OpFunctionCall %_arr_mat2v4half_uint_4 %conv_arr4_mat2x4_f16 %90
+         %93 = OpAccessChain %_ptr_Uniform_mat2x4_f16 %a %uint_0 %85
+         %94 = OpLoad %mat2x4_f16 %93
+         %91 = OpFunctionCall %mat2v4half %conv_mat2x4_f16 %94
+         %96 = OpBitcast %uint %85
+         %97 = OpBitcast %uint %86
+         %95 = OpFunctionCall %v4half %load_a_inner_p0_p1 %96 %97
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f16/dynamic_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat2x4_f16/dynamic_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..3593341
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f16/dynamic_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,20 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : array<mat2x4<f16>, 4>;
+
+var<private> counter = 0;
+
+fn i() -> i32 {
+  counter++;
+  return counter;
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &(a);
+  let p_a_i = &((*(p_a))[i()]);
+  let p_a_i_i = &((*(p_a_i))[i()]);
+  let l_a : array<mat2x4<f16>, 4> = *(p_a);
+  let l_a_i : mat2x4<f16> = *(p_a_i);
+  let l_a_i_i : vec4<f16> = *(p_a_i_i);
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f16/static_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/array/mat2x4_f16/static_index_via_ptr.wgsl
new file mode 100644
index 0000000..d3b62e3
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f16/static_index_via_ptr.wgsl

@@ -0,0 +1,14 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : array<mat2x4<f16>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a       = &a;
+  let p_a_2     = &((*p_a)[2]);
+  let p_a_2_1   = &((*p_a_2)[1]);
+
+  let l_a       : array<mat2x4<f16>, 4> = *p_a;
+  let l_a_i     : mat2x4<f16>           = *p_a_2;
+  let l_a_i_i   : vec4<f16>             = *p_a_2_1;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x4_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..3632bea
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,39 @@
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[4];
+};
+
+matrix<float16_t, 2, 4> tint_symbol_1(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  return matrix<float16_t, 2, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]));
+}
+
+typedef matrix<float16_t, 2, 4> tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[4], uint offset) {
+  matrix<float16_t, 2, 4> arr[4] = (matrix<float16_t, 2, 4>[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 16u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 2, 4> l_a[4] = tint_symbol(a, 0u);
+  const matrix<float16_t, 2, 4> l_a_i = tint_symbol_1(a, 32u);
+  uint2 ubo_load_4 = a[2].zw;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  const vector<float16_t, 4> l_a_i_i = vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x4_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..d3aaa7e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,44 @@
+SKIP: FAILED
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[4];
+};
+
+matrix<float16_t, 2, 4> tint_symbol_1(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  return matrix<float16_t, 2, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]));
+}
+
+typedef matrix<float16_t, 2, 4> tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[4], uint offset) {
+  matrix<float16_t, 2, 4> arr[4] = (matrix<float16_t, 2, 4>[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 16u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 2, 4> l_a[4] = tint_symbol(a, 0u);
+  const matrix<float16_t, 2, 4> l_a_i = tint_symbol_1(a, 32u);
+  uint2 ubo_load_4 = a[2].zw;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  const vector<float16_t, 4> l_a_i_i = vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x00000192D71EE4C0(5,8-16): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f16/static_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat2x4_f16/static_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..4ef2ff5
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f16/static_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,40 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct mat2x4_f16 {
+  f16vec4 col0;
+  f16vec4 col1;
+};
+
+layout(binding = 0, std140) uniform a_block_std140_ubo {
+  mat2x4_f16 inner[4];
+} a;
+
+f16mat2x4 conv_mat2x4_f16(mat2x4_f16 val) {
+  return f16mat2x4(val.col0, val.col1);
+}
+
+f16mat2x4[4] conv_arr4_mat2x4_f16(mat2x4_f16 val[4]) {
+  f16mat2x4 arr[4] = f16mat2x4[4](f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_mat2x4_f16(val[i]);
+    }
+  }
+  return arr;
+}
+
+void f() {
+  f16mat2x4 p_a[4] = conv_arr4_mat2x4_f16(a.inner);
+  f16mat2x4 p_a_2 = conv_mat2x4_f16(a.inner[2u]);
+  f16vec4 p_a_2_1 = a.inner[2u].col1;
+  f16mat2x4 l_a[4] = conv_arr4_mat2x4_f16(a.inner);
+  f16mat2x4 l_a_i = conv_mat2x4_f16(a.inner[2u]);
+  f16vec4 l_a_i_i = a.inner[2u].col1;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f16/static_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat2x4_f16/static_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..8547394
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f16/static_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,23 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+kernel void f(const constant tint_array<half2x4, 4>* tint_symbol [[buffer(0)]]) {
+  tint_array<half2x4, 4> const l_a = *(tint_symbol);
+  half2x4 const l_a_i = (*(tint_symbol))[2];
+  half4 const l_a_i_i = (*(tint_symbol))[2][1];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f16/static_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat2x4_f16/static_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..4f7edfd
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f16/static_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,123 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 71
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %a_block_std140 "a_block_std140"
+               OpMemberName %a_block_std140 0 "inner"
+               OpName %mat2x4_f16 "mat2x4_f16"
+               OpMemberName %mat2x4_f16 0 "col0"
+               OpMemberName %mat2x4_f16 1 "col1"
+               OpName %a "a"
+               OpName %conv_mat2x4_f16 "conv_mat2x4_f16"
+               OpName %val "val"
+               OpName %conv_arr4_mat2x4_f16 "conv_arr4_mat2x4_f16"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %f "f"
+               OpDecorate %a_block_std140 Block
+               OpMemberDecorate %a_block_std140 0 Offset 0
+               OpMemberDecorate %mat2x4_f16 0 Offset 0
+               OpMemberDecorate %mat2x4_f16 1 Offset 8
+               OpDecorate %_arr_mat2x4_f16_uint_4 ArrayStride 16
+               OpDecorate %a NonWritable
+               OpDecorate %a DescriptorSet 0
+               OpDecorate %a Binding 0
+               OpDecorate %_arr_mat2v4half_uint_4 ArrayStride 16
+       %half = OpTypeFloat 16
+     %v4half = OpTypeVector %half 4
+ %mat2x4_f16 = OpTypeStruct %v4half %v4half
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat2x4_f16_uint_4 = OpTypeArray %mat2x4_f16 %uint_4
+%a_block_std140 = OpTypeStruct %_arr_mat2x4_f16_uint_4
+%_ptr_Uniform_a_block_std140 = OpTypePointer Uniform %a_block_std140
+          %a = OpVariable %_ptr_Uniform_a_block_std140 Uniform
+ %mat2v4half = OpTypeMatrix %v4half 2
+         %10 = OpTypeFunction %mat2v4half %mat2x4_f16
+%_arr_mat2v4half_uint_4 = OpTypeArray %mat2v4half %uint_4
+         %18 = OpTypeFunction %_arr_mat2v4half_uint_4 %_arr_mat2x4_f16_uint_4
+%_ptr_Function__arr_mat2v4half_uint_4 = OpTypePointer Function %_arr_mat2v4half_uint_4
+         %25 = OpConstantNull %_arr_mat2v4half_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %28 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_mat2x4_f16_uint_4 = OpTypePointer Function %_arr_mat2x4_f16_uint_4
+         %41 = OpConstantNull %_arr_mat2x4_f16_uint_4
+%_ptr_Function_mat2v4half = OpTypePointer Function %mat2v4half
+%_ptr_Function_mat2x4_f16 = OpTypePointer Function %mat2x4_f16
+     %uint_1 = OpConstant %uint 1
+       %void = OpTypeVoid
+         %54 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_mat2x4_f16_uint_4 = OpTypePointer Uniform %_arr_mat2x4_f16_uint_4
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_mat2x4_f16 = OpTypePointer Uniform %mat2x4_f16
+%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half
+%conv_mat2x4_f16 = OpFunction %mat2v4half None %10
+        %val = OpFunctionParameter %mat2x4_f16
+         %14 = OpLabel
+         %15 = OpCompositeExtract %v4half %val 0
+         %16 = OpCompositeExtract %v4half %val 1
+         %17 = OpCompositeConstruct %mat2v4half %15 %16
+               OpReturnValue %17
+               OpFunctionEnd
+%conv_arr4_mat2x4_f16 = OpFunction %_arr_mat2v4half_uint_4 None %18
+      %val_0 = OpFunctionParameter %_arr_mat2x4_f16_uint_4
+         %22 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_mat2v4half_uint_4 Function %25
+          %i = OpVariable %_ptr_Function_uint Function %28
+%var_for_index = OpVariable %_ptr_Function__arr_mat2x4_f16_uint_4 Function %41
+               OpBranch %29
+         %29 = OpLabel
+               OpLoopMerge %30 %31 None
+               OpBranch %32
+         %32 = OpLabel
+         %34 = OpLoad %uint %i
+         %35 = OpULessThan %bool %34 %uint_4
+         %33 = OpLogicalNot %bool %35
+               OpSelectionMerge %37 None
+               OpBranchConditional %33 %38 %37
+         %38 = OpLabel
+               OpBranch %30
+         %37 = OpLabel
+               OpStore %var_for_index %val_0
+         %42 = OpLoad %uint %i
+         %44 = OpAccessChain %_ptr_Function_mat2v4half %arr %42
+         %46 = OpLoad %uint %i
+         %48 = OpAccessChain %_ptr_Function_mat2x4_f16 %var_for_index %46
+         %49 = OpLoad %mat2x4_f16 %48
+         %45 = OpFunctionCall %mat2v4half %conv_mat2x4_f16 %49
+               OpStore %44 %45
+               OpBranch %31
+         %31 = OpLabel
+         %50 = OpLoad %uint %i
+         %52 = OpIAdd %uint %50 %uint_1
+               OpStore %i %52
+               OpBranch %29
+         %30 = OpLabel
+         %53 = OpLoad %_arr_mat2v4half_uint_4 %arr
+               OpReturnValue %53
+               OpFunctionEnd
+          %f = OpFunction %void None %54
+         %57 = OpLabel
+         %61 = OpAccessChain %_ptr_Uniform__arr_mat2x4_f16_uint_4 %a %uint_0
+         %62 = OpLoad %_arr_mat2x4_f16_uint_4 %61
+         %58 = OpFunctionCall %_arr_mat2v4half_uint_4 %conv_arr4_mat2x4_f16 %62
+         %66 = OpAccessChain %_ptr_Uniform_mat2x4_f16 %a %uint_0 %uint_2
+         %67 = OpLoad %mat2x4_f16 %66
+         %63 = OpFunctionCall %mat2v4half %conv_mat2x4_f16 %67
+         %69 = OpAccessChain %_ptr_Uniform_v4half %a %uint_0 %uint_2 %uint_1
+         %70 = OpLoad %v4half %69
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f16/static_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat2x4_f16/static_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..902e580
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f16/static_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,13 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : array<mat2x4<f16>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &(a);
+  let p_a_2 = &((*(p_a))[2]);
+  let p_a_2_1 = &((*(p_a_2))[1]);
+  let l_a : array<mat2x4<f16>, 4> = *(p_a);
+  let l_a_i : mat2x4<f16> = *(p_a_2);
+  let l_a_i_i : vec4<f16> = *(p_a_2_1);
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_builtin.wgsl b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_builtin.wgsl
new file mode 100644
index 0000000..3da6797
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_builtin.wgsl

@@ -0,0 +1,10 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : array<mat2x4<f16>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    let t = transpose(u[2]);
+    let l = length(u[0][1].ywxz);
+    let a = abs(u[0][1].ywxz.x);
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_builtin.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_builtin.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..b6a103e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_builtin.wgsl.expected.dxc.hlsl

@@ -0,0 +1,31 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[4];
+};
+
+matrix<float16_t, 2, 4> tint_symbol(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  return matrix<float16_t, 2, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 4, 2> t = transpose(tint_symbol(u, 32u));
+  uint2 ubo_load_4 = u[0].zw;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  const float16_t l = length(vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]).ywxz);
+  uint2 ubo_load_5 = u[0].zw;
+  vector<float16_t, 2> ubo_load_5_xz = vector<float16_t, 2>(f16tof32(ubo_load_5 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_5_yw = vector<float16_t, 2>(f16tof32(ubo_load_5 >> 16));
+  const float16_t a = abs(vector<float16_t, 4>(ubo_load_5_xz[0], ubo_load_5_yw[0], ubo_load_5_xz[1], ubo_load_5_yw[1]).ywxz.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_builtin.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_builtin.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..549f1f1
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_builtin.wgsl.expected.fxc.hlsl

@@ -0,0 +1,36 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[4];
+};
+
+matrix<float16_t, 2, 4> tint_symbol(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  return matrix<float16_t, 2, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 4, 2> t = transpose(tint_symbol(u, 32u));
+  uint2 ubo_load_4 = u[0].zw;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  const float16_t l = length(vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]).ywxz);
+  uint2 ubo_load_5 = u[0].zw;
+  vector<float16_t, 2> ubo_load_5_xz = vector<float16_t, 2>(f16tof32(ubo_load_5 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_5_yw = vector<float16_t, 2>(f16tof32(ubo_load_5 >> 16));
+  const float16_t a = abs(vector<float16_t, 4>(ubo_load_5_xz[0], ubo_load_5_yw[0], ubo_load_5_xz[1], ubo_load_5_yw[1]).ywxz.x);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x00000268E1507730(5,8-16): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_builtin.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_builtin.wgsl.expected.glsl
new file mode 100644
index 0000000..dc144ba
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_builtin.wgsl.expected.glsl

@@ -0,0 +1,27 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct mat2x4_f16 {
+  f16vec4 col0;
+  f16vec4 col1;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  mat2x4_f16 inner[4];
+} u;
+
+f16mat2x4 conv_mat2x4_f16(mat2x4_f16 val) {
+  return f16mat2x4(val.col0, val.col1);
+}
+
+void f() {
+  f16mat4x2 t = transpose(conv_mat2x4_f16(u.inner[2u]));
+  float16_t l = length(u.inner[0u].col1.ywxz);
+  float16_t a = abs(u.inner[0u].col1.ywxz[0u]);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_builtin.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_builtin.wgsl.expected.msl
new file mode 100644
index 0000000..d3fc1d4
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_builtin.wgsl.expected.msl

@@ -0,0 +1,23 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+kernel void f(const constant tint_array<half2x4, 4>* tint_symbol [[buffer(0)]]) {
+  half4x2 const t = transpose((*(tint_symbol))[2]);
+  half const l = length(half4((*(tint_symbol))[0][1]).ywxz);
+  half const a = fabs(half4((*(tint_symbol))[0][1]).ywxz[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_builtin.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_builtin.wgsl.expected.spvasm
new file mode 100644
index 0000000..3a6cf21
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_builtin.wgsl.expected.spvasm

@@ -0,0 +1,77 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 44
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+         %32 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %mat2x4_f16 "mat2x4_f16"
+               OpMemberName %mat2x4_f16 0 "col0"
+               OpMemberName %mat2x4_f16 1 "col1"
+               OpName %u "u"
+               OpName %conv_mat2x4_f16 "conv_mat2x4_f16"
+               OpName %val "val"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %mat2x4_f16 0 Offset 0
+               OpMemberDecorate %mat2x4_f16 1 Offset 8
+               OpDecorate %_arr_mat2x4_f16_uint_4 ArrayStride 16
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %half = OpTypeFloat 16
+     %v4half = OpTypeVector %half 4
+ %mat2x4_f16 = OpTypeStruct %v4half %v4half
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat2x4_f16_uint_4 = OpTypeArray %mat2x4_f16 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_mat2x4_f16_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat2v4half = OpTypeMatrix %v4half 2
+         %10 = OpTypeFunction %mat2v4half %mat2x4_f16
+       %void = OpTypeVoid
+         %18 = OpTypeFunction %void
+     %v2half = OpTypeVector %half 2
+ %mat4v2half = OpTypeMatrix %v2half 4
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_mat2x4_f16 = OpTypePointer Uniform %mat2x4_f16
+         %33 = OpConstantNull %uint
+     %uint_1 = OpConstant %uint 1
+%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half
+%conv_mat2x4_f16 = OpFunction %mat2v4half None %10
+        %val = OpFunctionParameter %mat2x4_f16
+         %14 = OpLabel
+         %15 = OpCompositeExtract %v4half %val 0
+         %16 = OpCompositeExtract %v4half %val 1
+         %17 = OpCompositeConstruct %mat2v4half %15 %16
+               OpReturnValue %17
+               OpFunctionEnd
+          %f = OpFunction %void None %18
+         %21 = OpLabel
+         %29 = OpAccessChain %_ptr_Uniform_mat2x4_f16 %u %uint_0 %uint_2
+         %30 = OpLoad %mat2x4_f16 %29
+         %25 = OpFunctionCall %mat2v4half %conv_mat2x4_f16 %30
+         %22 = OpTranspose %mat4v2half %25
+         %36 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0 %33 %uint_1
+         %37 = OpLoad %v4half %36
+         %38 = OpVectorShuffle %v4half %37 %37 1 3 0 2
+         %31 = OpExtInst %half %32 Length %38
+         %40 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0 %33 %uint_1
+         %41 = OpLoad %v4half %40
+         %42 = OpVectorShuffle %v4half %41 %41 1 3 0 2
+         %43 = OpCompositeExtract %half %42 0
+         %39 = OpExtInst %half %32 FAbs %43
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_builtin.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_builtin.wgsl.expected.wgsl
new file mode 100644
index 0000000..981ed12
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_builtin.wgsl.expected.wgsl

@@ -0,0 +1,10 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : array<mat2x4<f16>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let t = transpose(u[2]);
+  let l = length(u[0][1].ywxz);
+  let a = abs(u[0][1].ywxz.x);
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_fn.wgsl b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_fn.wgsl
new file mode 100644
index 0000000..41ad327
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_fn.wgsl

@@ -0,0 +1,16 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : array<mat2x4<f16>, 4>;
+
+fn a(a : array<mat2x4<f16>, 4>) {}
+fn b(m : mat2x4<f16>) {}
+fn c(v : vec4<f16>) {}
+fn d(f : f16) {}
+
+@compute @workgroup_size(1)
+fn f() {
+    a(u);
+    b(u[1]);
+    c(u[1][0].ywxz);
+    d(u[1][0].ywxz.x);
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_fn.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_fn.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..f249b86
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_fn.wgsl.expected.dxc.hlsl

@@ -0,0 +1,55 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[4];
+};
+
+void a(matrix<float16_t, 2, 4> a_1[4]) {
+}
+
+void b(matrix<float16_t, 2, 4> m) {
+}
+
+void c(vector<float16_t, 4> v) {
+}
+
+void d(float16_t f_1) {
+}
+
+matrix<float16_t, 2, 4> tint_symbol_1(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  return matrix<float16_t, 2, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]));
+}
+
+typedef matrix<float16_t, 2, 4> tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[4], uint offset) {
+  matrix<float16_t, 2, 4> arr[4] = (matrix<float16_t, 2, 4>[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 16u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(tint_symbol_1(u, 16u));
+  uint2 ubo_load_4 = u[1].xy;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  c(vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]).ywxz);
+  uint2 ubo_load_5 = u[1].xy;
+  vector<float16_t, 2> ubo_load_5_xz = vector<float16_t, 2>(f16tof32(ubo_load_5 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_5_yw = vector<float16_t, 2>(f16tof32(ubo_load_5 >> 16));
+  d(vector<float16_t, 4>(ubo_load_5_xz[0], ubo_load_5_yw[0], ubo_load_5_xz[1], ubo_load_5_yw[1]).ywxz.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_fn.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_fn.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..8cd4d42
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_fn.wgsl.expected.fxc.hlsl

@@ -0,0 +1,63 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[4];
+};
+
+void a(matrix<float16_t, 2, 4> a_1[4]) {
+}
+
+void b(matrix<float16_t, 2, 4> m) {
+}
+
+void c(vector<float16_t, 4> v) {
+}
+
+void d(float16_t f_1) {
+}
+
+matrix<float16_t, 2, 4> tint_symbol_1(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  return matrix<float16_t, 2, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]));
+}
+
+typedef matrix<float16_t, 2, 4> tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[4], uint offset) {
+  matrix<float16_t, 2, 4> arr[4] = (matrix<float16_t, 2, 4>[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 16u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(tint_symbol_1(u, 16u));
+  uint2 ubo_load_4 = u[1].xy;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  c(vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]).ywxz);
+  uint2 ubo_load_5 = u[1].xy;
+  vector<float16_t, 2> ubo_load_5_xz = vector<float16_t, 2>(f16tof32(ubo_load_5 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_5_yw = vector<float16_t, 2>(f16tof32(ubo_load_5 >> 16));
+  d(vector<float16_t, 4>(ubo_load_5_xz[0], ubo_load_5_yw[0], ubo_load_5_xz[1], ubo_load_5_yw[1]).ywxz.x);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001E7EB824620(5,15-23): error X3000: syntax error: unexpected token 'float16_t'
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001E7EB824620(8,15-23): error X3000: syntax error: unexpected token 'float16_t'
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001E7EB824620(11,15-23): error X3000: syntax error: unexpected token 'float16_t'
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001E7EB824620(14,8-16): error X3000: unrecognized identifier 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_fn.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_fn.wgsl.expected.glsl
new file mode 100644
index 0000000..4d6ba80
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_fn.wgsl.expected.glsl

@@ -0,0 +1,50 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct mat2x4_f16 {
+  f16vec4 col0;
+  f16vec4 col1;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  mat2x4_f16 inner[4];
+} u;
+
+void a(f16mat2x4 a_1[4]) {
+}
+
+void b(f16mat2x4 m) {
+}
+
+void c(f16vec4 v) {
+}
+
+void d(float16_t f_1) {
+}
+
+f16mat2x4 conv_mat2x4_f16(mat2x4_f16 val) {
+  return f16mat2x4(val.col0, val.col1);
+}
+
+f16mat2x4[4] conv_arr4_mat2x4_f16(mat2x4_f16 val[4]) {
+  f16mat2x4 arr[4] = f16mat2x4[4](f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_mat2x4_f16(val[i]);
+    }
+  }
+  return arr;
+}
+
+void f() {
+  a(conv_arr4_mat2x4_f16(u.inner));
+  b(conv_mat2x4_f16(u.inner[1u]));
+  c(u.inner[1u].col0.ywxz);
+  d(u.inner[1u].col0.ywxz[0u]);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_fn.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_fn.wgsl.expected.msl
new file mode 100644
index 0000000..c5b42bf
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_fn.wgsl.expected.msl

@@ -0,0 +1,36 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+void a(tint_array<half2x4, 4> a_1) {
+}
+
+void b(half2x4 m) {
+}
+
+void c(half4 v) {
+}
+
+void d(half f_1) {
+}
+
+kernel void f(const constant tint_array<half2x4, 4>* tint_symbol [[buffer(0)]]) {
+  a(*(tint_symbol));
+  b((*(tint_symbol))[1]);
+  c(half4((*(tint_symbol))[1][0]).ywxz);
+  d(half4((*(tint_symbol))[1][0]).ywxz[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_fn.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_fn.wgsl.expected.spvasm
new file mode 100644
index 0000000..a69ffdf
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_fn.wgsl.expected.spvasm

@@ -0,0 +1,163 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 95
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %mat2x4_f16 "mat2x4_f16"
+               OpMemberName %mat2x4_f16 0 "col0"
+               OpMemberName %mat2x4_f16 1 "col1"
+               OpName %u "u"
+               OpName %a "a"
+               OpName %a_1 "a_1"
+               OpName %b "b"
+               OpName %m "m"
+               OpName %c "c"
+               OpName %v "v"
+               OpName %d "d"
+               OpName %f_1 "f_1"
+               OpName %conv_mat2x4_f16 "conv_mat2x4_f16"
+               OpName %val "val"
+               OpName %conv_arr4_mat2x4_f16 "conv_arr4_mat2x4_f16"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %mat2x4_f16 0 Offset 0
+               OpMemberDecorate %mat2x4_f16 1 Offset 8
+               OpDecorate %_arr_mat2x4_f16_uint_4 ArrayStride 16
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpDecorate %_arr_mat2v4half_uint_4 ArrayStride 16
+       %half = OpTypeFloat 16
+     %v4half = OpTypeVector %half 4
+ %mat2x4_f16 = OpTypeStruct %v4half %v4half
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat2x4_f16_uint_4 = OpTypeArray %mat2x4_f16 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_mat2x4_f16_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+       %void = OpTypeVoid
+ %mat2v4half = OpTypeMatrix %v4half 2
+%_arr_mat2v4half_uint_4 = OpTypeArray %mat2v4half %uint_4
+         %10 = OpTypeFunction %void %_arr_mat2v4half_uint_4
+         %17 = OpTypeFunction %void %mat2v4half
+         %21 = OpTypeFunction %void %v4half
+         %25 = OpTypeFunction %void %half
+         %29 = OpTypeFunction %mat2v4half %mat2x4_f16
+         %36 = OpTypeFunction %_arr_mat2v4half_uint_4 %_arr_mat2x4_f16_uint_4
+%_ptr_Function__arr_mat2v4half_uint_4 = OpTypePointer Function %_arr_mat2v4half_uint_4
+         %42 = OpConstantNull %_arr_mat2v4half_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %45 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_mat2x4_f16_uint_4 = OpTypePointer Function %_arr_mat2x4_f16_uint_4
+         %58 = OpConstantNull %_arr_mat2x4_f16_uint_4
+%_ptr_Function_mat2v4half = OpTypePointer Function %mat2v4half
+%_ptr_Function_mat2x4_f16 = OpTypePointer Function %mat2x4_f16
+     %uint_1 = OpConstant %uint 1
+         %71 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_mat2x4_f16_uint_4 = OpTypePointer Uniform %_arr_mat2x4_f16_uint_4
+%_ptr_Uniform_mat2x4_f16 = OpTypePointer Uniform %mat2x4_f16
+%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half
+          %a = OpFunction %void None %10
+        %a_1 = OpFunctionParameter %_arr_mat2v4half_uint_4
+         %16 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %b = OpFunction %void None %17
+          %m = OpFunctionParameter %mat2v4half
+         %20 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %c = OpFunction %void None %21
+          %v = OpFunctionParameter %v4half
+         %24 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %d = OpFunction %void None %25
+        %f_1 = OpFunctionParameter %half
+         %28 = OpLabel
+               OpReturn
+               OpFunctionEnd
+%conv_mat2x4_f16 = OpFunction %mat2v4half None %29
+        %val = OpFunctionParameter %mat2x4_f16
+         %32 = OpLabel
+         %33 = OpCompositeExtract %v4half %val 0
+         %34 = OpCompositeExtract %v4half %val 1
+         %35 = OpCompositeConstruct %mat2v4half %33 %34
+               OpReturnValue %35
+               OpFunctionEnd
+%conv_arr4_mat2x4_f16 = OpFunction %_arr_mat2v4half_uint_4 None %36
+      %val_0 = OpFunctionParameter %_arr_mat2x4_f16_uint_4
+         %39 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_mat2v4half_uint_4 Function %42
+          %i = OpVariable %_ptr_Function_uint Function %45
+%var_for_index = OpVariable %_ptr_Function__arr_mat2x4_f16_uint_4 Function %58
+               OpBranch %46
+         %46 = OpLabel
+               OpLoopMerge %47 %48 None
+               OpBranch %49
+         %49 = OpLabel
+         %51 = OpLoad %uint %i
+         %52 = OpULessThan %bool %51 %uint_4
+         %50 = OpLogicalNot %bool %52
+               OpSelectionMerge %54 None
+               OpBranchConditional %50 %55 %54
+         %55 = OpLabel
+               OpBranch %47
+         %54 = OpLabel
+               OpStore %var_for_index %val_0
+         %59 = OpLoad %uint %i
+         %61 = OpAccessChain %_ptr_Function_mat2v4half %arr %59
+         %63 = OpLoad %uint %i
+         %65 = OpAccessChain %_ptr_Function_mat2x4_f16 %var_for_index %63
+         %66 = OpLoad %mat2x4_f16 %65
+         %62 = OpFunctionCall %mat2v4half %conv_mat2x4_f16 %66
+               OpStore %61 %62
+               OpBranch %48
+         %48 = OpLabel
+         %67 = OpLoad %uint %i
+         %69 = OpIAdd %uint %67 %uint_1
+               OpStore %i %69
+               OpBranch %46
+         %47 = OpLabel
+         %70 = OpLoad %_arr_mat2v4half_uint_4 %arr
+               OpReturnValue %70
+               OpFunctionEnd
+          %f = OpFunction %void None %71
+         %73 = OpLabel
+         %78 = OpAccessChain %_ptr_Uniform__arr_mat2x4_f16_uint_4 %u %uint_0
+         %79 = OpLoad %_arr_mat2x4_f16_uint_4 %78
+         %75 = OpFunctionCall %_arr_mat2v4half_uint_4 %conv_arr4_mat2x4_f16 %79
+         %74 = OpFunctionCall %void %a %75
+         %83 = OpAccessChain %_ptr_Uniform_mat2x4_f16 %u %uint_0 %uint_1
+         %84 = OpLoad %mat2x4_f16 %83
+         %81 = OpFunctionCall %mat2v4half %conv_mat2x4_f16 %84
+         %80 = OpFunctionCall %void %b %81
+         %87 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0 %uint_1 %uint_0
+         %88 = OpLoad %v4half %87
+         %89 = OpVectorShuffle %v4half %88 %88 1 3 0 2
+         %85 = OpFunctionCall %void %c %89
+         %91 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0 %uint_1 %uint_0
+         %92 = OpLoad %v4half %91
+         %93 = OpVectorShuffle %v4half %92 %92 1 3 0 2
+         %94 = OpCompositeExtract %half %93 0
+         %90 = OpFunctionCall %void %d %94
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_fn.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_fn.wgsl.expected.wgsl
new file mode 100644
index 0000000..fc4ec28
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_fn.wgsl.expected.wgsl

@@ -0,0 +1,23 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : array<mat2x4<f16>, 4>;
+
+fn a(a : array<mat2x4<f16>, 4>) {
+}
+
+fn b(m : mat2x4<f16>) {
+}
+
+fn c(v : vec4<f16>) {
+}
+
+fn d(f : f16) {
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  a(u);
+  b(u[1]);
+  c(u[1][0].ywxz);
+  d(u[1][0].ywxz.x);
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_private.wgsl b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_private.wgsl
new file mode 100644
index 0000000..7da57a0
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_private.wgsl

@@ -0,0 +1,12 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : array<mat2x4<f16>, 4>;
+var<private> p : array<mat2x4<f16>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    p = u;
+    p[1] = u[2];
+    p[1][0] = u[0][1].ywxz;
+    p[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_private.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_private.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..245b619
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_private.wgsl.expected.dxc.hlsl

@@ -0,0 +1,41 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[4];
+};
+static matrix<float16_t, 2, 4> p[4] = (matrix<float16_t, 2, 4>[4])0;
+
+matrix<float16_t, 2, 4> tint_symbol_1(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  return matrix<float16_t, 2, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]));
+}
+
+typedef matrix<float16_t, 2, 4> tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[4], uint offset) {
+  matrix<float16_t, 2, 4> arr[4] = (matrix<float16_t, 2, 4>[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 16u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = tint_symbol_1(u, 32u);
+  uint2 ubo_load_4 = u[0].zw;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  p[1][0] = vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]).ywxz;
+  p[1][0].x = float16_t(f16tof32(((u[0].z) & 0xFFFF)));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_private.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_private.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..db51daf
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_private.wgsl.expected.fxc.hlsl

@@ -0,0 +1,46 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[4];
+};
+static matrix<float16_t, 2, 4> p[4] = (matrix<float16_t, 2, 4>[4])0;
+
+matrix<float16_t, 2, 4> tint_symbol_1(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  return matrix<float16_t, 2, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]));
+}
+
+typedef matrix<float16_t, 2, 4> tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[4], uint offset) {
+  matrix<float16_t, 2, 4> arr[4] = (matrix<float16_t, 2, 4>[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 16u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = tint_symbol_1(u, 32u);
+  uint2 ubo_load_4 = u[0].zw;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  p[1][0] = vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]).ywxz;
+  p[1][0].x = float16_t(f16tof32(((u[0].z) & 0xFFFF)));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000022CF2B4B400(4,15-23): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_private.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_private.wgsl.expected.glsl
new file mode 100644
index 0000000..5fc6402
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_private.wgsl.expected.glsl

@@ -0,0 +1,39 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct mat2x4_f16 {
+  f16vec4 col0;
+  f16vec4 col1;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  mat2x4_f16 inner[4];
+} u;
+
+f16mat2x4 p[4] = f16mat2x4[4](f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf));
+f16mat2x4 conv_mat2x4_f16(mat2x4_f16 val) {
+  return f16mat2x4(val.col0, val.col1);
+}
+
+f16mat2x4[4] conv_arr4_mat2x4_f16(mat2x4_f16 val[4]) {
+  f16mat2x4 arr[4] = f16mat2x4[4](f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_mat2x4_f16(val[i]);
+    }
+  }
+  return arr;
+}
+
+void f() {
+  p = conv_arr4_mat2x4_f16(u.inner);
+  p[1] = conv_mat2x4_f16(u.inner[2u]);
+  p[1][0] = u.inner[0u].col1.ywxz;
+  p[1][0].x = u.inner[0u].col1[0u];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_private.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_private.wgsl.expected.msl
new file mode 100644
index 0000000..2ebeac8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_private.wgsl.expected.msl

@@ -0,0 +1,25 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+kernel void f(const constant tint_array<half2x4, 4>* tint_symbol_1 [[buffer(0)]]) {
+  thread tint_array<half2x4, 4> tint_symbol = {};
+  tint_symbol = *(tint_symbol_1);
+  tint_symbol[1] = (*(tint_symbol_1))[2];
+  tint_symbol[1][0] = half4((*(tint_symbol_1))[0][1]).ywxz;
+  tint_symbol[1][0][0] = (*(tint_symbol_1))[0][1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_private.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_private.wgsl.expected.spvasm
new file mode 100644
index 0000000..e7366e1
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_private.wgsl.expected.spvasm

@@ -0,0 +1,143 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 86
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %mat2x4_f16 "mat2x4_f16"
+               OpMemberName %mat2x4_f16 0 "col0"
+               OpMemberName %mat2x4_f16 1 "col1"
+               OpName %u "u"
+               OpName %p "p"
+               OpName %conv_mat2x4_f16 "conv_mat2x4_f16"
+               OpName %val "val"
+               OpName %conv_arr4_mat2x4_f16 "conv_arr4_mat2x4_f16"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %mat2x4_f16 0 Offset 0
+               OpMemberDecorate %mat2x4_f16 1 Offset 8
+               OpDecorate %_arr_mat2x4_f16_uint_4 ArrayStride 16
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpDecorate %_arr_mat2v4half_uint_4 ArrayStride 16
+       %half = OpTypeFloat 16
+     %v4half = OpTypeVector %half 4
+ %mat2x4_f16 = OpTypeStruct %v4half %v4half
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat2x4_f16_uint_4 = OpTypeArray %mat2x4_f16 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_mat2x4_f16_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat2v4half = OpTypeMatrix %v4half 2
+%_arr_mat2v4half_uint_4 = OpTypeArray %mat2v4half %uint_4
+%_ptr_Private__arr_mat2v4half_uint_4 = OpTypePointer Private %_arr_mat2v4half_uint_4
+         %14 = OpConstantNull %_arr_mat2v4half_uint_4
+          %p = OpVariable %_ptr_Private__arr_mat2v4half_uint_4 Private %14
+         %15 = OpTypeFunction %mat2v4half %mat2x4_f16
+         %22 = OpTypeFunction %_arr_mat2v4half_uint_4 %_arr_mat2x4_f16_uint_4
+%_ptr_Function__arr_mat2v4half_uint_4 = OpTypePointer Function %_arr_mat2v4half_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %30 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_mat2x4_f16_uint_4 = OpTypePointer Function %_arr_mat2x4_f16_uint_4
+         %43 = OpConstantNull %_arr_mat2x4_f16_uint_4
+%_ptr_Function_mat2v4half = OpTypePointer Function %mat2v4half
+%_ptr_Function_mat2x4_f16 = OpTypePointer Function %mat2x4_f16
+     %uint_1 = OpConstant %uint 1
+       %void = OpTypeVoid
+         %56 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_mat2x4_f16_uint_4 = OpTypePointer Uniform %_arr_mat2x4_f16_uint_4
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Private_mat2v4half = OpTypePointer Private %mat2v4half
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_mat2x4_f16 = OpTypePointer Uniform %mat2x4_f16
+         %74 = OpConstantNull %int
+%_ptr_Private_v4half = OpTypePointer Private %v4half
+%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half
+%_ptr_Private_half = OpTypePointer Private %half
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+%conv_mat2x4_f16 = OpFunction %mat2v4half None %15
+        %val = OpFunctionParameter %mat2x4_f16
+         %18 = OpLabel
+         %19 = OpCompositeExtract %v4half %val 0
+         %20 = OpCompositeExtract %v4half %val 1
+         %21 = OpCompositeConstruct %mat2v4half %19 %20
+               OpReturnValue %21
+               OpFunctionEnd
+%conv_arr4_mat2x4_f16 = OpFunction %_arr_mat2v4half_uint_4 None %22
+      %val_0 = OpFunctionParameter %_arr_mat2x4_f16_uint_4
+         %25 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_mat2v4half_uint_4 Function %14
+          %i = OpVariable %_ptr_Function_uint Function %30
+%var_for_index = OpVariable %_ptr_Function__arr_mat2x4_f16_uint_4 Function %43
+               OpBranch %31
+         %31 = OpLabel
+               OpLoopMerge %32 %33 None
+               OpBranch %34
+         %34 = OpLabel
+         %36 = OpLoad %uint %i
+         %37 = OpULessThan %bool %36 %uint_4
+         %35 = OpLogicalNot %bool %37
+               OpSelectionMerge %39 None
+               OpBranchConditional %35 %40 %39
+         %40 = OpLabel
+               OpBranch %32
+         %39 = OpLabel
+               OpStore %var_for_index %val_0
+         %44 = OpLoad %uint %i
+         %46 = OpAccessChain %_ptr_Function_mat2v4half %arr %44
+         %48 = OpLoad %uint %i
+         %50 = OpAccessChain %_ptr_Function_mat2x4_f16 %var_for_index %48
+         %51 = OpLoad %mat2x4_f16 %50
+         %47 = OpFunctionCall %mat2v4half %conv_mat2x4_f16 %51
+               OpStore %46 %47
+               OpBranch %33
+         %33 = OpLabel
+         %52 = OpLoad %uint %i
+         %54 = OpIAdd %uint %52 %uint_1
+               OpStore %i %54
+               OpBranch %31
+         %32 = OpLabel
+         %55 = OpLoad %_arr_mat2v4half_uint_4 %arr
+               OpReturnValue %55
+               OpFunctionEnd
+          %f = OpFunction %void None %56
+         %59 = OpLabel
+         %63 = OpAccessChain %_ptr_Uniform__arr_mat2x4_f16_uint_4 %u %uint_0
+         %64 = OpLoad %_arr_mat2x4_f16_uint_4 %63
+         %60 = OpFunctionCall %_arr_mat2v4half_uint_4 %conv_arr4_mat2x4_f16 %64
+               OpStore %p %60
+         %68 = OpAccessChain %_ptr_Private_mat2v4half %p %int_1
+         %72 = OpAccessChain %_ptr_Uniform_mat2x4_f16 %u %uint_0 %uint_2
+         %73 = OpLoad %mat2x4_f16 %72
+         %69 = OpFunctionCall %mat2v4half %conv_mat2x4_f16 %73
+               OpStore %68 %69
+         %76 = OpAccessChain %_ptr_Private_v4half %p %int_1 %74
+         %78 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0 %30 %uint_1
+         %79 = OpLoad %v4half %78
+         %80 = OpVectorShuffle %v4half %79 %79 1 3 0 2
+               OpStore %76 %80
+         %82 = OpAccessChain %_ptr_Private_half %p %int_1 %74 %uint_0
+         %84 = OpAccessChain %_ptr_Uniform_half %u %uint_0 %30 %uint_1 %30
+         %85 = OpLoad %half %84
+               OpStore %82 %85
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_private.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_private.wgsl.expected.wgsl
new file mode 100644
index 0000000..2ee6d30
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_private.wgsl.expected.wgsl

@@ -0,0 +1,13 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : array<mat2x4<f16>, 4>;
+
+var<private> p : array<mat2x4<f16>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  p = u;
+  p[1] = u[2];
+  p[1][0] = u[0][1].ywxz;
+  p[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_storage.wgsl b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_storage.wgsl
new file mode 100644
index 0000000..50bb230
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_storage.wgsl

@@ -0,0 +1,12 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : array<mat2x4<f16>, 4>;
+@group(0) @binding(1) var<storage, read_write> s : array<mat2x4<f16>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    s = u;
+    s[1] = u[2];
+    s[1][0] = u[0][1].ywxz;
+    s[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_storage.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_storage.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..d519b2e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_storage.wgsl.expected.dxc.hlsl

@@ -0,0 +1,55 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[4];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 2, 4> value) {
+  buffer.Store<vector<float16_t, 4> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 8u), value[1u]);
+}
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 2, 4> value[4]) {
+  matrix<float16_t, 2, 4> array[4] = value;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      tint_symbol_1(buffer, (offset + (i * 16u)), array[i]);
+    }
+  }
+}
+
+matrix<float16_t, 2, 4> tint_symbol_4(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  return matrix<float16_t, 2, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]));
+}
+
+typedef matrix<float16_t, 2, 4> tint_symbol_3_ret[4];
+tint_symbol_3_ret tint_symbol_3(uint4 buffer[4], uint offset) {
+  matrix<float16_t, 2, 4> arr[4] = (matrix<float16_t, 2, 4>[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_4(buffer, (offset + (i_1 * 16u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_3(u, 0u));
+  tint_symbol_1(s, 16u, tint_symbol_4(u, 32u));
+  uint2 ubo_load_4 = u[0].zw;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  s.Store<vector<float16_t, 4> >(16u, vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]).ywxz);
+  s.Store<float16_t>(16u, float16_t(f16tof32(((u[0].z) & 0xFFFF))));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_storage.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_storage.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..7ea2f14
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_storage.wgsl.expected.fxc.hlsl

@@ -0,0 +1,61 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[4];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 2, 4> value) {
+  buffer.Store<vector<float16_t, 4> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 8u), value[1u]);
+}
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 2, 4> value[4]) {
+  matrix<float16_t, 2, 4> array[4] = value;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      tint_symbol_1(buffer, (offset + (i * 16u)), array[i]);
+    }
+  }
+}
+
+matrix<float16_t, 2, 4> tint_symbol_4(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  return matrix<float16_t, 2, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]));
+}
+
+typedef matrix<float16_t, 2, 4> tint_symbol_3_ret[4];
+tint_symbol_3_ret tint_symbol_3(uint4 buffer[4], uint offset) {
+  matrix<float16_t, 2, 4> arr[4] = (matrix<float16_t, 2, 4>[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_4(buffer, (offset + (i_1 * 16u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_3(u, 0u));
+  tint_symbol_1(s, 16u, tint_symbol_4(u, 32u));
+  uint2 ubo_load_4 = u[0].zw;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  s.Store<vector<float16_t, 4> >(16u, vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]).ywxz);
+  s.Store<float16_t>(16u, float16_t(f16tof32(((u[0].z) & 0xFFFF))));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000028AE9A12030(6,68-76): error X3000: syntax error: unexpected token 'float16_t'
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000028AE9A12030(7,3-14): error X3018: invalid subscript 'Store'
+

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_storage.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_storage.wgsl.expected.glsl
new file mode 100644
index 0000000..48cbfbf
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_storage.wgsl.expected.glsl

@@ -0,0 +1,42 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct mat2x4_f16 {
+  f16vec4 col0;
+  f16vec4 col1;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  mat2x4_f16 inner[4];
+} u;
+
+layout(binding = 1, std430) buffer u_block_ssbo {
+  f16mat2x4 inner[4];
+} s;
+
+f16mat2x4 conv_mat2x4_f16(mat2x4_f16 val) {
+  return f16mat2x4(val.col0, val.col1);
+}
+
+f16mat2x4[4] conv_arr4_mat2x4_f16(mat2x4_f16 val[4]) {
+  f16mat2x4 arr[4] = f16mat2x4[4](f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_mat2x4_f16(val[i]);
+    }
+  }
+  return arr;
+}
+
+void f() {
+  s.inner = conv_arr4_mat2x4_f16(u.inner);
+  s.inner[1] = conv_mat2x4_f16(u.inner[2u]);
+  s.inner[1][0] = u.inner[0u].col1.ywxz;
+  s.inner[1][0].x = u.inner[0u].col1[0u];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_storage.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_storage.wgsl.expected.msl
new file mode 100644
index 0000000..480eb6e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_storage.wgsl.expected.msl

@@ -0,0 +1,24 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+kernel void f(device tint_array<half2x4, 4>* tint_symbol [[buffer(1)]], const constant tint_array<half2x4, 4>* tint_symbol_1 [[buffer(0)]]) {
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[2];
+  (*(tint_symbol))[1][0] = half4((*(tint_symbol_1))[0][1]).ywxz;
+  (*(tint_symbol))[1][0][0] = (*(tint_symbol_1))[0][1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_storage.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_storage.wgsl.expected.spvasm
new file mode 100644
index 0000000..efffc6a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_storage.wgsl.expected.spvasm

@@ -0,0 +1,154 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 89
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %mat2x4_f16 "mat2x4_f16"
+               OpMemberName %mat2x4_f16 0 "col0"
+               OpMemberName %mat2x4_f16 1 "col1"
+               OpName %u "u"
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %s "s"
+               OpName %conv_mat2x4_f16 "conv_mat2x4_f16"
+               OpName %val "val"
+               OpName %conv_arr4_mat2x4_f16 "conv_arr4_mat2x4_f16"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %mat2x4_f16 0 Offset 0
+               OpMemberDecorate %mat2x4_f16 1 Offset 8
+               OpDecorate %_arr_mat2x4_f16_uint_4 ArrayStride 16
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 8
+               OpDecorate %_arr_mat2v4half_uint_4 ArrayStride 16
+               OpDecorate %s DescriptorSet 0
+               OpDecorate %s Binding 1
+       %half = OpTypeFloat 16
+     %v4half = OpTypeVector %half 4
+ %mat2x4_f16 = OpTypeStruct %v4half %v4half
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat2x4_f16_uint_4 = OpTypeArray %mat2x4_f16 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_mat2x4_f16_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat2v4half = OpTypeMatrix %v4half 2
+%_arr_mat2v4half_uint_4 = OpTypeArray %mat2v4half %uint_4
+    %u_block = OpTypeStruct %_arr_mat2v4half_uint_4
+%_ptr_StorageBuffer_u_block = OpTypePointer StorageBuffer %u_block
+          %s = OpVariable %_ptr_StorageBuffer_u_block StorageBuffer
+         %15 = OpTypeFunction %mat2v4half %mat2x4_f16
+         %22 = OpTypeFunction %_arr_mat2v4half_uint_4 %_arr_mat2x4_f16_uint_4
+%_ptr_Function__arr_mat2v4half_uint_4 = OpTypePointer Function %_arr_mat2v4half_uint_4
+         %28 = OpConstantNull %_arr_mat2v4half_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %31 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_mat2x4_f16_uint_4 = OpTypePointer Function %_arr_mat2x4_f16_uint_4
+         %44 = OpConstantNull %_arr_mat2x4_f16_uint_4
+%_ptr_Function_mat2v4half = OpTypePointer Function %mat2v4half
+%_ptr_Function_mat2x4_f16 = OpTypePointer Function %mat2x4_f16
+     %uint_1 = OpConstant %uint 1
+       %void = OpTypeVoid
+         %57 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_StorageBuffer__arr_mat2v4half_uint_4 = OpTypePointer StorageBuffer %_arr_mat2v4half_uint_4
+%_ptr_Uniform__arr_mat2x4_f16_uint_4 = OpTypePointer Uniform %_arr_mat2x4_f16_uint_4
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_StorageBuffer_mat2v4half = OpTypePointer StorageBuffer %mat2v4half
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_mat2x4_f16 = OpTypePointer Uniform %mat2x4_f16
+         %77 = OpConstantNull %int
+%_ptr_StorageBuffer_v4half = OpTypePointer StorageBuffer %v4half
+%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half
+%_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+%conv_mat2x4_f16 = OpFunction %mat2v4half None %15
+        %val = OpFunctionParameter %mat2x4_f16
+         %18 = OpLabel
+         %19 = OpCompositeExtract %v4half %val 0
+         %20 = OpCompositeExtract %v4half %val 1
+         %21 = OpCompositeConstruct %mat2v4half %19 %20
+               OpReturnValue %21
+               OpFunctionEnd
+%conv_arr4_mat2x4_f16 = OpFunction %_arr_mat2v4half_uint_4 None %22
+      %val_0 = OpFunctionParameter %_arr_mat2x4_f16_uint_4
+         %25 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_mat2v4half_uint_4 Function %28
+          %i = OpVariable %_ptr_Function_uint Function %31
+%var_for_index = OpVariable %_ptr_Function__arr_mat2x4_f16_uint_4 Function %44
+               OpBranch %32
+         %32 = OpLabel
+               OpLoopMerge %33 %34 None
+               OpBranch %35
+         %35 = OpLabel
+         %37 = OpLoad %uint %i
+         %38 = OpULessThan %bool %37 %uint_4
+         %36 = OpLogicalNot %bool %38
+               OpSelectionMerge %40 None
+               OpBranchConditional %36 %41 %40
+         %41 = OpLabel
+               OpBranch %33
+         %40 = OpLabel
+               OpStore %var_for_index %val_0
+         %45 = OpLoad %uint %i
+         %47 = OpAccessChain %_ptr_Function_mat2v4half %arr %45
+         %49 = OpLoad %uint %i
+         %51 = OpAccessChain %_ptr_Function_mat2x4_f16 %var_for_index %49
+         %52 = OpLoad %mat2x4_f16 %51
+         %48 = OpFunctionCall %mat2v4half %conv_mat2x4_f16 %52
+               OpStore %47 %48
+               OpBranch %34
+         %34 = OpLabel
+         %53 = OpLoad %uint %i
+         %55 = OpIAdd %uint %53 %uint_1
+               OpStore %i %55
+               OpBranch %32
+         %33 = OpLabel
+         %56 = OpLoad %_arr_mat2v4half_uint_4 %arr
+               OpReturnValue %56
+               OpFunctionEnd
+          %f = OpFunction %void None %57
+         %60 = OpLabel
+         %63 = OpAccessChain %_ptr_StorageBuffer__arr_mat2v4half_uint_4 %s %uint_0
+         %66 = OpAccessChain %_ptr_Uniform__arr_mat2x4_f16_uint_4 %u %uint_0
+         %67 = OpLoad %_arr_mat2x4_f16_uint_4 %66
+         %64 = OpFunctionCall %_arr_mat2v4half_uint_4 %conv_arr4_mat2x4_f16 %67
+               OpStore %63 %64
+         %71 = OpAccessChain %_ptr_StorageBuffer_mat2v4half %s %uint_0 %int_1
+         %75 = OpAccessChain %_ptr_Uniform_mat2x4_f16 %u %uint_0 %uint_2
+         %76 = OpLoad %mat2x4_f16 %75
+         %72 = OpFunctionCall %mat2v4half %conv_mat2x4_f16 %76
+               OpStore %71 %72
+         %79 = OpAccessChain %_ptr_StorageBuffer_v4half %s %uint_0 %int_1 %77
+         %81 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0 %31 %uint_1
+         %82 = OpLoad %v4half %81
+         %83 = OpVectorShuffle %v4half %82 %82 1 3 0 2
+               OpStore %79 %83
+         %85 = OpAccessChain %_ptr_StorageBuffer_half %s %uint_0 %int_1 %77 %uint_0
+         %87 = OpAccessChain %_ptr_Uniform_half %u %uint_0 %31 %uint_1 %31
+         %88 = OpLoad %half %87
+               OpStore %85 %88
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_storage.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_storage.wgsl.expected.wgsl
new file mode 100644
index 0000000..7807cd8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_storage.wgsl.expected.wgsl

@@ -0,0 +1,13 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : array<mat2x4<f16>, 4>;
+
+@group(0) @binding(1) var<storage, read_write> s : array<mat2x4<f16>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  s = u;
+  s[1] = u[2];
+  s[1][0] = u[0][1].ywxz;
+  s[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_workgroup.wgsl b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_workgroup.wgsl
new file mode 100644
index 0000000..50ba4da
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_workgroup.wgsl

@@ -0,0 +1,12 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : array<mat2x4<f16>, 4>;
+var<workgroup> w : array<mat2x4<f16>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    w = u;
+    w[1] = u[2];
+    w[1][0] = u[0][1].ywxz;
+    w[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_workgroup.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_workgroup.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..2efdd38
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_workgroup.wgsl.expected.dxc.hlsl

@@ -0,0 +1,56 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[4];
+};
+groupshared matrix<float16_t, 2, 4> w[4];
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+matrix<float16_t, 2, 4> tint_symbol_3(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  return matrix<float16_t, 2, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]));
+}
+
+typedef matrix<float16_t, 2, 4> tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[4], uint offset) {
+  matrix<float16_t, 2, 4> arr[4] = (matrix<float16_t, 2, 4>[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 16u)));
+    }
+  }
+  return arr;
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      const uint i = idx;
+      w[i] = matrix<float16_t, 2, 4>((float16_t(0.0h)).xxxx, (float16_t(0.0h)).xxxx);
+    }
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = tint_symbol_3(u, 32u);
+  uint2 ubo_load_4 = u[0].zw;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  w[1][0] = vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]).ywxz;
+  w[1][0].x = float16_t(f16tof32(((u[0].z) & 0xFFFF)));
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_workgroup.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_workgroup.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..ffeeb95
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_workgroup.wgsl.expected.fxc.hlsl

@@ -0,0 +1,61 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[4];
+};
+groupshared matrix<float16_t, 2, 4> w[4];
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+matrix<float16_t, 2, 4> tint_symbol_3(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  return matrix<float16_t, 2, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]));
+}
+
+typedef matrix<float16_t, 2, 4> tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[4], uint offset) {
+  matrix<float16_t, 2, 4> arr[4] = (matrix<float16_t, 2, 4>[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 16u)));
+    }
+  }
+  return arr;
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      const uint i = idx;
+      w[i] = matrix<float16_t, 2, 4>((float16_t(0.0h)).xxxx, (float16_t(0.0h)).xxxx);
+    }
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = tint_symbol_3(u, 32u);
+  uint2 ubo_load_4 = u[0].zw;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  w[1][0] = vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]).ywxz;
+  w[1][0].x = float16_t(f16tof32(((u[0].z) & 0xFFFF)));
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001E5C5F5C8E0(4,20-28): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_workgroup.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_workgroup.wgsl.expected.glsl
new file mode 100644
index 0000000..7c842fe
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_workgroup.wgsl.expected.glsl

@@ -0,0 +1,46 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct mat2x4_f16 {
+  f16vec4 col0;
+  f16vec4 col1;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  mat2x4_f16 inner[4];
+} u;
+
+shared f16mat2x4 w[4];
+f16mat2x4 conv_mat2x4_f16(mat2x4_f16 val) {
+  return f16mat2x4(val.col0, val.col1);
+}
+
+f16mat2x4[4] conv_arr4_mat2x4_f16(mat2x4_f16 val[4]) {
+  f16mat2x4 arr[4] = f16mat2x4[4](f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_mat2x4_f16(val[i]);
+    }
+  }
+  return arr;
+}
+
+void f(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      uint i = idx;
+      w[i] = f16mat2x4(f16vec4(0.0hf), f16vec4(0.0hf));
+    }
+  }
+  barrier();
+  w = conv_arr4_mat2x4_f16(u.inner);
+  w[1] = conv_mat2x4_f16(u.inner[2u]);
+  w[1][0] = u.inner[0u].col1.ywxz;
+  w[1][0].x = u.inner[0u].col1[0u];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f(gl_LocalInvocationIndex);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_workgroup.wgsl.expected.msl
new file mode 100644
index 0000000..3f9f184
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_workgroup.wgsl.expected.msl

@@ -0,0 +1,38 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct tint_symbol_5 {
+  tint_array<half2x4, 4> w;
+};
+
+void f_inner(uint local_invocation_index, threadgroup tint_array<half2x4, 4>* const tint_symbol, const constant tint_array<half2x4, 4>* const tint_symbol_1) {
+  for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+    uint const i = idx;
+    (*(tint_symbol))[i] = half2x4(half4(0.0h), half4(0.0h));
+  }
+  threadgroup_barrier(mem_flags::mem_threadgroup);
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[2];
+  (*(tint_symbol))[1][0] = half4((*(tint_symbol_1))[0][1]).ywxz;
+  (*(tint_symbol))[1][0][0] = (*(tint_symbol_1))[0][1][0];
+}
+
+kernel void f(const constant tint_array<half2x4, 4>* tint_symbol_4 [[buffer(0)]], threadgroup tint_symbol_5* tint_symbol_3 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
+  threadgroup tint_array<half2x4, 4>* const tint_symbol_2 = &((*(tint_symbol_3)).w);
+  f_inner(local_invocation_index, tint_symbol_2, tint_symbol_4);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_workgroup.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_workgroup.wgsl.expected.spvasm
new file mode 100644
index 0000000..487a96d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_workgroup.wgsl.expected.spvasm

@@ -0,0 +1,186 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 111
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f" %local_invocation_index_1
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %local_invocation_index_1 "local_invocation_index_1"
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %mat2x4_f16 "mat2x4_f16"
+               OpMemberName %mat2x4_f16 0 "col0"
+               OpMemberName %mat2x4_f16 1 "col1"
+               OpName %u "u"
+               OpName %w "w"
+               OpName %conv_mat2x4_f16 "conv_mat2x4_f16"
+               OpName %val "val"
+               OpName %conv_arr4_mat2x4_f16 "conv_arr4_mat2x4_f16"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %f_inner "f_inner"
+               OpName %local_invocation_index "local_invocation_index"
+               OpName %idx "idx"
+               OpName %f "f"
+               OpDecorate %local_invocation_index_1 BuiltIn LocalInvocationIndex
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %mat2x4_f16 0 Offset 0
+               OpMemberDecorate %mat2x4_f16 1 Offset 8
+               OpDecorate %_arr_mat2x4_f16_uint_4 ArrayStride 16
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpDecorate %_arr_mat2v4half_uint_4 ArrayStride 16
+       %uint = OpTypeInt 32 0
+%_ptr_Input_uint = OpTypePointer Input %uint
+%local_invocation_index_1 = OpVariable %_ptr_Input_uint Input
+       %half = OpTypeFloat 16
+     %v4half = OpTypeVector %half 4
+ %mat2x4_f16 = OpTypeStruct %v4half %v4half
+     %uint_4 = OpConstant %uint 4
+%_arr_mat2x4_f16_uint_4 = OpTypeArray %mat2x4_f16 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_mat2x4_f16_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat2v4half = OpTypeMatrix %v4half 2
+%_arr_mat2v4half_uint_4 = OpTypeArray %mat2v4half %uint_4
+%_ptr_Workgroup__arr_mat2v4half_uint_4 = OpTypePointer Workgroup %_arr_mat2v4half_uint_4
+          %w = OpVariable %_ptr_Workgroup__arr_mat2v4half_uint_4 Workgroup
+         %16 = OpTypeFunction %mat2v4half %mat2x4_f16
+         %23 = OpTypeFunction %_arr_mat2v4half_uint_4 %_arr_mat2x4_f16_uint_4
+%_ptr_Function__arr_mat2v4half_uint_4 = OpTypePointer Function %_arr_mat2v4half_uint_4
+         %29 = OpConstantNull %_arr_mat2v4half_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %32 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_mat2x4_f16_uint_4 = OpTypePointer Function %_arr_mat2x4_f16_uint_4
+         %45 = OpConstantNull %_arr_mat2x4_f16_uint_4
+%_ptr_Function_mat2v4half = OpTypePointer Function %mat2v4half
+%_ptr_Function_mat2x4_f16 = OpTypePointer Function %mat2x4_f16
+     %uint_1 = OpConstant %uint 1
+       %void = OpTypeVoid
+         %58 = OpTypeFunction %void %uint
+%_ptr_Workgroup_mat2v4half = OpTypePointer Workgroup %mat2v4half
+         %76 = OpConstantNull %mat2v4half
+     %uint_2 = OpConstant %uint 2
+   %uint_264 = OpConstant %uint 264
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_mat2x4_f16_uint_4 = OpTypePointer Uniform %_arr_mat2x4_f16_uint_4
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Uniform_mat2x4_f16 = OpTypePointer Uniform %mat2x4_f16
+         %94 = OpConstantNull %int
+%_ptr_Workgroup_v4half = OpTypePointer Workgroup %v4half
+%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half
+%_ptr_Workgroup_half = OpTypePointer Workgroup %half
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+        %106 = OpTypeFunction %void
+%conv_mat2x4_f16 = OpFunction %mat2v4half None %16
+        %val = OpFunctionParameter %mat2x4_f16
+         %19 = OpLabel
+         %20 = OpCompositeExtract %v4half %val 0
+         %21 = OpCompositeExtract %v4half %val 1
+         %22 = OpCompositeConstruct %mat2v4half %20 %21
+               OpReturnValue %22
+               OpFunctionEnd
+%conv_arr4_mat2x4_f16 = OpFunction %_arr_mat2v4half_uint_4 None %23
+      %val_0 = OpFunctionParameter %_arr_mat2x4_f16_uint_4
+         %26 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_mat2v4half_uint_4 Function %29
+          %i = OpVariable %_ptr_Function_uint Function %32
+%var_for_index = OpVariable %_ptr_Function__arr_mat2x4_f16_uint_4 Function %45
+               OpBranch %33
+         %33 = OpLabel
+               OpLoopMerge %34 %35 None
+               OpBranch %36
+         %36 = OpLabel
+         %38 = OpLoad %uint %i
+         %39 = OpULessThan %bool %38 %uint_4
+         %37 = OpLogicalNot %bool %39
+               OpSelectionMerge %41 None
+               OpBranchConditional %37 %42 %41
+         %42 = OpLabel
+               OpBranch %34
+         %41 = OpLabel
+               OpStore %var_for_index %val_0
+         %46 = OpLoad %uint %i
+         %48 = OpAccessChain %_ptr_Function_mat2v4half %arr %46
+         %50 = OpLoad %uint %i
+         %52 = OpAccessChain %_ptr_Function_mat2x4_f16 %var_for_index %50
+         %53 = OpLoad %mat2x4_f16 %52
+         %49 = OpFunctionCall %mat2v4half %conv_mat2x4_f16 %53
+               OpStore %48 %49
+               OpBranch %35
+         %35 = OpLabel
+         %54 = OpLoad %uint %i
+         %56 = OpIAdd %uint %54 %uint_1
+               OpStore %i %56
+               OpBranch %33
+         %34 = OpLabel
+         %57 = OpLoad %_arr_mat2v4half_uint_4 %arr
+               OpReturnValue %57
+               OpFunctionEnd
+    %f_inner = OpFunction %void None %58
+%local_invocation_index = OpFunctionParameter %uint
+         %62 = OpLabel
+        %idx = OpVariable %_ptr_Function_uint Function %32
+               OpStore %idx %local_invocation_index
+               OpBranch %64
+         %64 = OpLabel
+               OpLoopMerge %65 %66 None
+               OpBranch %67
+         %67 = OpLabel
+         %69 = OpLoad %uint %idx
+         %70 = OpULessThan %bool %69 %uint_4
+         %68 = OpLogicalNot %bool %70
+               OpSelectionMerge %71 None
+               OpBranchConditional %68 %72 %71
+         %72 = OpLabel
+               OpBranch %65
+         %71 = OpLabel
+         %73 = OpLoad %uint %idx
+         %75 = OpAccessChain %_ptr_Workgroup_mat2v4half %w %73
+               OpStore %75 %76
+               OpBranch %66
+         %66 = OpLabel
+         %77 = OpLoad %uint %idx
+         %78 = OpIAdd %uint %77 %uint_1
+               OpStore %idx %78
+               OpBranch %64
+         %65 = OpLabel
+               OpControlBarrier %uint_2 %uint_2 %uint_264
+         %85 = OpAccessChain %_ptr_Uniform__arr_mat2x4_f16_uint_4 %u %uint_0
+         %86 = OpLoad %_arr_mat2x4_f16_uint_4 %85
+         %82 = OpFunctionCall %_arr_mat2v4half_uint_4 %conv_arr4_mat2x4_f16 %86
+               OpStore %w %82
+         %89 = OpAccessChain %_ptr_Workgroup_mat2v4half %w %int_1
+         %92 = OpAccessChain %_ptr_Uniform_mat2x4_f16 %u %uint_0 %uint_2
+         %93 = OpLoad %mat2x4_f16 %92
+         %90 = OpFunctionCall %mat2v4half %conv_mat2x4_f16 %93
+               OpStore %89 %90
+         %96 = OpAccessChain %_ptr_Workgroup_v4half %w %int_1 %94
+         %98 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0 %32 %uint_1
+         %99 = OpLoad %v4half %98
+        %100 = OpVectorShuffle %v4half %99 %99 1 3 0 2
+               OpStore %96 %100
+        %102 = OpAccessChain %_ptr_Workgroup_half %w %int_1 %94 %uint_0
+        %104 = OpAccessChain %_ptr_Uniform_half %u %uint_0 %32 %uint_1 %32
+        %105 = OpLoad %half %104
+               OpStore %102 %105
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %106
+        %108 = OpLabel
+        %110 = OpLoad %uint %local_invocation_index_1
+        %109 = OpFunctionCall %void %f_inner %110
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_workgroup.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_workgroup.wgsl.expected.wgsl
new file mode 100644
index 0000000..cbd73df
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f16/to_workgroup.wgsl.expected.wgsl

@@ -0,0 +1,13 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : array<mat2x4<f16>, 4>;
+
+var<workgroup> w : array<mat2x4<f16>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  w = u;
+  w[1] = u[2];
+  w[1][0] = u[0][1].ywxz;
+  w[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f32/dynamic_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/array/mat2x4_f32/dynamic_index_via_ptr.wgsl
new file mode 100644
index 0000000..c833d93
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f32/dynamic_index_via_ptr.wgsl

@@ -0,0 +1,15 @@
+@group(0) @binding(0) var<uniform> a : array<mat2x4<f32>, 4>;
+
+var<private> counter = 0;
+fn i() -> i32 { counter++; return counter; }
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a       = &a;
+  let p_a_i     = &((*p_a)[i()]);
+  let p_a_i_i   = &((*p_a_i)[i()]);
+
+  let l_a       : array<mat2x4<f32>, 4> = *p_a;
+  let l_a_i     : mat2x4<f32>           = *p_a_i;
+  let l_a_i_i   : vec4<f32>             = *p_a_i_i;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x4_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..2f907f8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,37 @@
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[8];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+float2x4 tint_symbol_1(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]));
+}
+
+typedef float2x4 tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[8], uint offset) {
+  float2x4 arr[4] = (float2x4[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 32u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_a_i_save = i();
+  const int p_a_i_i_save = i();
+  const float2x4 l_a[4] = tint_symbol(a, 0u);
+  const float2x4 l_a_i = tint_symbol_1(a, (32u * uint(p_a_i_save)));
+  const uint scalar_offset_2 = (((32u * uint(p_a_i_save)) + (16u * uint(p_a_i_i_save)))) / 4;
+  const float4 l_a_i_i = asfloat(a[scalar_offset_2 / 4]);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x4_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..2f907f8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,37 @@
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[8];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+float2x4 tint_symbol_1(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]));
+}
+
+typedef float2x4 tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[8], uint offset) {
+  float2x4 arr[4] = (float2x4[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 32u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_a_i_save = i();
+  const int p_a_i_i_save = i();
+  const float2x4 l_a[4] = tint_symbol(a, 0u);
+  const float2x4 l_a_i = tint_symbol_1(a, (32u * uint(p_a_i_save)));
+  const uint scalar_offset_2 = (((32u * uint(p_a_i_save)) + (16u * uint(p_a_i_i_save)))) / 4;
+  const float4 l_a_i_i = asfloat(a[scalar_offset_2 / 4]);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f32/dynamic_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat2x4_f32/dynamic_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..29484d7
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f32/dynamic_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,27 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform a_block_ubo {
+  mat2x4 inner[4];
+} a;
+
+int counter = 0;
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+void f() {
+  int tint_symbol = i();
+  int p_a_i_save = tint_symbol;
+  int tint_symbol_1 = i();
+  int p_a_i_i_save = tint_symbol_1;
+  mat2x4 l_a[4] = a.inner;
+  mat2x4 l_a_i = a.inner[p_a_i_save];
+  vec4 l_a_i_i = a.inner[p_a_i_save][p_a_i_i_save];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f32/dynamic_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat2x4_f32/dynamic_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..5566a8c
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f32/dynamic_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,33 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+int i() {
+  thread int tint_symbol_2 = 0;
+  tint_symbol_2 = as_type<int>((as_type<uint>(tint_symbol_2) + as_type<uint>(1)));
+  return tint_symbol_2;
+}
+
+kernel void f(const constant tint_array<float2x4, 4>* tint_symbol_3 [[buffer(0)]]) {
+  int const tint_symbol = i();
+  int const p_a_i_save = tint_symbol;
+  int const tint_symbol_1 = i();
+  int const p_a_i_i_save = tint_symbol_1;
+  tint_array<float2x4, 4> const l_a = *(tint_symbol_3);
+  float2x4 const l_a_i = (*(tint_symbol_3))[p_a_i_save];
+  float4 const l_a_i_i = (*(tint_symbol_3))[p_a_i_save][p_a_i_i_save];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f32/dynamic_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat2x4_f32/dynamic_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..be98ce5
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f32/dynamic_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,64 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 37
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %a_block "a_block"
+               OpMemberName %a_block 0 "inner"
+               OpName %a "a"
+               OpName %counter "counter"
+               OpName %i "i"
+               OpName %f "f"
+               OpDecorate %a_block Block
+               OpMemberDecorate %a_block 0 Offset 0
+               OpMemberDecorate %a_block 0 ColMajor
+               OpMemberDecorate %a_block 0 MatrixStride 16
+               OpDecorate %_arr_mat2v4float_uint_4 ArrayStride 32
+               OpDecorate %a NonWritable
+               OpDecorate %a DescriptorSet 0
+               OpDecorate %a Binding 0
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%mat2v4float = OpTypeMatrix %v4float 2
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat2v4float_uint_4 = OpTypeArray %mat2v4float %uint_4
+    %a_block = OpTypeStruct %_arr_mat2v4float_uint_4
+%_ptr_Uniform_a_block = OpTypePointer Uniform %a_block
+          %a = OpVariable %_ptr_Uniform_a_block Uniform
+        %int = OpTypeInt 32 1
+         %11 = OpConstantNull %int
+%_ptr_Private_int = OpTypePointer Private %int
+    %counter = OpVariable %_ptr_Private_int Private %11
+         %14 = OpTypeFunction %int
+      %int_1 = OpConstant %int 1
+       %void = OpTypeVoid
+         %21 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_mat2v4float_uint_4 = OpTypePointer Uniform %_arr_mat2v4float_uint_4
+%_ptr_Uniform_mat2v4float = OpTypePointer Uniform %mat2v4float
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+          %i = OpFunction %int None %14
+         %16 = OpLabel
+         %17 = OpLoad %int %counter
+         %19 = OpIAdd %int %17 %int_1
+               OpStore %counter %19
+         %20 = OpLoad %int %counter
+               OpReturnValue %20
+               OpFunctionEnd
+          %f = OpFunction %void None %21
+         %24 = OpLabel
+         %25 = OpFunctionCall %int %i
+         %26 = OpFunctionCall %int %i
+         %29 = OpAccessChain %_ptr_Uniform__arr_mat2v4float_uint_4 %a %uint_0
+         %30 = OpLoad %_arr_mat2v4float_uint_4 %29
+         %32 = OpAccessChain %_ptr_Uniform_mat2v4float %a %uint_0 %25
+         %33 = OpLoad %mat2v4float %32
+         %35 = OpAccessChain %_ptr_Uniform_v4float %a %uint_0 %25 %26
+         %36 = OpLoad %v4float %35
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f32/dynamic_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat2x4_f32/dynamic_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..823c9af
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f32/dynamic_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,18 @@
+@group(0) @binding(0) var<uniform> a : array<mat2x4<f32>, 4>;
+
+var<private> counter = 0;
+
+fn i() -> i32 {
+  counter++;
+  return counter;
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &(a);
+  let p_a_i = &((*(p_a))[i()]);
+  let p_a_i_i = &((*(p_a_i))[i()]);
+  let l_a : array<mat2x4<f32>, 4> = *(p_a);
+  let l_a_i : mat2x4<f32> = *(p_a_i);
+  let l_a_i_i : vec4<f32> = *(p_a_i_i);
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f32/static_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/array/mat2x4_f32/static_index_via_ptr.wgsl
new file mode 100644
index 0000000..e71c4d7
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f32/static_index_via_ptr.wgsl

@@ -0,0 +1,12 @@
+@group(0) @binding(0) var<uniform> a : array<mat2x4<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a       = &a;
+  let p_a_2     = &((*p_a)[2]);
+  let p_a_2_1   = &((*p_a_2)[1]);
+
+  let l_a       : array<mat2x4<f32>, 4> = *p_a;
+  let l_a_i     : mat2x4<f32>           = *p_a_2;
+  let l_a_i_i   : vec4<f32>             = *p_a_2_1;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x4_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..89fd887
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,28 @@
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[8];
+};
+
+float2x4 tint_symbol_1(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]));
+}
+
+typedef float2x4 tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[8], uint offset) {
+  float2x4 arr[4] = (float2x4[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 32u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float2x4 l_a[4] = tint_symbol(a, 0u);
+  const float2x4 l_a_i = tint_symbol_1(a, 64u);
+  const float4 l_a_i_i = asfloat(a[5]);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x4_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..89fd887
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,28 @@
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[8];
+};
+
+float2x4 tint_symbol_1(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]));
+}
+
+typedef float2x4 tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[8], uint offset) {
+  float2x4 arr[4] = (float2x4[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 32u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float2x4 l_a[4] = tint_symbol(a, 0u);
+  const float2x4 l_a_i = tint_symbol_1(a, 64u);
+  const float4 l_a_i_i = asfloat(a[5]);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f32/static_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat2x4_f32/static_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..008e51a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f32/static_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,17 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform a_block_ubo {
+  mat2x4 inner[4];
+} a;
+
+void f() {
+  mat2x4 l_a[4] = a.inner;
+  mat2x4 l_a_i = a.inner[2];
+  vec4 l_a_i_i = a.inner[2][1];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f32/static_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat2x4_f32/static_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..2cfad92
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f32/static_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,23 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+kernel void f(const constant tint_array<float2x4, 4>* tint_symbol [[buffer(0)]]) {
+  tint_array<float2x4, 4> const l_a = *(tint_symbol);
+  float2x4 const l_a_i = (*(tint_symbol))[2];
+  float4 const l_a_i_i = (*(tint_symbol))[2][1];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f32/static_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat2x4_f32/static_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..09f63f8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f32/static_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,49 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 27
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %a_block "a_block"
+               OpMemberName %a_block 0 "inner"
+               OpName %a "a"
+               OpName %f "f"
+               OpDecorate %a_block Block
+               OpMemberDecorate %a_block 0 Offset 0
+               OpMemberDecorate %a_block 0 ColMajor
+               OpMemberDecorate %a_block 0 MatrixStride 16
+               OpDecorate %_arr_mat2v4float_uint_4 ArrayStride 32
+               OpDecorate %a NonWritable
+               OpDecorate %a DescriptorSet 0
+               OpDecorate %a Binding 0
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%mat2v4float = OpTypeMatrix %v4float 2
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat2v4float_uint_4 = OpTypeArray %mat2v4float %uint_4
+    %a_block = OpTypeStruct %_arr_mat2v4float_uint_4
+%_ptr_Uniform_a_block = OpTypePointer Uniform %a_block
+          %a = OpVariable %_ptr_Uniform_a_block Uniform
+       %void = OpTypeVoid
+         %10 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_mat2v4float_uint_4 = OpTypePointer Uniform %_arr_mat2v4float_uint_4
+        %int = OpTypeInt 32 1
+      %int_2 = OpConstant %int 2
+%_ptr_Uniform_mat2v4float = OpTypePointer Uniform %mat2v4float
+      %int_1 = OpConstant %int 1
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+          %f = OpFunction %void None %10
+         %13 = OpLabel
+         %16 = OpAccessChain %_ptr_Uniform__arr_mat2v4float_uint_4 %a %uint_0
+         %17 = OpLoad %_arr_mat2v4float_uint_4 %16
+         %21 = OpAccessChain %_ptr_Uniform_mat2v4float %a %uint_0 %int_2
+         %22 = OpLoad %mat2v4float %21
+         %25 = OpAccessChain %_ptr_Uniform_v4float %a %uint_0 %int_2 %int_1
+         %26 = OpLoad %v4float %25
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f32/static_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat2x4_f32/static_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..e68d299
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f32/static_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,11 @@
+@group(0) @binding(0) var<uniform> a : array<mat2x4<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &(a);
+  let p_a_2 = &((*(p_a))[2]);
+  let p_a_2_1 = &((*(p_a_2))[1]);
+  let l_a : array<mat2x4<f32>, 4> = *(p_a);
+  let l_a_i : mat2x4<f32> = *(p_a_2);
+  let l_a_i_i : vec4<f32> = *(p_a_2_1);
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_builtin.wgsl b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_builtin.wgsl
new file mode 100644
index 0000000..617ab23
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_builtin.wgsl

@@ -0,0 +1,8 @@
+@group(0) @binding(0) var<uniform> u : array<mat2x4<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    let t = transpose(u[2]);
+    let l = length(u[0][1].ywxz);
+    let a = abs(u[0][1].ywxz.x);
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_builtin.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_builtin.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..e5ed937
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_builtin.wgsl.expected.dxc.hlsl

@@ -0,0 +1,17 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[8];
+};
+
+float2x4 tint_symbol(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float4x2 t = transpose(tint_symbol(u, 64u));
+  const float l = length(asfloat(u[1]).ywxz);
+  const float a = abs(asfloat(u[1]).ywxz.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_builtin.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_builtin.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..e5ed937
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_builtin.wgsl.expected.fxc.hlsl

@@ -0,0 +1,17 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[8];
+};
+
+float2x4 tint_symbol(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float4x2 t = transpose(tint_symbol(u, 64u));
+  const float l = length(asfloat(u[1]).ywxz);
+  const float a = abs(asfloat(u[1]).ywxz.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_builtin.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_builtin.wgsl.expected.glsl
new file mode 100644
index 0000000..13fd457
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_builtin.wgsl.expected.glsl

@@ -0,0 +1,17 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat2x4 inner[4];
+} u;
+
+void f() {
+  mat4x2 t = transpose(u.inner[2]);
+  float l = length(u.inner[0][1].ywxz);
+  float a = abs(u.inner[0][1].ywxz.x);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_builtin.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_builtin.wgsl.expected.msl
new file mode 100644
index 0000000..28b3230
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_builtin.wgsl.expected.msl

@@ -0,0 +1,23 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+kernel void f(const constant tint_array<float2x4, 4>* tint_symbol [[buffer(0)]]) {
+  float4x2 const t = transpose((*(tint_symbol))[2]);
+  float const l = length(float4((*(tint_symbol))[0][1]).ywxz);
+  float const a = fabs(float4((*(tint_symbol))[0][1]).ywxz[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_builtin.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_builtin.wgsl.expected.spvasm
new file mode 100644
index 0000000..17e8232
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_builtin.wgsl.expected.spvasm

@@ -0,0 +1,58 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 36
+; Schema: 0
+               OpCapability Shader
+         %24 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %_arr_mat2v4float_uint_4 ArrayStride 32
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%mat2v4float = OpTypeMatrix %v4float 2
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat2v4float_uint_4 = OpTypeArray %mat2v4float %uint_4
+    %u_block = OpTypeStruct %_arr_mat2v4float_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+       %void = OpTypeVoid
+         %10 = OpTypeFunction %void
+    %v2float = OpTypeVector %float 2
+%mat4v2float = OpTypeMatrix %v2float 4
+     %uint_0 = OpConstant %uint 0
+        %int = OpTypeInt 32 1
+      %int_2 = OpConstant %int 2
+%_ptr_Uniform_mat2v4float = OpTypePointer Uniform %mat2v4float
+         %25 = OpConstantNull %int
+      %int_1 = OpConstant %int 1
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+          %f = OpFunction %void None %10
+         %13 = OpLabel
+         %21 = OpAccessChain %_ptr_Uniform_mat2v4float %u %uint_0 %int_2
+         %22 = OpLoad %mat2v4float %21
+         %14 = OpTranspose %mat4v2float %22
+         %28 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %25 %int_1
+         %29 = OpLoad %v4float %28
+         %30 = OpVectorShuffle %v4float %29 %29 1 3 0 2
+         %23 = OpExtInst %float %24 Length %30
+         %32 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %25 %int_1
+         %33 = OpLoad %v4float %32
+         %34 = OpVectorShuffle %v4float %33 %33 1 3 0 2
+         %35 = OpCompositeExtract %float %34 0
+         %31 = OpExtInst %float %24 FAbs %35
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_builtin.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_builtin.wgsl.expected.wgsl
new file mode 100644
index 0000000..dd05e4a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_builtin.wgsl.expected.wgsl

@@ -0,0 +1,8 @@
+@group(0) @binding(0) var<uniform> u : array<mat2x4<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let t = transpose(u[2]);
+  let l = length(u[0][1].ywxz);
+  let a = abs(u[0][1].ywxz.x);
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_fn.wgsl b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_fn.wgsl
new file mode 100644
index 0000000..397bcf3
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_fn.wgsl

@@ -0,0 +1,14 @@
+@group(0) @binding(0) var<uniform> u : array<mat2x4<f32>, 4>;
+
+fn a(a : array<mat2x4<f32>, 4>) {}
+fn b(m : mat2x4<f32>) {}
+fn c(v : vec4<f32>) {}
+fn d(f : f32) {}
+
+@compute @workgroup_size(1)
+fn f() {
+    a(u);
+    b(u[1]);
+    c(u[1][0].ywxz);
+    d(u[1][0].ywxz.x);
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_fn.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_fn.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..ea9471a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_fn.wgsl.expected.dxc.hlsl

@@ -0,0 +1,41 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[8];
+};
+
+void a(float2x4 a_1[4]) {
+}
+
+void b(float2x4 m) {
+}
+
+void c(float4 v) {
+}
+
+void d(float f_1) {
+}
+
+float2x4 tint_symbol_1(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]));
+}
+
+typedef float2x4 tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[8], uint offset) {
+  float2x4 arr[4] = (float2x4[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 32u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(tint_symbol_1(u, 32u));
+  c(asfloat(u[2]).ywxz);
+  d(asfloat(u[2]).ywxz.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_fn.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_fn.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..ea9471a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_fn.wgsl.expected.fxc.hlsl

@@ -0,0 +1,41 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[8];
+};
+
+void a(float2x4 a_1[4]) {
+}
+
+void b(float2x4 m) {
+}
+
+void c(float4 v) {
+}
+
+void d(float f_1) {
+}
+
+float2x4 tint_symbol_1(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]));
+}
+
+typedef float2x4 tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[8], uint offset) {
+  float2x4 arr[4] = (float2x4[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 32u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(tint_symbol_1(u, 32u));
+  c(asfloat(u[2]).ywxz);
+  d(asfloat(u[2]).ywxz.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_fn.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_fn.wgsl.expected.glsl
new file mode 100644
index 0000000..24562db
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_fn.wgsl.expected.glsl

@@ -0,0 +1,30 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat2x4 inner[4];
+} u;
+
+void a(mat2x4 a_1[4]) {
+}
+
+void b(mat2x4 m) {
+}
+
+void c(vec4 v) {
+}
+
+void d(float f_1) {
+}
+
+void f() {
+  a(u.inner);
+  b(u.inner[1]);
+  c(u.inner[1][0].ywxz);
+  d(u.inner[1][0].ywxz.x);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_fn.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_fn.wgsl.expected.msl
new file mode 100644
index 0000000..4280139
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_fn.wgsl.expected.msl

@@ -0,0 +1,36 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+void a(tint_array<float2x4, 4> a_1) {
+}
+
+void b(float2x4 m) {
+}
+
+void c(float4 v) {
+}
+
+void d(float f_1) {
+}
+
+kernel void f(const constant tint_array<float2x4, 4>* tint_symbol [[buffer(0)]]) {
+  a(*(tint_symbol));
+  b((*(tint_symbol))[1]);
+  c(float4((*(tint_symbol))[1][0]).ywxz);
+  d(float4((*(tint_symbol))[1][0]).ywxz[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_fn.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_fn.wgsl.expected.spvasm
new file mode 100644
index 0000000..f7ee0d9
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_fn.wgsl.expected.spvasm

@@ -0,0 +1,90 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 52
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %a "a"
+               OpName %a_1 "a_1"
+               OpName %b "b"
+               OpName %m "m"
+               OpName %c "c"
+               OpName %v "v"
+               OpName %d "d"
+               OpName %f_1 "f_1"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %_arr_mat2v4float_uint_4 ArrayStride 32
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%mat2v4float = OpTypeMatrix %v4float 2
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat2v4float_uint_4 = OpTypeArray %mat2v4float %uint_4
+    %u_block = OpTypeStruct %_arr_mat2v4float_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+       %void = OpTypeVoid
+         %10 = OpTypeFunction %void %_arr_mat2v4float_uint_4
+         %15 = OpTypeFunction %void %mat2v4float
+         %19 = OpTypeFunction %void %v4float
+         %23 = OpTypeFunction %void %float
+         %27 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_mat2v4float_uint_4 = OpTypePointer Uniform %_arr_mat2v4float_uint_4
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Uniform_mat2v4float = OpTypePointer Uniform %mat2v4float
+         %42 = OpConstantNull %int
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+          %a = OpFunction %void None %10
+        %a_1 = OpFunctionParameter %_arr_mat2v4float_uint_4
+         %14 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %b = OpFunction %void None %15
+          %m = OpFunctionParameter %mat2v4float
+         %18 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %c = OpFunction %void None %19
+          %v = OpFunctionParameter %v4float
+         %22 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %d = OpFunction %void None %23
+        %f_1 = OpFunctionParameter %float
+         %26 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %27
+         %29 = OpLabel
+         %33 = OpAccessChain %_ptr_Uniform__arr_mat2v4float_uint_4 %u %uint_0
+         %34 = OpLoad %_arr_mat2v4float_uint_4 %33
+         %30 = OpFunctionCall %void %a %34
+         %39 = OpAccessChain %_ptr_Uniform_mat2v4float %u %uint_0 %int_1
+         %40 = OpLoad %mat2v4float %39
+         %35 = OpFunctionCall %void %b %40
+         %44 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %int_1 %42
+         %45 = OpLoad %v4float %44
+         %46 = OpVectorShuffle %v4float %45 %45 1 3 0 2
+         %41 = OpFunctionCall %void %c %46
+         %48 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %int_1 %42
+         %49 = OpLoad %v4float %48
+         %50 = OpVectorShuffle %v4float %49 %49 1 3 0 2
+         %51 = OpCompositeExtract %float %50 0
+         %47 = OpFunctionCall %void %d %51
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_fn.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_fn.wgsl.expected.wgsl
new file mode 100644
index 0000000..5b6e18d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_fn.wgsl.expected.wgsl

@@ -0,0 +1,21 @@
+@group(0) @binding(0) var<uniform> u : array<mat2x4<f32>, 4>;
+
+fn a(a : array<mat2x4<f32>, 4>) {
+}
+
+fn b(m : mat2x4<f32>) {
+}
+
+fn c(v : vec4<f32>) {
+}
+
+fn d(f : f32) {
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  a(u);
+  b(u[1]);
+  c(u[1][0].ywxz);
+  d(u[1][0].ywxz.x);
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_private.wgsl b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_private.wgsl
new file mode 100644
index 0000000..1ff3c55
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_private.wgsl

@@ -0,0 +1,10 @@
+@group(0) @binding(0) var<uniform> u : array<mat2x4<f32>, 4>;
+var<private> p : array<mat2x4<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    p = u;
+    p[1] = u[2];
+    p[1][0] = u[0][1].ywxz;
+    p[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_private.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_private.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..ba81ce8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_private.wgsl.expected.dxc.hlsl

@@ -0,0 +1,30 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[8];
+};
+static float2x4 p[4] = (float2x4[4])0;
+
+float2x4 tint_symbol_1(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]));
+}
+
+typedef float2x4 tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[8], uint offset) {
+  float2x4 arr[4] = (float2x4[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 32u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = tint_symbol_1(u, 64u);
+  p[1][0] = asfloat(u[1]).ywxz;
+  p[1][0].x = asfloat(u[1].x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_private.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_private.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..ba81ce8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_private.wgsl.expected.fxc.hlsl

@@ -0,0 +1,30 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[8];
+};
+static float2x4 p[4] = (float2x4[4])0;
+
+float2x4 tint_symbol_1(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]));
+}
+
+typedef float2x4 tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[8], uint offset) {
+  float2x4 arr[4] = (float2x4[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 32u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = tint_symbol_1(u, 64u);
+  p[1][0] = asfloat(u[1]).ywxz;
+  p[1][0].x = asfloat(u[1].x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_private.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_private.wgsl.expected.glsl
new file mode 100644
index 0000000..79b73d4
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_private.wgsl.expected.glsl

@@ -0,0 +1,19 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat2x4 inner[4];
+} u;
+
+mat2x4 p[4] = mat2x4[4](mat2x4(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), mat2x4(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), mat2x4(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), mat2x4(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f));
+void f() {
+  p = u.inner;
+  p[1] = u.inner[2];
+  p[1][0] = u.inner[0][1].ywxz;
+  p[1][0].x = u.inner[0][1].x;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_private.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_private.wgsl.expected.msl
new file mode 100644
index 0000000..f28eca1
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_private.wgsl.expected.msl

@@ -0,0 +1,25 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+kernel void f(const constant tint_array<float2x4, 4>* tint_symbol_1 [[buffer(0)]]) {
+  thread tint_array<float2x4, 4> tint_symbol = {};
+  tint_symbol = *(tint_symbol_1);
+  tint_symbol[1] = (*(tint_symbol_1))[2];
+  tint_symbol[1][0] = float4((*(tint_symbol_1))[0][1]).ywxz;
+  tint_symbol[1][0][0] = (*(tint_symbol_1))[0][1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_private.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_private.wgsl.expected.spvasm
new file mode 100644
index 0000000..a2c2daa
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_private.wgsl.expected.spvasm

@@ -0,0 +1,68 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 41
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %p "p"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %_arr_mat2v4float_uint_4 ArrayStride 32
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%mat2v4float = OpTypeMatrix %v4float 2
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat2v4float_uint_4 = OpTypeArray %mat2v4float %uint_4
+    %u_block = OpTypeStruct %_arr_mat2v4float_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%_ptr_Private__arr_mat2v4float_uint_4 = OpTypePointer Private %_arr_mat2v4float_uint_4
+         %12 = OpConstantNull %_arr_mat2v4float_uint_4
+          %p = OpVariable %_ptr_Private__arr_mat2v4float_uint_4 Private %12
+       %void = OpTypeVoid
+         %13 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_mat2v4float_uint_4 = OpTypePointer Uniform %_arr_mat2v4float_uint_4
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Private_mat2v4float = OpTypePointer Private %mat2v4float
+      %int_2 = OpConstant %int 2
+%_ptr_Uniform_mat2v4float = OpTypePointer Uniform %mat2v4float
+         %29 = OpConstantNull %int
+%_ptr_Private_v4float = OpTypePointer Private %v4float
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+%_ptr_Private_float = OpTypePointer Private %float
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+          %f = OpFunction %void None %13
+         %16 = OpLabel
+         %19 = OpAccessChain %_ptr_Uniform__arr_mat2v4float_uint_4 %u %uint_0
+         %20 = OpLoad %_arr_mat2v4float_uint_4 %19
+               OpStore %p %20
+         %24 = OpAccessChain %_ptr_Private_mat2v4float %p %int_1
+         %27 = OpAccessChain %_ptr_Uniform_mat2v4float %u %uint_0 %int_2
+         %28 = OpLoad %mat2v4float %27
+               OpStore %24 %28
+         %31 = OpAccessChain %_ptr_Private_v4float %p %int_1 %29
+         %33 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %29 %int_1
+         %34 = OpLoad %v4float %33
+         %35 = OpVectorShuffle %v4float %34 %34 1 3 0 2
+               OpStore %31 %35
+         %37 = OpAccessChain %_ptr_Private_float %p %int_1 %29 %uint_0
+         %39 = OpAccessChain %_ptr_Uniform_float %u %uint_0 %29 %int_1 %uint_0
+         %40 = OpLoad %float %39
+               OpStore %37 %40
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_private.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_private.wgsl.expected.wgsl
new file mode 100644
index 0000000..e503c8e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_private.wgsl.expected.wgsl

@@ -0,0 +1,11 @@
+@group(0) @binding(0) var<uniform> u : array<mat2x4<f32>, 4>;
+
+var<private> p : array<mat2x4<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  p = u;
+  p[1] = u[2];
+  p[1][0] = u[0][1].ywxz;
+  p[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_storage.wgsl b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_storage.wgsl
new file mode 100644
index 0000000..9968162
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_storage.wgsl

@@ -0,0 +1,10 @@
+@group(0) @binding(0) var<uniform> u : array<mat2x4<f32>, 4>;
+@group(0) @binding(1) var<storage, read_write> s : array<mat2x4<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    s = u;
+    s[1] = u[2];
+    s[1][0] = u[0][1].ywxz;
+    s[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_storage.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_storage.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..747c20a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_storage.wgsl.expected.dxc.hlsl

@@ -0,0 +1,44 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[8];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, float2x4 value) {
+  buffer.Store4((offset + 0u), asuint(value[0u]));
+  buffer.Store4((offset + 16u), asuint(value[1u]));
+}
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, float2x4 value[4]) {
+  float2x4 array[4] = value;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      tint_symbol_1(buffer, (offset + (i * 32u)), array[i]);
+    }
+  }
+}
+
+float2x4 tint_symbol_4(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]));
+}
+
+typedef float2x4 tint_symbol_3_ret[4];
+tint_symbol_3_ret tint_symbol_3(uint4 buffer[8], uint offset) {
+  float2x4 arr[4] = (float2x4[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_4(buffer, (offset + (i_1 * 32u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_3(u, 0u));
+  tint_symbol_1(s, 32u, tint_symbol_4(u, 64u));
+  s.Store4(32u, asuint(asfloat(u[1]).ywxz));
+  s.Store(32u, asuint(asfloat(u[1].x)));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_storage.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_storage.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..747c20a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_storage.wgsl.expected.fxc.hlsl

@@ -0,0 +1,44 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[8];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, float2x4 value) {
+  buffer.Store4((offset + 0u), asuint(value[0u]));
+  buffer.Store4((offset + 16u), asuint(value[1u]));
+}
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, float2x4 value[4]) {
+  float2x4 array[4] = value;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      tint_symbol_1(buffer, (offset + (i * 32u)), array[i]);
+    }
+  }
+}
+
+float2x4 tint_symbol_4(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]));
+}
+
+typedef float2x4 tint_symbol_3_ret[4];
+tint_symbol_3_ret tint_symbol_3(uint4 buffer[8], uint offset) {
+  float2x4 arr[4] = (float2x4[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_4(buffer, (offset + (i_1 * 32u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_3(u, 0u));
+  tint_symbol_1(s, 32u, tint_symbol_4(u, 64u));
+  s.Store4(32u, asuint(asfloat(u[1]).ywxz));
+  s.Store(32u, asuint(asfloat(u[1].x)));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_storage.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_storage.wgsl.expected.glsl
new file mode 100644
index 0000000..ac0b6a2
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_storage.wgsl.expected.glsl

@@ -0,0 +1,22 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat2x4 inner[4];
+} u;
+
+layout(binding = 1, std430) buffer u_block_ssbo {
+  mat2x4 inner[4];
+} s;
+
+void f() {
+  s.inner = u.inner;
+  s.inner[1] = u.inner[2];
+  s.inner[1][0] = u.inner[0][1].ywxz;
+  s.inner[1][0].x = u.inner[0][1].x;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_storage.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_storage.wgsl.expected.msl
new file mode 100644
index 0000000..859f287
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_storage.wgsl.expected.msl

@@ -0,0 +1,24 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+kernel void f(device tint_array<float2x4, 4>* tint_symbol [[buffer(1)]], const constant tint_array<float2x4, 4>* tint_symbol_1 [[buffer(0)]]) {
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[2];
+  (*(tint_symbol))[1][0] = float4((*(tint_symbol_1))[0][1]).ywxz;
+  (*(tint_symbol))[1][0][0] = (*(tint_symbol_1))[0][1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_storage.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_storage.wgsl.expected.spvasm
new file mode 100644
index 0000000..5e773dd
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_storage.wgsl.expected.spvasm

@@ -0,0 +1,71 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 42
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %s "s"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %_arr_mat2v4float_uint_4 ArrayStride 32
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpDecorate %s DescriptorSet 0
+               OpDecorate %s Binding 1
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%mat2v4float = OpTypeMatrix %v4float 2
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat2v4float_uint_4 = OpTypeArray %mat2v4float %uint_4
+    %u_block = OpTypeStruct %_arr_mat2v4float_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%_ptr_StorageBuffer_u_block = OpTypePointer StorageBuffer %u_block
+          %s = OpVariable %_ptr_StorageBuffer_u_block StorageBuffer
+       %void = OpTypeVoid
+         %12 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_StorageBuffer__arr_mat2v4float_uint_4 = OpTypePointer StorageBuffer %_arr_mat2v4float_uint_4
+%_ptr_Uniform__arr_mat2v4float_uint_4 = OpTypePointer Uniform %_arr_mat2v4float_uint_4
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_StorageBuffer_mat2v4float = OpTypePointer StorageBuffer %mat2v4float
+      %int_2 = OpConstant %int 2
+%_ptr_Uniform_mat2v4float = OpTypePointer Uniform %mat2v4float
+         %30 = OpConstantNull %int
+%_ptr_StorageBuffer_v4float = OpTypePointer StorageBuffer %v4float
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+          %f = OpFunction %void None %12
+         %15 = OpLabel
+         %18 = OpAccessChain %_ptr_StorageBuffer__arr_mat2v4float_uint_4 %s %uint_0
+         %20 = OpAccessChain %_ptr_Uniform__arr_mat2v4float_uint_4 %u %uint_0
+         %21 = OpLoad %_arr_mat2v4float_uint_4 %20
+               OpStore %18 %21
+         %25 = OpAccessChain %_ptr_StorageBuffer_mat2v4float %s %uint_0 %int_1
+         %28 = OpAccessChain %_ptr_Uniform_mat2v4float %u %uint_0 %int_2
+         %29 = OpLoad %mat2v4float %28
+               OpStore %25 %29
+         %32 = OpAccessChain %_ptr_StorageBuffer_v4float %s %uint_0 %int_1 %30
+         %34 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %30 %int_1
+         %35 = OpLoad %v4float %34
+         %36 = OpVectorShuffle %v4float %35 %35 1 3 0 2
+               OpStore %32 %36
+         %38 = OpAccessChain %_ptr_StorageBuffer_float %s %uint_0 %int_1 %30 %uint_0
+         %40 = OpAccessChain %_ptr_Uniform_float %u %uint_0 %30 %int_1 %uint_0
+         %41 = OpLoad %float %40
+               OpStore %38 %41
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_storage.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_storage.wgsl.expected.wgsl
new file mode 100644
index 0000000..48a709b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_storage.wgsl.expected.wgsl

@@ -0,0 +1,11 @@
+@group(0) @binding(0) var<uniform> u : array<mat2x4<f32>, 4>;
+
+@group(0) @binding(1) var<storage, read_write> s : array<mat2x4<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  s = u;
+  s[1] = u[2];
+  s[1][0] = u[0][1].ywxz;
+  s[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_workgroup.wgsl b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_workgroup.wgsl
new file mode 100644
index 0000000..39402fe
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_workgroup.wgsl

@@ -0,0 +1,10 @@
+@group(0) @binding(0) var<uniform> u : array<mat2x4<f32>, 4>;
+var<workgroup> w : array<mat2x4<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    w = u;
+    w[1] = u[2];
+    w[1][0] = u[0][1].ywxz;
+    w[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_workgroup.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_workgroup.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..963cbf8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_workgroup.wgsl.expected.dxc.hlsl

@@ -0,0 +1,45 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[8];
+};
+groupshared float2x4 w[4];
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+float2x4 tint_symbol_3(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]));
+}
+
+typedef float2x4 tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[8], uint offset) {
+  float2x4 arr[4] = (float2x4[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 32u)));
+    }
+  }
+  return arr;
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      const uint i = idx;
+      w[i] = float2x4((0.0f).xxxx, (0.0f).xxxx);
+    }
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = tint_symbol_3(u, 64u);
+  w[1][0] = asfloat(u[1]).ywxz;
+  w[1][0].x = asfloat(u[1].x);
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_workgroup.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_workgroup.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..963cbf8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_workgroup.wgsl.expected.fxc.hlsl

@@ -0,0 +1,45 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[8];
+};
+groupshared float2x4 w[4];
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+float2x4 tint_symbol_3(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]));
+}
+
+typedef float2x4 tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[8], uint offset) {
+  float2x4 arr[4] = (float2x4[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 32u)));
+    }
+  }
+  return arr;
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      const uint i = idx;
+      w[i] = float2x4((0.0f).xxxx, (0.0f).xxxx);
+    }
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = tint_symbol_3(u, 64u);
+  w[1][0] = asfloat(u[1]).ywxz;
+  w[1][0].x = asfloat(u[1].x);
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_workgroup.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_workgroup.wgsl.expected.glsl
new file mode 100644
index 0000000..e20f9d4
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_workgroup.wgsl.expected.glsl

@@ -0,0 +1,26 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat2x4 inner[4];
+} u;
+
+shared mat2x4 w[4];
+void f(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      uint i = idx;
+      w[i] = mat2x4(vec4(0.0f), vec4(0.0f));
+    }
+  }
+  barrier();
+  w = u.inner;
+  w[1] = u.inner[2];
+  w[1][0] = u.inner[0][1].ywxz;
+  w[1][0].x = u.inner[0][1].x;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f(gl_LocalInvocationIndex);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_workgroup.wgsl.expected.msl
new file mode 100644
index 0000000..d84ba92
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_workgroup.wgsl.expected.msl

@@ -0,0 +1,38 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct tint_symbol_5 {
+  tint_array<float2x4, 4> w;
+};
+
+void f_inner(uint local_invocation_index, threadgroup tint_array<float2x4, 4>* const tint_symbol, const constant tint_array<float2x4, 4>* const tint_symbol_1) {
+  for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+    uint const i = idx;
+    (*(tint_symbol))[i] = float2x4(float4(0.0f), float4(0.0f));
+  }
+  threadgroup_barrier(mem_flags::mem_threadgroup);
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[2];
+  (*(tint_symbol))[1][0] = float4((*(tint_symbol_1))[0][1]).ywxz;
+  (*(tint_symbol))[1][0][0] = (*(tint_symbol_1))[0][1][0];
+}
+
+kernel void f(const constant tint_array<float2x4, 4>* tint_symbol_4 [[buffer(0)]], threadgroup tint_symbol_5* tint_symbol_3 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
+  threadgroup tint_array<float2x4, 4>* const tint_symbol_2 = &((*(tint_symbol_3)).w);
+  f_inner(local_invocation_index, tint_symbol_2, tint_symbol_4);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_workgroup.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_workgroup.wgsl.expected.spvasm
new file mode 100644
index 0000000..809115b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_workgroup.wgsl.expected.spvasm

@@ -0,0 +1,115 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 70
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f" %local_invocation_index_1
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %local_invocation_index_1 "local_invocation_index_1"
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %w "w"
+               OpName %f_inner "f_inner"
+               OpName %local_invocation_index "local_invocation_index"
+               OpName %idx "idx"
+               OpName %f "f"
+               OpDecorate %local_invocation_index_1 BuiltIn LocalInvocationIndex
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %_arr_mat2v4float_uint_4 ArrayStride 32
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %uint = OpTypeInt 32 0
+%_ptr_Input_uint = OpTypePointer Input %uint
+%local_invocation_index_1 = OpVariable %_ptr_Input_uint Input
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%mat2v4float = OpTypeMatrix %v4float 2
+     %uint_4 = OpConstant %uint 4
+%_arr_mat2v4float_uint_4 = OpTypeArray %mat2v4float %uint_4
+    %u_block = OpTypeStruct %_arr_mat2v4float_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%_ptr_Workgroup__arr_mat2v4float_uint_4 = OpTypePointer Workgroup %_arr_mat2v4float_uint_4
+          %w = OpVariable %_ptr_Workgroup__arr_mat2v4float_uint_4 Workgroup
+       %void = OpTypeVoid
+         %14 = OpTypeFunction %void %uint
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %21 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Workgroup_mat2v4float = OpTypePointer Workgroup %mat2v4float
+         %35 = OpConstantNull %mat2v4float
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+   %uint_264 = OpConstant %uint 264
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_mat2v4float_uint_4 = OpTypePointer Uniform %_arr_mat2v4float_uint_4
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+      %int_2 = OpConstant %int 2
+%_ptr_Uniform_mat2v4float = OpTypePointer Uniform %mat2v4float
+         %53 = OpConstantNull %int
+%_ptr_Workgroup_v4float = OpTypePointer Workgroup %v4float
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+%_ptr_Workgroup_float = OpTypePointer Workgroup %float
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+         %65 = OpTypeFunction %void
+    %f_inner = OpFunction %void None %14
+%local_invocation_index = OpFunctionParameter %uint
+         %18 = OpLabel
+        %idx = OpVariable %_ptr_Function_uint Function %21
+               OpStore %idx %local_invocation_index
+               OpBranch %22
+         %22 = OpLabel
+               OpLoopMerge %23 %24 None
+               OpBranch %25
+         %25 = OpLabel
+         %27 = OpLoad %uint %idx
+         %28 = OpULessThan %bool %27 %uint_4
+         %26 = OpLogicalNot %bool %28
+               OpSelectionMerge %30 None
+               OpBranchConditional %26 %31 %30
+         %31 = OpLabel
+               OpBranch %23
+         %30 = OpLabel
+         %32 = OpLoad %uint %idx
+         %34 = OpAccessChain %_ptr_Workgroup_mat2v4float %w %32
+               OpStore %34 %35
+               OpBranch %24
+         %24 = OpLabel
+         %36 = OpLoad %uint %idx
+         %38 = OpIAdd %uint %36 %uint_1
+               OpStore %idx %38
+               OpBranch %22
+         %23 = OpLabel
+               OpControlBarrier %uint_2 %uint_2 %uint_264
+         %44 = OpAccessChain %_ptr_Uniform__arr_mat2v4float_uint_4 %u %uint_0
+         %45 = OpLoad %_arr_mat2v4float_uint_4 %44
+               OpStore %w %45
+         %48 = OpAccessChain %_ptr_Workgroup_mat2v4float %w %int_1
+         %51 = OpAccessChain %_ptr_Uniform_mat2v4float %u %uint_0 %int_2
+         %52 = OpLoad %mat2v4float %51
+               OpStore %48 %52
+         %55 = OpAccessChain %_ptr_Workgroup_v4float %w %int_1 %53
+         %57 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %53 %int_1
+         %58 = OpLoad %v4float %57
+         %59 = OpVectorShuffle %v4float %58 %58 1 3 0 2
+               OpStore %55 %59
+         %61 = OpAccessChain %_ptr_Workgroup_float %w %int_1 %53 %uint_0
+         %63 = OpAccessChain %_ptr_Uniform_float %u %uint_0 %53 %int_1 %uint_0
+         %64 = OpLoad %float %63
+               OpStore %61 %64
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %65
+         %67 = OpLabel
+         %69 = OpLoad %uint %local_invocation_index_1
+         %68 = OpFunctionCall %void %f_inner %69
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_workgroup.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_workgroup.wgsl.expected.wgsl
new file mode 100644
index 0000000..d4f0827
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat2x4_f32/to_workgroup.wgsl.expected.wgsl

@@ -0,0 +1,11 @@
+@group(0) @binding(0) var<uniform> u : array<mat2x4<f32>, 4>;
+
+var<workgroup> w : array<mat2x4<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  w = u;
+  w[1] = u[2];
+  w[1][0] = u[0][1].ywxz;
+  w[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x3_f32/dynamic_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/array/mat3x3_f32/dynamic_index_via_ptr.wgsl
new file mode 100644
index 0000000..36e3407
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x3_f32/dynamic_index_via_ptr.wgsl

@@ -0,0 +1,15 @@
+@group(0) @binding(0) var<uniform> a : array<mat3x3<f32>, 4>;
+
+var<private> counter = 0;
+fn i() -> i32 { counter++; return counter; }
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a       = &a;
+  let p_a_i     = &((*p_a)[i()]);
+  let p_a_i_i   = &((*p_a_i)[i()]);
+
+  let l_a       : array<mat3x3<f32>, 4> = *p_a;
+  let l_a_i     : mat3x3<f32>           = *p_a_i;
+  let l_a_i_i   : vec3<f32>             = *p_a_i_i;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x3_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat3x3_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..ec7bba0
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x3_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,38 @@
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[12];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+float3x3 tint_symbol_1(uint4 buffer[12], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz));
+}
+
+typedef float3x3 tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[12], uint offset) {
+  float3x3 arr[4] = (float3x3[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 48u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_a_i_save = i();
+  const int p_a_i_i_save = i();
+  const float3x3 l_a[4] = tint_symbol(a, 0u);
+  const float3x3 l_a_i = tint_symbol_1(a, (48u * uint(p_a_i_save)));
+  const uint scalar_offset_3 = (((48u * uint(p_a_i_save)) + (16u * uint(p_a_i_i_save)))) / 4;
+  const float3 l_a_i_i = asfloat(a[scalar_offset_3 / 4].xyz);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x3_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat3x3_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..ec7bba0
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x3_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,38 @@
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[12];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+float3x3 tint_symbol_1(uint4 buffer[12], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz));
+}
+
+typedef float3x3 tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[12], uint offset) {
+  float3x3 arr[4] = (float3x3[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 48u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_a_i_save = i();
+  const int p_a_i_i_save = i();
+  const float3x3 l_a[4] = tint_symbol(a, 0u);
+  const float3x3 l_a_i = tint_symbol_1(a, (48u * uint(p_a_i_save)));
+  const uint scalar_offset_3 = (((48u * uint(p_a_i_save)) + (16u * uint(p_a_i_i_save)))) / 4;
+  const float3 l_a_i_i = asfloat(a[scalar_offset_3 / 4].xyz);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x3_f32/dynamic_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat3x3_f32/dynamic_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..dfafea6
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x3_f32/dynamic_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,27 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform a_block_ubo {
+  mat3 inner[4];
+} a;
+
+int counter = 0;
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+void f() {
+  int tint_symbol = i();
+  int p_a_i_save = tint_symbol;
+  int tint_symbol_1 = i();
+  int p_a_i_i_save = tint_symbol_1;
+  mat3 l_a[4] = a.inner;
+  mat3 l_a_i = a.inner[p_a_i_save];
+  vec3 l_a_i_i = a.inner[p_a_i_save][p_a_i_i_save];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x3_f32/dynamic_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat3x3_f32/dynamic_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..33dacb2
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x3_f32/dynamic_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,33 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+int i() {
+  thread int tint_symbol_2 = 0;
+  tint_symbol_2 = as_type<int>((as_type<uint>(tint_symbol_2) + as_type<uint>(1)));
+  return tint_symbol_2;
+}
+
+kernel void f(const constant tint_array<float3x3, 4>* tint_symbol_3 [[buffer(0)]]) {
+  int const tint_symbol = i();
+  int const p_a_i_save = tint_symbol;
+  int const tint_symbol_1 = i();
+  int const p_a_i_i_save = tint_symbol_1;
+  tint_array<float3x3, 4> const l_a = *(tint_symbol_3);
+  float3x3 const l_a_i = (*(tint_symbol_3))[p_a_i_save];
+  float3 const l_a_i_i = (*(tint_symbol_3))[p_a_i_save][p_a_i_i_save];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat3x3_f32/dynamic_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat3x3_f32/dynamic_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..8e7e1f9
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x3_f32/dynamic_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,64 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 37
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %a_block "a_block"
+               OpMemberName %a_block 0 "inner"
+               OpName %a "a"
+               OpName %counter "counter"
+               OpName %i "i"
+               OpName %f "f"
+               OpDecorate %a_block Block
+               OpMemberDecorate %a_block 0 Offset 0
+               OpMemberDecorate %a_block 0 ColMajor
+               OpMemberDecorate %a_block 0 MatrixStride 16
+               OpDecorate %_arr_mat3v3float_uint_4 ArrayStride 48
+               OpDecorate %a NonWritable
+               OpDecorate %a DescriptorSet 0
+               OpDecorate %a Binding 0
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat3v3float = OpTypeMatrix %v3float 3
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat3v3float_uint_4 = OpTypeArray %mat3v3float %uint_4
+    %a_block = OpTypeStruct %_arr_mat3v3float_uint_4
+%_ptr_Uniform_a_block = OpTypePointer Uniform %a_block
+          %a = OpVariable %_ptr_Uniform_a_block Uniform
+        %int = OpTypeInt 32 1
+         %11 = OpConstantNull %int
+%_ptr_Private_int = OpTypePointer Private %int
+    %counter = OpVariable %_ptr_Private_int Private %11
+         %14 = OpTypeFunction %int
+      %int_1 = OpConstant %int 1
+       %void = OpTypeVoid
+         %21 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_mat3v3float_uint_4 = OpTypePointer Uniform %_arr_mat3v3float_uint_4
+%_ptr_Uniform_mat3v3float = OpTypePointer Uniform %mat3v3float
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+          %i = OpFunction %int None %14
+         %16 = OpLabel
+         %17 = OpLoad %int %counter
+         %19 = OpIAdd %int %17 %int_1
+               OpStore %counter %19
+         %20 = OpLoad %int %counter
+               OpReturnValue %20
+               OpFunctionEnd
+          %f = OpFunction %void None %21
+         %24 = OpLabel
+         %25 = OpFunctionCall %int %i
+         %26 = OpFunctionCall %int %i
+         %29 = OpAccessChain %_ptr_Uniform__arr_mat3v3float_uint_4 %a %uint_0
+         %30 = OpLoad %_arr_mat3v3float_uint_4 %29
+         %32 = OpAccessChain %_ptr_Uniform_mat3v3float %a %uint_0 %25
+         %33 = OpLoad %mat3v3float %32
+         %35 = OpAccessChain %_ptr_Uniform_v3float %a %uint_0 %25 %26
+         %36 = OpLoad %v3float %35
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat3x3_f32/dynamic_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat3x3_f32/dynamic_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..8523c51
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x3_f32/dynamic_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,18 @@
+@group(0) @binding(0) var<uniform> a : array<mat3x3<f32>, 4>;
+
+var<private> counter = 0;
+
+fn i() -> i32 {
+  counter++;
+  return counter;
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &(a);
+  let p_a_i = &((*(p_a))[i()]);
+  let p_a_i_i = &((*(p_a_i))[i()]);
+  let l_a : array<mat3x3<f32>, 4> = *(p_a);
+  let l_a_i : mat3x3<f32> = *(p_a_i);
+  let l_a_i_i : vec3<f32> = *(p_a_i_i);
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x3_f32/static_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/array/mat3x3_f32/static_index_via_ptr.wgsl
new file mode 100644
index 0000000..4d868b4
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x3_f32/static_index_via_ptr.wgsl

@@ -0,0 +1,12 @@
+@group(0) @binding(0) var<uniform> a : array<mat3x3<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a       = &a;
+  let p_a_2     = &((*p_a)[2]);
+  let p_a_2_1   = &((*p_a_2)[1]);
+
+  let l_a       : array<mat3x3<f32>, 4> = *p_a;
+  let l_a_i     : mat3x3<f32>           = *p_a_2;
+  let l_a_i_i   : vec3<f32>             = *p_a_2_1;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x3_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat3x3_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..ac8182a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x3_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,29 @@
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[12];
+};
+
+float3x3 tint_symbol_1(uint4 buffer[12], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz));
+}
+
+typedef float3x3 tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[12], uint offset) {
+  float3x3 arr[4] = (float3x3[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 48u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float3x3 l_a[4] = tint_symbol(a, 0u);
+  const float3x3 l_a_i = tint_symbol_1(a, 96u);
+  const float3 l_a_i_i = asfloat(a[7].xyz);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x3_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat3x3_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..ac8182a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x3_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,29 @@
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[12];
+};
+
+float3x3 tint_symbol_1(uint4 buffer[12], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz));
+}
+
+typedef float3x3 tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[12], uint offset) {
+  float3x3 arr[4] = (float3x3[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 48u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float3x3 l_a[4] = tint_symbol(a, 0u);
+  const float3x3 l_a_i = tint_symbol_1(a, 96u);
+  const float3 l_a_i_i = asfloat(a[7].xyz);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x3_f32/static_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat3x3_f32/static_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..926acda
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x3_f32/static_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,17 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform a_block_ubo {
+  mat3 inner[4];
+} a;
+
+void f() {
+  mat3 l_a[4] = a.inner;
+  mat3 l_a_i = a.inner[2];
+  vec3 l_a_i_i = a.inner[2][1];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x3_f32/static_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat3x3_f32/static_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..05a9ea9
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x3_f32/static_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,23 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+kernel void f(const constant tint_array<float3x3, 4>* tint_symbol [[buffer(0)]]) {
+  tint_array<float3x3, 4> const l_a = *(tint_symbol);
+  float3x3 const l_a_i = (*(tint_symbol))[2];
+  float3 const l_a_i_i = (*(tint_symbol))[2][1];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat3x3_f32/static_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat3x3_f32/static_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..97cff3e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x3_f32/static_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,49 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 27
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %a_block "a_block"
+               OpMemberName %a_block 0 "inner"
+               OpName %a "a"
+               OpName %f "f"
+               OpDecorate %a_block Block
+               OpMemberDecorate %a_block 0 Offset 0
+               OpMemberDecorate %a_block 0 ColMajor
+               OpMemberDecorate %a_block 0 MatrixStride 16
+               OpDecorate %_arr_mat3v3float_uint_4 ArrayStride 48
+               OpDecorate %a NonWritable
+               OpDecorate %a DescriptorSet 0
+               OpDecorate %a Binding 0
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat3v3float = OpTypeMatrix %v3float 3
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat3v3float_uint_4 = OpTypeArray %mat3v3float %uint_4
+    %a_block = OpTypeStruct %_arr_mat3v3float_uint_4
+%_ptr_Uniform_a_block = OpTypePointer Uniform %a_block
+          %a = OpVariable %_ptr_Uniform_a_block Uniform
+       %void = OpTypeVoid
+         %10 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_mat3v3float_uint_4 = OpTypePointer Uniform %_arr_mat3v3float_uint_4
+        %int = OpTypeInt 32 1
+      %int_2 = OpConstant %int 2
+%_ptr_Uniform_mat3v3float = OpTypePointer Uniform %mat3v3float
+      %int_1 = OpConstant %int 1
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+          %f = OpFunction %void None %10
+         %13 = OpLabel
+         %16 = OpAccessChain %_ptr_Uniform__arr_mat3v3float_uint_4 %a %uint_0
+         %17 = OpLoad %_arr_mat3v3float_uint_4 %16
+         %21 = OpAccessChain %_ptr_Uniform_mat3v3float %a %uint_0 %int_2
+         %22 = OpLoad %mat3v3float %21
+         %25 = OpAccessChain %_ptr_Uniform_v3float %a %uint_0 %int_2 %int_1
+         %26 = OpLoad %v3float %25
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat3x3_f32/static_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat3x3_f32/static_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..f147611
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x3_f32/static_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,11 @@
+@group(0) @binding(0) var<uniform> a : array<mat3x3<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &(a);
+  let p_a_2 = &((*(p_a))[2]);
+  let p_a_2_1 = &((*(p_a_2))[1]);
+  let l_a : array<mat3x3<f32>, 4> = *(p_a);
+  let l_a_i : mat3x3<f32> = *(p_a_2);
+  let l_a_i_i : vec3<f32> = *(p_a_2_1);
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_builtin.wgsl b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_builtin.wgsl
new file mode 100644
index 0000000..ac6fec7
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_builtin.wgsl

@@ -0,0 +1,8 @@
+@group(0) @binding(0) var<uniform> u : array<mat3x3<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    let t = transpose(u[2]);
+    let l = length(u[0][1].zxy);
+    let a = abs(u[0][1].zxy.x);
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_builtin.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_builtin.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..6bf5247
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_builtin.wgsl.expected.dxc.hlsl

@@ -0,0 +1,18 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[12];
+};
+
+float3x3 tint_symbol(uint4 buffer[12], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float3x3 t = transpose(tint_symbol(u, 96u));
+  const float l = length(asfloat(u[1].xyz).zxy);
+  const float a = abs(asfloat(u[1].xyz).zxy.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_builtin.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_builtin.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..6bf5247
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_builtin.wgsl.expected.fxc.hlsl

@@ -0,0 +1,18 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[12];
+};
+
+float3x3 tint_symbol(uint4 buffer[12], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float3x3 t = transpose(tint_symbol(u, 96u));
+  const float l = length(asfloat(u[1].xyz).zxy);
+  const float a = abs(asfloat(u[1].xyz).zxy.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_builtin.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_builtin.wgsl.expected.glsl
new file mode 100644
index 0000000..c6be6eb
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_builtin.wgsl.expected.glsl

@@ -0,0 +1,17 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat3 inner[4];
+} u;
+
+void f() {
+  mat3 t = transpose(u.inner[2]);
+  float l = length(u.inner[0][1].zxy);
+  float a = abs(u.inner[0][1].zxy.x);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_builtin.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_builtin.wgsl.expected.msl
new file mode 100644
index 0000000..16b5d79
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_builtin.wgsl.expected.msl

@@ -0,0 +1,23 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+kernel void f(const constant tint_array<float3x3, 4>* tint_symbol [[buffer(0)]]) {
+  float3x3 const t = transpose((*(tint_symbol))[2]);
+  float const l = length(float3((*(tint_symbol))[0][1]).zxy);
+  float const a = fabs(float3((*(tint_symbol))[0][1]).zxy[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_builtin.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_builtin.wgsl.expected.spvasm
new file mode 100644
index 0000000..5c1e625
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_builtin.wgsl.expected.spvasm

@@ -0,0 +1,56 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 34
+; Schema: 0
+               OpCapability Shader
+         %22 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %_arr_mat3v3float_uint_4 ArrayStride 48
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat3v3float = OpTypeMatrix %v3float 3
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat3v3float_uint_4 = OpTypeArray %mat3v3float %uint_4
+    %u_block = OpTypeStruct %_arr_mat3v3float_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+       %void = OpTypeVoid
+         %10 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+        %int = OpTypeInt 32 1
+      %int_2 = OpConstant %int 2
+%_ptr_Uniform_mat3v3float = OpTypePointer Uniform %mat3v3float
+         %23 = OpConstantNull %int
+      %int_1 = OpConstant %int 1
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+          %f = OpFunction %void None %10
+         %13 = OpLabel
+         %19 = OpAccessChain %_ptr_Uniform_mat3v3float %u %uint_0 %int_2
+         %20 = OpLoad %mat3v3float %19
+         %14 = OpTranspose %mat3v3float %20
+         %26 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %23 %int_1
+         %27 = OpLoad %v3float %26
+         %28 = OpVectorShuffle %v3float %27 %27 2 0 1
+         %21 = OpExtInst %float %22 Length %28
+         %30 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %23 %int_1
+         %31 = OpLoad %v3float %30
+         %32 = OpVectorShuffle %v3float %31 %31 2 0 1
+         %33 = OpCompositeExtract %float %32 0
+         %29 = OpExtInst %float %22 FAbs %33
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_builtin.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_builtin.wgsl.expected.wgsl
new file mode 100644
index 0000000..5a0760d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_builtin.wgsl.expected.wgsl

@@ -0,0 +1,8 @@
+@group(0) @binding(0) var<uniform> u : array<mat3x3<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let t = transpose(u[2]);
+  let l = length(u[0][1].zxy);
+  let a = abs(u[0][1].zxy.x);
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_fn.wgsl b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_fn.wgsl
new file mode 100644
index 0000000..b35b784
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_fn.wgsl

@@ -0,0 +1,14 @@
+@group(0) @binding(0) var<uniform> u : array<mat3x3<f32>, 4>;
+
+fn a(a : array<mat3x3<f32>, 4>) {}
+fn b(m : mat3x3<f32>) {}
+fn c(v : vec3<f32>) {}
+fn d(f : f32) {}
+
+@compute @workgroup_size(1)
+fn f() {
+    a(u);
+    b(u[1]);
+    c(u[1][0].zxy);
+    d(u[1][0].zxy.x);
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_fn.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_fn.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..3251692
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_fn.wgsl.expected.dxc.hlsl

@@ -0,0 +1,42 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[12];
+};
+
+void a(float3x3 a_1[4]) {
+}
+
+void b(float3x3 m) {
+}
+
+void c(float3 v) {
+}
+
+void d(float f_1) {
+}
+
+float3x3 tint_symbol_1(uint4 buffer[12], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz));
+}
+
+typedef float3x3 tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[12], uint offset) {
+  float3x3 arr[4] = (float3x3[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 48u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(tint_symbol_1(u, 48u));
+  c(asfloat(u[3].xyz).zxy);
+  d(asfloat(u[3].xyz).zxy.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_fn.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_fn.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..3251692
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_fn.wgsl.expected.fxc.hlsl

@@ -0,0 +1,42 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[12];
+};
+
+void a(float3x3 a_1[4]) {
+}
+
+void b(float3x3 m) {
+}
+
+void c(float3 v) {
+}
+
+void d(float f_1) {
+}
+
+float3x3 tint_symbol_1(uint4 buffer[12], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz));
+}
+
+typedef float3x3 tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[12], uint offset) {
+  float3x3 arr[4] = (float3x3[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 48u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(tint_symbol_1(u, 48u));
+  c(asfloat(u[3].xyz).zxy);
+  d(asfloat(u[3].xyz).zxy.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_fn.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_fn.wgsl.expected.glsl
new file mode 100644
index 0000000..d6e73ff
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_fn.wgsl.expected.glsl

@@ -0,0 +1,30 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat3 inner[4];
+} u;
+
+void a(mat3 a_1[4]) {
+}
+
+void b(mat3 m) {
+}
+
+void c(vec3 v) {
+}
+
+void d(float f_1) {
+}
+
+void f() {
+  a(u.inner);
+  b(u.inner[1]);
+  c(u.inner[1][0].zxy);
+  d(u.inner[1][0].zxy.x);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_fn.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_fn.wgsl.expected.msl
new file mode 100644
index 0000000..15297bd
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_fn.wgsl.expected.msl

@@ -0,0 +1,36 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+void a(tint_array<float3x3, 4> a_1) {
+}
+
+void b(float3x3 m) {
+}
+
+void c(float3 v) {
+}
+
+void d(float f_1) {
+}
+
+kernel void f(const constant tint_array<float3x3, 4>* tint_symbol [[buffer(0)]]) {
+  a(*(tint_symbol));
+  b((*(tint_symbol))[1]);
+  c(float3((*(tint_symbol))[1][0]).zxy);
+  d(float3((*(tint_symbol))[1][0]).zxy[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_fn.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_fn.wgsl.expected.spvasm
new file mode 100644
index 0000000..3cdc026
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_fn.wgsl.expected.spvasm

@@ -0,0 +1,90 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 52
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %a "a"
+               OpName %a_1 "a_1"
+               OpName %b "b"
+               OpName %m "m"
+               OpName %c "c"
+               OpName %v "v"
+               OpName %d "d"
+               OpName %f_1 "f_1"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %_arr_mat3v3float_uint_4 ArrayStride 48
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat3v3float = OpTypeMatrix %v3float 3
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat3v3float_uint_4 = OpTypeArray %mat3v3float %uint_4
+    %u_block = OpTypeStruct %_arr_mat3v3float_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+       %void = OpTypeVoid
+         %10 = OpTypeFunction %void %_arr_mat3v3float_uint_4
+         %15 = OpTypeFunction %void %mat3v3float
+         %19 = OpTypeFunction %void %v3float
+         %23 = OpTypeFunction %void %float
+         %27 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_mat3v3float_uint_4 = OpTypePointer Uniform %_arr_mat3v3float_uint_4
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Uniform_mat3v3float = OpTypePointer Uniform %mat3v3float
+         %42 = OpConstantNull %int
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+          %a = OpFunction %void None %10
+        %a_1 = OpFunctionParameter %_arr_mat3v3float_uint_4
+         %14 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %b = OpFunction %void None %15
+          %m = OpFunctionParameter %mat3v3float
+         %18 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %c = OpFunction %void None %19
+          %v = OpFunctionParameter %v3float
+         %22 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %d = OpFunction %void None %23
+        %f_1 = OpFunctionParameter %float
+         %26 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %27
+         %29 = OpLabel
+         %33 = OpAccessChain %_ptr_Uniform__arr_mat3v3float_uint_4 %u %uint_0
+         %34 = OpLoad %_arr_mat3v3float_uint_4 %33
+         %30 = OpFunctionCall %void %a %34
+         %39 = OpAccessChain %_ptr_Uniform_mat3v3float %u %uint_0 %int_1
+         %40 = OpLoad %mat3v3float %39
+         %35 = OpFunctionCall %void %b %40
+         %44 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %int_1 %42
+         %45 = OpLoad %v3float %44
+         %46 = OpVectorShuffle %v3float %45 %45 2 0 1
+         %41 = OpFunctionCall %void %c %46
+         %48 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %int_1 %42
+         %49 = OpLoad %v3float %48
+         %50 = OpVectorShuffle %v3float %49 %49 2 0 1
+         %51 = OpCompositeExtract %float %50 0
+         %47 = OpFunctionCall %void %d %51
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_fn.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_fn.wgsl.expected.wgsl
new file mode 100644
index 0000000..a78c8c9
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_fn.wgsl.expected.wgsl

@@ -0,0 +1,21 @@
+@group(0) @binding(0) var<uniform> u : array<mat3x3<f32>, 4>;
+
+fn a(a : array<mat3x3<f32>, 4>) {
+}
+
+fn b(m : mat3x3<f32>) {
+}
+
+fn c(v : vec3<f32>) {
+}
+
+fn d(f : f32) {
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  a(u);
+  b(u[1]);
+  c(u[1][0].zxy);
+  d(u[1][0].zxy.x);
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_private.wgsl b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_private.wgsl
new file mode 100644
index 0000000..133e0e5
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_private.wgsl

@@ -0,0 +1,10 @@
+@group(0) @binding(0) var<uniform> u : array<mat3x3<f32>, 4>;
+var<private> p : array<mat3x3<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    p = u;
+    p[1] = u[2];
+    p[1][0] = u[0][1].zxy;
+    p[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_private.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_private.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..4bf04e7
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_private.wgsl.expected.dxc.hlsl

@@ -0,0 +1,31 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[12];
+};
+static float3x3 p[4] = (float3x3[4])0;
+
+float3x3 tint_symbol_1(uint4 buffer[12], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz));
+}
+
+typedef float3x3 tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[12], uint offset) {
+  float3x3 arr[4] = (float3x3[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 48u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = tint_symbol_1(u, 96u);
+  p[1][0] = asfloat(u[1].xyz).zxy;
+  p[1][0].x = asfloat(u[1].x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_private.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_private.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..4bf04e7
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_private.wgsl.expected.fxc.hlsl

@@ -0,0 +1,31 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[12];
+};
+static float3x3 p[4] = (float3x3[4])0;
+
+float3x3 tint_symbol_1(uint4 buffer[12], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz));
+}
+
+typedef float3x3 tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[12], uint offset) {
+  float3x3 arr[4] = (float3x3[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 48u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = tint_symbol_1(u, 96u);
+  p[1][0] = asfloat(u[1].xyz).zxy;
+  p[1][0].x = asfloat(u[1].x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_private.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_private.wgsl.expected.glsl
new file mode 100644
index 0000000..3aac30a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_private.wgsl.expected.glsl

@@ -0,0 +1,19 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat3 inner[4];
+} u;
+
+mat3 p[4] = mat3[4](mat3(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), mat3(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), mat3(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), mat3(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f));
+void f() {
+  p = u.inner;
+  p[1] = u.inner[2];
+  p[1][0] = u.inner[0][1].zxy;
+  p[1][0].x = u.inner[0][1].x;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_private.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_private.wgsl.expected.msl
new file mode 100644
index 0000000..0c7a2db
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_private.wgsl.expected.msl

@@ -0,0 +1,25 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+kernel void f(const constant tint_array<float3x3, 4>* tint_symbol_1 [[buffer(0)]]) {
+  thread tint_array<float3x3, 4> tint_symbol = {};
+  tint_symbol = *(tint_symbol_1);
+  tint_symbol[1] = (*(tint_symbol_1))[2];
+  tint_symbol[1][0] = float3((*(tint_symbol_1))[0][1]).zxy;
+  tint_symbol[1][0][0] = (*(tint_symbol_1))[0][1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_private.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_private.wgsl.expected.spvasm
new file mode 100644
index 0000000..e3b8dae
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_private.wgsl.expected.spvasm

@@ -0,0 +1,68 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 41
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %p "p"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %_arr_mat3v3float_uint_4 ArrayStride 48
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat3v3float = OpTypeMatrix %v3float 3
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat3v3float_uint_4 = OpTypeArray %mat3v3float %uint_4
+    %u_block = OpTypeStruct %_arr_mat3v3float_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%_ptr_Private__arr_mat3v3float_uint_4 = OpTypePointer Private %_arr_mat3v3float_uint_4
+         %12 = OpConstantNull %_arr_mat3v3float_uint_4
+          %p = OpVariable %_ptr_Private__arr_mat3v3float_uint_4 Private %12
+       %void = OpTypeVoid
+         %13 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_mat3v3float_uint_4 = OpTypePointer Uniform %_arr_mat3v3float_uint_4
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Private_mat3v3float = OpTypePointer Private %mat3v3float
+      %int_2 = OpConstant %int 2
+%_ptr_Uniform_mat3v3float = OpTypePointer Uniform %mat3v3float
+         %29 = OpConstantNull %int
+%_ptr_Private_v3float = OpTypePointer Private %v3float
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+%_ptr_Private_float = OpTypePointer Private %float
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+          %f = OpFunction %void None %13
+         %16 = OpLabel
+         %19 = OpAccessChain %_ptr_Uniform__arr_mat3v3float_uint_4 %u %uint_0
+         %20 = OpLoad %_arr_mat3v3float_uint_4 %19
+               OpStore %p %20
+         %24 = OpAccessChain %_ptr_Private_mat3v3float %p %int_1
+         %27 = OpAccessChain %_ptr_Uniform_mat3v3float %u %uint_0 %int_2
+         %28 = OpLoad %mat3v3float %27
+               OpStore %24 %28
+         %31 = OpAccessChain %_ptr_Private_v3float %p %int_1 %29
+         %33 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %29 %int_1
+         %34 = OpLoad %v3float %33
+         %35 = OpVectorShuffle %v3float %34 %34 2 0 1
+               OpStore %31 %35
+         %37 = OpAccessChain %_ptr_Private_float %p %int_1 %29 %uint_0
+         %39 = OpAccessChain %_ptr_Uniform_float %u %uint_0 %29 %int_1 %uint_0
+         %40 = OpLoad %float %39
+               OpStore %37 %40
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_private.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_private.wgsl.expected.wgsl
new file mode 100644
index 0000000..59b72cf
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_private.wgsl.expected.wgsl

@@ -0,0 +1,11 @@
+@group(0) @binding(0) var<uniform> u : array<mat3x3<f32>, 4>;
+
+var<private> p : array<mat3x3<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  p = u;
+  p[1] = u[2];
+  p[1][0] = u[0][1].zxy;
+  p[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_storage.wgsl b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_storage.wgsl
new file mode 100644
index 0000000..cc5a3f1
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_storage.wgsl

@@ -0,0 +1,10 @@
+@group(0) @binding(0) var<uniform> u : array<mat3x3<f32>, 4>;
+@group(0) @binding(1) var<storage, read_write> s : array<mat3x3<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    s = u;
+    s[1] = u[2];
+    s[1][0] = u[0][1].zxy;
+    s[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_storage.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_storage.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..6dc59a8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_storage.wgsl.expected.dxc.hlsl

@@ -0,0 +1,46 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[12];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, float3x3 value) {
+  buffer.Store3((offset + 0u), asuint(value[0u]));
+  buffer.Store3((offset + 16u), asuint(value[1u]));
+  buffer.Store3((offset + 32u), asuint(value[2u]));
+}
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, float3x3 value[4]) {
+  float3x3 array[4] = value;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      tint_symbol_1(buffer, (offset + (i * 48u)), array[i]);
+    }
+  }
+}
+
+float3x3 tint_symbol_4(uint4 buffer[12], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz));
+}
+
+typedef float3x3 tint_symbol_3_ret[4];
+tint_symbol_3_ret tint_symbol_3(uint4 buffer[12], uint offset) {
+  float3x3 arr[4] = (float3x3[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_4(buffer, (offset + (i_1 * 48u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_3(u, 0u));
+  tint_symbol_1(s, 48u, tint_symbol_4(u, 96u));
+  s.Store3(48u, asuint(asfloat(u[1].xyz).zxy));
+  s.Store(48u, asuint(asfloat(u[1].x)));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_storage.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_storage.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..6dc59a8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_storage.wgsl.expected.fxc.hlsl

@@ -0,0 +1,46 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[12];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, float3x3 value) {
+  buffer.Store3((offset + 0u), asuint(value[0u]));
+  buffer.Store3((offset + 16u), asuint(value[1u]));
+  buffer.Store3((offset + 32u), asuint(value[2u]));
+}
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, float3x3 value[4]) {
+  float3x3 array[4] = value;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      tint_symbol_1(buffer, (offset + (i * 48u)), array[i]);
+    }
+  }
+}
+
+float3x3 tint_symbol_4(uint4 buffer[12], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz));
+}
+
+typedef float3x3 tint_symbol_3_ret[4];
+tint_symbol_3_ret tint_symbol_3(uint4 buffer[12], uint offset) {
+  float3x3 arr[4] = (float3x3[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_4(buffer, (offset + (i_1 * 48u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_3(u, 0u));
+  tint_symbol_1(s, 48u, tint_symbol_4(u, 96u));
+  s.Store3(48u, asuint(asfloat(u[1].xyz).zxy));
+  s.Store(48u, asuint(asfloat(u[1].x)));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_storage.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_storage.wgsl.expected.glsl
new file mode 100644
index 0000000..9105811
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_storage.wgsl.expected.glsl

@@ -0,0 +1,22 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat3 inner[4];
+} u;
+
+layout(binding = 1, std430) buffer u_block_ssbo {
+  mat3 inner[4];
+} s;
+
+void f() {
+  s.inner = u.inner;
+  s.inner[1] = u.inner[2];
+  s.inner[1][0] = u.inner[0][1].zxy;
+  s.inner[1][0].x = u.inner[0][1].x;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_storage.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_storage.wgsl.expected.msl
new file mode 100644
index 0000000..048579a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_storage.wgsl.expected.msl

@@ -0,0 +1,24 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+kernel void f(device tint_array<float3x3, 4>* tint_symbol [[buffer(1)]], const constant tint_array<float3x3, 4>* tint_symbol_1 [[buffer(0)]]) {
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[2];
+  (*(tint_symbol))[1][0] = float3((*(tint_symbol_1))[0][1]).zxy;
+  (*(tint_symbol))[1][0][0] = (*(tint_symbol_1))[0][1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_storage.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_storage.wgsl.expected.spvasm
new file mode 100644
index 0000000..2dd00c0
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_storage.wgsl.expected.spvasm

@@ -0,0 +1,71 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 42
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %s "s"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %_arr_mat3v3float_uint_4 ArrayStride 48
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpDecorate %s DescriptorSet 0
+               OpDecorate %s Binding 1
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat3v3float = OpTypeMatrix %v3float 3
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat3v3float_uint_4 = OpTypeArray %mat3v3float %uint_4
+    %u_block = OpTypeStruct %_arr_mat3v3float_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%_ptr_StorageBuffer_u_block = OpTypePointer StorageBuffer %u_block
+          %s = OpVariable %_ptr_StorageBuffer_u_block StorageBuffer
+       %void = OpTypeVoid
+         %12 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_StorageBuffer__arr_mat3v3float_uint_4 = OpTypePointer StorageBuffer %_arr_mat3v3float_uint_4
+%_ptr_Uniform__arr_mat3v3float_uint_4 = OpTypePointer Uniform %_arr_mat3v3float_uint_4
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_StorageBuffer_mat3v3float = OpTypePointer StorageBuffer %mat3v3float
+      %int_2 = OpConstant %int 2
+%_ptr_Uniform_mat3v3float = OpTypePointer Uniform %mat3v3float
+         %30 = OpConstantNull %int
+%_ptr_StorageBuffer_v3float = OpTypePointer StorageBuffer %v3float
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+          %f = OpFunction %void None %12
+         %15 = OpLabel
+         %18 = OpAccessChain %_ptr_StorageBuffer__arr_mat3v3float_uint_4 %s %uint_0
+         %20 = OpAccessChain %_ptr_Uniform__arr_mat3v3float_uint_4 %u %uint_0
+         %21 = OpLoad %_arr_mat3v3float_uint_4 %20
+               OpStore %18 %21
+         %25 = OpAccessChain %_ptr_StorageBuffer_mat3v3float %s %uint_0 %int_1
+         %28 = OpAccessChain %_ptr_Uniform_mat3v3float %u %uint_0 %int_2
+         %29 = OpLoad %mat3v3float %28
+               OpStore %25 %29
+         %32 = OpAccessChain %_ptr_StorageBuffer_v3float %s %uint_0 %int_1 %30
+         %34 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %30 %int_1
+         %35 = OpLoad %v3float %34
+         %36 = OpVectorShuffle %v3float %35 %35 2 0 1
+               OpStore %32 %36
+         %38 = OpAccessChain %_ptr_StorageBuffer_float %s %uint_0 %int_1 %30 %uint_0
+         %40 = OpAccessChain %_ptr_Uniform_float %u %uint_0 %30 %int_1 %uint_0
+         %41 = OpLoad %float %40
+               OpStore %38 %41
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_storage.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_storage.wgsl.expected.wgsl
new file mode 100644
index 0000000..a4f9263
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_storage.wgsl.expected.wgsl

@@ -0,0 +1,11 @@
+@group(0) @binding(0) var<uniform> u : array<mat3x3<f32>, 4>;
+
+@group(0) @binding(1) var<storage, read_write> s : array<mat3x3<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  s = u;
+  s[1] = u[2];
+  s[1][0] = u[0][1].zxy;
+  s[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_workgroup.wgsl b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_workgroup.wgsl
new file mode 100644
index 0000000..8f011e3
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_workgroup.wgsl

@@ -0,0 +1,10 @@
+@group(0) @binding(0) var<uniform> u : array<mat3x3<f32>, 4>;
+var<workgroup> w : array<mat3x3<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    w = u;
+    w[1] = u[2];
+    w[1][0] = u[0][1].zxy;
+    w[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_workgroup.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_workgroup.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..1751886
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_workgroup.wgsl.expected.dxc.hlsl

@@ -0,0 +1,46 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[12];
+};
+groupshared float3x3 w[4];
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+float3x3 tint_symbol_3(uint4 buffer[12], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz));
+}
+
+typedef float3x3 tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[12], uint offset) {
+  float3x3 arr[4] = (float3x3[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 48u)));
+    }
+  }
+  return arr;
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      const uint i = idx;
+      w[i] = float3x3((0.0f).xxx, (0.0f).xxx, (0.0f).xxx);
+    }
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = tint_symbol_3(u, 96u);
+  w[1][0] = asfloat(u[1].xyz).zxy;
+  w[1][0].x = asfloat(u[1].x);
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_workgroup.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_workgroup.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..1751886
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_workgroup.wgsl.expected.fxc.hlsl

@@ -0,0 +1,46 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[12];
+};
+groupshared float3x3 w[4];
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+float3x3 tint_symbol_3(uint4 buffer[12], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz));
+}
+
+typedef float3x3 tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[12], uint offset) {
+  float3x3 arr[4] = (float3x3[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 48u)));
+    }
+  }
+  return arr;
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      const uint i = idx;
+      w[i] = float3x3((0.0f).xxx, (0.0f).xxx, (0.0f).xxx);
+    }
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = tint_symbol_3(u, 96u);
+  w[1][0] = asfloat(u[1].xyz).zxy;
+  w[1][0].x = asfloat(u[1].x);
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_workgroup.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_workgroup.wgsl.expected.glsl
new file mode 100644
index 0000000..0670f59
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_workgroup.wgsl.expected.glsl

@@ -0,0 +1,26 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat3 inner[4];
+} u;
+
+shared mat3 w[4];
+void f(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      uint i = idx;
+      w[i] = mat3(vec3(0.0f), vec3(0.0f), vec3(0.0f));
+    }
+  }
+  barrier();
+  w = u.inner;
+  w[1] = u.inner[2];
+  w[1][0] = u.inner[0][1].zxy;
+  w[1][0].x = u.inner[0][1].x;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f(gl_LocalInvocationIndex);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_workgroup.wgsl.expected.msl
new file mode 100644
index 0000000..d1bf3a7
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_workgroup.wgsl.expected.msl

@@ -0,0 +1,38 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct tint_symbol_5 {
+  tint_array<float3x3, 4> w;
+};
+
+void f_inner(uint local_invocation_index, threadgroup tint_array<float3x3, 4>* const tint_symbol, const constant tint_array<float3x3, 4>* const tint_symbol_1) {
+  for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+    uint const i = idx;
+    (*(tint_symbol))[i] = float3x3(float3(0.0f), float3(0.0f), float3(0.0f));
+  }
+  threadgroup_barrier(mem_flags::mem_threadgroup);
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[2];
+  (*(tint_symbol))[1][0] = float3((*(tint_symbol_1))[0][1]).zxy;
+  (*(tint_symbol))[1][0][0] = (*(tint_symbol_1))[0][1][0];
+}
+
+kernel void f(const constant tint_array<float3x3, 4>* tint_symbol_4 [[buffer(0)]], threadgroup tint_symbol_5* tint_symbol_3 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
+  threadgroup tint_array<float3x3, 4>* const tint_symbol_2 = &((*(tint_symbol_3)).w);
+  f_inner(local_invocation_index, tint_symbol_2, tint_symbol_4);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_workgroup.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_workgroup.wgsl.expected.spvasm
new file mode 100644
index 0000000..309d2ed
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_workgroup.wgsl.expected.spvasm

@@ -0,0 +1,115 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 70
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f" %local_invocation_index_1
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %local_invocation_index_1 "local_invocation_index_1"
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %w "w"
+               OpName %f_inner "f_inner"
+               OpName %local_invocation_index "local_invocation_index"
+               OpName %idx "idx"
+               OpName %f "f"
+               OpDecorate %local_invocation_index_1 BuiltIn LocalInvocationIndex
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %_arr_mat3v3float_uint_4 ArrayStride 48
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %uint = OpTypeInt 32 0
+%_ptr_Input_uint = OpTypePointer Input %uint
+%local_invocation_index_1 = OpVariable %_ptr_Input_uint Input
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat3v3float = OpTypeMatrix %v3float 3
+     %uint_4 = OpConstant %uint 4
+%_arr_mat3v3float_uint_4 = OpTypeArray %mat3v3float %uint_4
+    %u_block = OpTypeStruct %_arr_mat3v3float_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%_ptr_Workgroup__arr_mat3v3float_uint_4 = OpTypePointer Workgroup %_arr_mat3v3float_uint_4
+          %w = OpVariable %_ptr_Workgroup__arr_mat3v3float_uint_4 Workgroup
+       %void = OpTypeVoid
+         %14 = OpTypeFunction %void %uint
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %21 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Workgroup_mat3v3float = OpTypePointer Workgroup %mat3v3float
+         %35 = OpConstantNull %mat3v3float
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+   %uint_264 = OpConstant %uint 264
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_mat3v3float_uint_4 = OpTypePointer Uniform %_arr_mat3v3float_uint_4
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+      %int_2 = OpConstant %int 2
+%_ptr_Uniform_mat3v3float = OpTypePointer Uniform %mat3v3float
+         %53 = OpConstantNull %int
+%_ptr_Workgroup_v3float = OpTypePointer Workgroup %v3float
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+%_ptr_Workgroup_float = OpTypePointer Workgroup %float
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+         %65 = OpTypeFunction %void
+    %f_inner = OpFunction %void None %14
+%local_invocation_index = OpFunctionParameter %uint
+         %18 = OpLabel
+        %idx = OpVariable %_ptr_Function_uint Function %21
+               OpStore %idx %local_invocation_index
+               OpBranch %22
+         %22 = OpLabel
+               OpLoopMerge %23 %24 None
+               OpBranch %25
+         %25 = OpLabel
+         %27 = OpLoad %uint %idx
+         %28 = OpULessThan %bool %27 %uint_4
+         %26 = OpLogicalNot %bool %28
+               OpSelectionMerge %30 None
+               OpBranchConditional %26 %31 %30
+         %31 = OpLabel
+               OpBranch %23
+         %30 = OpLabel
+         %32 = OpLoad %uint %idx
+         %34 = OpAccessChain %_ptr_Workgroup_mat3v3float %w %32
+               OpStore %34 %35
+               OpBranch %24
+         %24 = OpLabel
+         %36 = OpLoad %uint %idx
+         %38 = OpIAdd %uint %36 %uint_1
+               OpStore %idx %38
+               OpBranch %22
+         %23 = OpLabel
+               OpControlBarrier %uint_2 %uint_2 %uint_264
+         %44 = OpAccessChain %_ptr_Uniform__arr_mat3v3float_uint_4 %u %uint_0
+         %45 = OpLoad %_arr_mat3v3float_uint_4 %44
+               OpStore %w %45
+         %48 = OpAccessChain %_ptr_Workgroup_mat3v3float %w %int_1
+         %51 = OpAccessChain %_ptr_Uniform_mat3v3float %u %uint_0 %int_2
+         %52 = OpLoad %mat3v3float %51
+               OpStore %48 %52
+         %55 = OpAccessChain %_ptr_Workgroup_v3float %w %int_1 %53
+         %57 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %53 %int_1
+         %58 = OpLoad %v3float %57
+         %59 = OpVectorShuffle %v3float %58 %58 2 0 1
+               OpStore %55 %59
+         %61 = OpAccessChain %_ptr_Workgroup_float %w %int_1 %53 %uint_0
+         %63 = OpAccessChain %_ptr_Uniform_float %u %uint_0 %53 %int_1 %uint_0
+         %64 = OpLoad %float %63
+               OpStore %61 %64
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %65
+         %67 = OpLabel
+         %69 = OpLoad %uint %local_invocation_index_1
+         %68 = OpFunctionCall %void %f_inner %69
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_workgroup.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_workgroup.wgsl.expected.wgsl
new file mode 100644
index 0000000..8e7202f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_workgroup.wgsl.expected.wgsl

@@ -0,0 +1,11 @@
+@group(0) @binding(0) var<uniform> u : array<mat3x3<f32>, 4>;
+
+var<workgroup> w : array<mat3x3<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  w = u;
+  w[1] = u[2];
+  w[1][0] = u[0][1].zxy;
+  w[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x4_f32/dynamic_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/array/mat3x4_f32/dynamic_index_via_ptr.wgsl
new file mode 100644
index 0000000..8a601d4
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x4_f32/dynamic_index_via_ptr.wgsl

@@ -0,0 +1,15 @@
+@group(0) @binding(0) var<uniform> a : array<mat3x4<f32>, 4>;
+
+var<private> counter = 0;
+fn i() -> i32 { counter++; return counter; }
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a       = &a;
+  let p_a_i     = &((*p_a)[i()]);
+  let p_a_i_i   = &((*p_a_i)[i()]);
+
+  let l_a       : array<mat3x4<f32>, 4> = *p_a;
+  let l_a_i     : mat3x4<f32>           = *p_a_i;
+  let l_a_i_i   : vec4<f32>             = *p_a_i_i;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x4_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat3x4_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..89cb90f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x4_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,38 @@
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[12];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+float3x4 tint_symbol_1(uint4 buffer[12], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]));
+}
+
+typedef float3x4 tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[12], uint offset) {
+  float3x4 arr[4] = (float3x4[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 48u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_a_i_save = i();
+  const int p_a_i_i_save = i();
+  const float3x4 l_a[4] = tint_symbol(a, 0u);
+  const float3x4 l_a_i = tint_symbol_1(a, (48u * uint(p_a_i_save)));
+  const uint scalar_offset_3 = (((48u * uint(p_a_i_save)) + (16u * uint(p_a_i_i_save)))) / 4;
+  const float4 l_a_i_i = asfloat(a[scalar_offset_3 / 4]);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x4_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat3x4_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..89cb90f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x4_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,38 @@
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[12];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+float3x4 tint_symbol_1(uint4 buffer[12], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]));
+}
+
+typedef float3x4 tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[12], uint offset) {
+  float3x4 arr[4] = (float3x4[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 48u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_a_i_save = i();
+  const int p_a_i_i_save = i();
+  const float3x4 l_a[4] = tint_symbol(a, 0u);
+  const float3x4 l_a_i = tint_symbol_1(a, (48u * uint(p_a_i_save)));
+  const uint scalar_offset_3 = (((48u * uint(p_a_i_save)) + (16u * uint(p_a_i_i_save)))) / 4;
+  const float4 l_a_i_i = asfloat(a[scalar_offset_3 / 4]);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x4_f32/dynamic_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat3x4_f32/dynamic_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..3abf919
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x4_f32/dynamic_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,27 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform a_block_ubo {
+  mat3x4 inner[4];
+} a;
+
+int counter = 0;
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+void f() {
+  int tint_symbol = i();
+  int p_a_i_save = tint_symbol;
+  int tint_symbol_1 = i();
+  int p_a_i_i_save = tint_symbol_1;
+  mat3x4 l_a[4] = a.inner;
+  mat3x4 l_a_i = a.inner[p_a_i_save];
+  vec4 l_a_i_i = a.inner[p_a_i_save][p_a_i_i_save];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x4_f32/dynamic_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat3x4_f32/dynamic_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..fccba32
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x4_f32/dynamic_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,33 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+int i() {
+  thread int tint_symbol_2 = 0;
+  tint_symbol_2 = as_type<int>((as_type<uint>(tint_symbol_2) + as_type<uint>(1)));
+  return tint_symbol_2;
+}
+
+kernel void f(const constant tint_array<float3x4, 4>* tint_symbol_3 [[buffer(0)]]) {
+  int const tint_symbol = i();
+  int const p_a_i_save = tint_symbol;
+  int const tint_symbol_1 = i();
+  int const p_a_i_i_save = tint_symbol_1;
+  tint_array<float3x4, 4> const l_a = *(tint_symbol_3);
+  float3x4 const l_a_i = (*(tint_symbol_3))[p_a_i_save];
+  float4 const l_a_i_i = (*(tint_symbol_3))[p_a_i_save][p_a_i_i_save];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat3x4_f32/dynamic_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat3x4_f32/dynamic_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..44c91f2
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x4_f32/dynamic_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,64 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 37
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %a_block "a_block"
+               OpMemberName %a_block 0 "inner"
+               OpName %a "a"
+               OpName %counter "counter"
+               OpName %i "i"
+               OpName %f "f"
+               OpDecorate %a_block Block
+               OpMemberDecorate %a_block 0 Offset 0
+               OpMemberDecorate %a_block 0 ColMajor
+               OpMemberDecorate %a_block 0 MatrixStride 16
+               OpDecorate %_arr_mat3v4float_uint_4 ArrayStride 48
+               OpDecorate %a NonWritable
+               OpDecorate %a DescriptorSet 0
+               OpDecorate %a Binding 0
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%mat3v4float = OpTypeMatrix %v4float 3
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat3v4float_uint_4 = OpTypeArray %mat3v4float %uint_4
+    %a_block = OpTypeStruct %_arr_mat3v4float_uint_4
+%_ptr_Uniform_a_block = OpTypePointer Uniform %a_block
+          %a = OpVariable %_ptr_Uniform_a_block Uniform
+        %int = OpTypeInt 32 1
+         %11 = OpConstantNull %int
+%_ptr_Private_int = OpTypePointer Private %int
+    %counter = OpVariable %_ptr_Private_int Private %11
+         %14 = OpTypeFunction %int
+      %int_1 = OpConstant %int 1
+       %void = OpTypeVoid
+         %21 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_mat3v4float_uint_4 = OpTypePointer Uniform %_arr_mat3v4float_uint_4
+%_ptr_Uniform_mat3v4float = OpTypePointer Uniform %mat3v4float
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+          %i = OpFunction %int None %14
+         %16 = OpLabel
+         %17 = OpLoad %int %counter
+         %19 = OpIAdd %int %17 %int_1
+               OpStore %counter %19
+         %20 = OpLoad %int %counter
+               OpReturnValue %20
+               OpFunctionEnd
+          %f = OpFunction %void None %21
+         %24 = OpLabel
+         %25 = OpFunctionCall %int %i
+         %26 = OpFunctionCall %int %i
+         %29 = OpAccessChain %_ptr_Uniform__arr_mat3v4float_uint_4 %a %uint_0
+         %30 = OpLoad %_arr_mat3v4float_uint_4 %29
+         %32 = OpAccessChain %_ptr_Uniform_mat3v4float %a %uint_0 %25
+         %33 = OpLoad %mat3v4float %32
+         %35 = OpAccessChain %_ptr_Uniform_v4float %a %uint_0 %25 %26
+         %36 = OpLoad %v4float %35
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat3x4_f32/dynamic_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat3x4_f32/dynamic_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..35174c9
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x4_f32/dynamic_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,18 @@
+@group(0) @binding(0) var<uniform> a : array<mat3x4<f32>, 4>;
+
+var<private> counter = 0;
+
+fn i() -> i32 {
+  counter++;
+  return counter;
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &(a);
+  let p_a_i = &((*(p_a))[i()]);
+  let p_a_i_i = &((*(p_a_i))[i()]);
+  let l_a : array<mat3x4<f32>, 4> = *(p_a);
+  let l_a_i : mat3x4<f32> = *(p_a_i);
+  let l_a_i_i : vec4<f32> = *(p_a_i_i);
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x4_f32/static_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/array/mat3x4_f32/static_index_via_ptr.wgsl
new file mode 100644
index 0000000..1cdf69f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x4_f32/static_index_via_ptr.wgsl

@@ -0,0 +1,12 @@
+@group(0) @binding(0) var<uniform> a : array<mat3x4<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a       = &a;
+  let p_a_2     = &((*p_a)[2]);
+  let p_a_2_1   = &((*p_a_2)[1]);
+
+  let l_a       : array<mat3x4<f32>, 4> = *p_a;
+  let l_a_i     : mat3x4<f32>           = *p_a_2;
+  let l_a_i_i   : vec4<f32>             = *p_a_2_1;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x4_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat3x4_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..ebb5252
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x4_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,29 @@
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[12];
+};
+
+float3x4 tint_symbol_1(uint4 buffer[12], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]));
+}
+
+typedef float3x4 tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[12], uint offset) {
+  float3x4 arr[4] = (float3x4[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 48u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float3x4 l_a[4] = tint_symbol(a, 0u);
+  const float3x4 l_a_i = tint_symbol_1(a, 96u);
+  const float4 l_a_i_i = asfloat(a[7]);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x4_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat3x4_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..ebb5252
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x4_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,29 @@
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[12];
+};
+
+float3x4 tint_symbol_1(uint4 buffer[12], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]));
+}
+
+typedef float3x4 tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[12], uint offset) {
+  float3x4 arr[4] = (float3x4[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 48u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float3x4 l_a[4] = tint_symbol(a, 0u);
+  const float3x4 l_a_i = tint_symbol_1(a, 96u);
+  const float4 l_a_i_i = asfloat(a[7]);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x4_f32/static_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat3x4_f32/static_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..97d3721
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x4_f32/static_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,17 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform a_block_ubo {
+  mat3x4 inner[4];
+} a;
+
+void f() {
+  mat3x4 l_a[4] = a.inner;
+  mat3x4 l_a_i = a.inner[2];
+  vec4 l_a_i_i = a.inner[2][1];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x4_f32/static_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat3x4_f32/static_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..94022a0b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x4_f32/static_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,23 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+kernel void f(const constant tint_array<float3x4, 4>* tint_symbol [[buffer(0)]]) {
+  tint_array<float3x4, 4> const l_a = *(tint_symbol);
+  float3x4 const l_a_i = (*(tint_symbol))[2];
+  float4 const l_a_i_i = (*(tint_symbol))[2][1];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat3x4_f32/static_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat3x4_f32/static_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..bc3f5fa
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x4_f32/static_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,49 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 27
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %a_block "a_block"
+               OpMemberName %a_block 0 "inner"
+               OpName %a "a"
+               OpName %f "f"
+               OpDecorate %a_block Block
+               OpMemberDecorate %a_block 0 Offset 0
+               OpMemberDecorate %a_block 0 ColMajor
+               OpMemberDecorate %a_block 0 MatrixStride 16
+               OpDecorate %_arr_mat3v4float_uint_4 ArrayStride 48
+               OpDecorate %a NonWritable
+               OpDecorate %a DescriptorSet 0
+               OpDecorate %a Binding 0
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%mat3v4float = OpTypeMatrix %v4float 3
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat3v4float_uint_4 = OpTypeArray %mat3v4float %uint_4
+    %a_block = OpTypeStruct %_arr_mat3v4float_uint_4
+%_ptr_Uniform_a_block = OpTypePointer Uniform %a_block
+          %a = OpVariable %_ptr_Uniform_a_block Uniform
+       %void = OpTypeVoid
+         %10 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_mat3v4float_uint_4 = OpTypePointer Uniform %_arr_mat3v4float_uint_4
+        %int = OpTypeInt 32 1
+      %int_2 = OpConstant %int 2
+%_ptr_Uniform_mat3v4float = OpTypePointer Uniform %mat3v4float
+      %int_1 = OpConstant %int 1
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+          %f = OpFunction %void None %10
+         %13 = OpLabel
+         %16 = OpAccessChain %_ptr_Uniform__arr_mat3v4float_uint_4 %a %uint_0
+         %17 = OpLoad %_arr_mat3v4float_uint_4 %16
+         %21 = OpAccessChain %_ptr_Uniform_mat3v4float %a %uint_0 %int_2
+         %22 = OpLoad %mat3v4float %21
+         %25 = OpAccessChain %_ptr_Uniform_v4float %a %uint_0 %int_2 %int_1
+         %26 = OpLoad %v4float %25
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat3x4_f32/static_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat3x4_f32/static_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..70df08d4
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x4_f32/static_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,11 @@
+@group(0) @binding(0) var<uniform> a : array<mat3x4<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &(a);
+  let p_a_2 = &((*(p_a))[2]);
+  let p_a_2_1 = &((*(p_a_2))[1]);
+  let l_a : array<mat3x4<f32>, 4> = *(p_a);
+  let l_a_i : mat3x4<f32> = *(p_a_2);
+  let l_a_i_i : vec4<f32> = *(p_a_2_1);
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_builtin.wgsl b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_builtin.wgsl
new file mode 100644
index 0000000..e5fd1b6
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_builtin.wgsl

@@ -0,0 +1,8 @@
+@group(0) @binding(0) var<uniform> u : array<mat3x4<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    let t = transpose(u[2]);
+    let l = length(u[0][1].ywxz);
+    let a = abs(u[0][1].ywxz.x);
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_builtin.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_builtin.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..e9ab804
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_builtin.wgsl.expected.dxc.hlsl

@@ -0,0 +1,18 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[12];
+};
+
+float3x4 tint_symbol(uint4 buffer[12], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float4x3 t = transpose(tint_symbol(u, 96u));
+  const float l = length(asfloat(u[1]).ywxz);
+  const float a = abs(asfloat(u[1]).ywxz.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_builtin.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_builtin.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..e9ab804
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_builtin.wgsl.expected.fxc.hlsl

@@ -0,0 +1,18 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[12];
+};
+
+float3x4 tint_symbol(uint4 buffer[12], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float4x3 t = transpose(tint_symbol(u, 96u));
+  const float l = length(asfloat(u[1]).ywxz);
+  const float a = abs(asfloat(u[1]).ywxz.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_builtin.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_builtin.wgsl.expected.glsl
new file mode 100644
index 0000000..1281049
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_builtin.wgsl.expected.glsl

@@ -0,0 +1,17 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat3x4 inner[4];
+} u;
+
+void f() {
+  mat4x3 t = transpose(u.inner[2]);
+  float l = length(u.inner[0][1].ywxz);
+  float a = abs(u.inner[0][1].ywxz.x);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_builtin.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_builtin.wgsl.expected.msl
new file mode 100644
index 0000000..e7f5cd5
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_builtin.wgsl.expected.msl

@@ -0,0 +1,23 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+kernel void f(const constant tint_array<float3x4, 4>* tint_symbol [[buffer(0)]]) {
+  float4x3 const t = transpose((*(tint_symbol))[2]);
+  float const l = length(float4((*(tint_symbol))[0][1]).ywxz);
+  float const a = fabs(float4((*(tint_symbol))[0][1]).ywxz[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_builtin.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_builtin.wgsl.expected.spvasm
new file mode 100644
index 0000000..a7c1bcd
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_builtin.wgsl.expected.spvasm

@@ -0,0 +1,58 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 36
+; Schema: 0
+               OpCapability Shader
+         %24 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %_arr_mat3v4float_uint_4 ArrayStride 48
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%mat3v4float = OpTypeMatrix %v4float 3
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat3v4float_uint_4 = OpTypeArray %mat3v4float %uint_4
+    %u_block = OpTypeStruct %_arr_mat3v4float_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+       %void = OpTypeVoid
+         %10 = OpTypeFunction %void
+    %v3float = OpTypeVector %float 3
+%mat4v3float = OpTypeMatrix %v3float 4
+     %uint_0 = OpConstant %uint 0
+        %int = OpTypeInt 32 1
+      %int_2 = OpConstant %int 2
+%_ptr_Uniform_mat3v4float = OpTypePointer Uniform %mat3v4float
+         %25 = OpConstantNull %int
+      %int_1 = OpConstant %int 1
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+          %f = OpFunction %void None %10
+         %13 = OpLabel
+         %21 = OpAccessChain %_ptr_Uniform_mat3v4float %u %uint_0 %int_2
+         %22 = OpLoad %mat3v4float %21
+         %14 = OpTranspose %mat4v3float %22
+         %28 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %25 %int_1
+         %29 = OpLoad %v4float %28
+         %30 = OpVectorShuffle %v4float %29 %29 1 3 0 2
+         %23 = OpExtInst %float %24 Length %30
+         %32 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %25 %int_1
+         %33 = OpLoad %v4float %32
+         %34 = OpVectorShuffle %v4float %33 %33 1 3 0 2
+         %35 = OpCompositeExtract %float %34 0
+         %31 = OpExtInst %float %24 FAbs %35
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_builtin.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_builtin.wgsl.expected.wgsl
new file mode 100644
index 0000000..57abc4a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_builtin.wgsl.expected.wgsl

@@ -0,0 +1,8 @@
+@group(0) @binding(0) var<uniform> u : array<mat3x4<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let t = transpose(u[2]);
+  let l = length(u[0][1].ywxz);
+  let a = abs(u[0][1].ywxz.x);
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_fn.wgsl b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_fn.wgsl
new file mode 100644
index 0000000..cc37dd7
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_fn.wgsl

@@ -0,0 +1,14 @@
+@group(0) @binding(0) var<uniform> u : array<mat3x4<f32>, 4>;
+
+fn a(a : array<mat3x4<f32>, 4>) {}
+fn b(m : mat3x4<f32>) {}
+fn c(v : vec4<f32>) {}
+fn d(f : f32) {}
+
+@compute @workgroup_size(1)
+fn f() {
+    a(u);
+    b(u[1]);
+    c(u[1][0].ywxz);
+    d(u[1][0].ywxz.x);
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_fn.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_fn.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..a16dec7
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_fn.wgsl.expected.dxc.hlsl

@@ -0,0 +1,42 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[12];
+};
+
+void a(float3x4 a_1[4]) {
+}
+
+void b(float3x4 m) {
+}
+
+void c(float4 v) {
+}
+
+void d(float f_1) {
+}
+
+float3x4 tint_symbol_1(uint4 buffer[12], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]));
+}
+
+typedef float3x4 tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[12], uint offset) {
+  float3x4 arr[4] = (float3x4[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 48u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(tint_symbol_1(u, 48u));
+  c(asfloat(u[3]).ywxz);
+  d(asfloat(u[3]).ywxz.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_fn.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_fn.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..a16dec7
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_fn.wgsl.expected.fxc.hlsl

@@ -0,0 +1,42 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[12];
+};
+
+void a(float3x4 a_1[4]) {
+}
+
+void b(float3x4 m) {
+}
+
+void c(float4 v) {
+}
+
+void d(float f_1) {
+}
+
+float3x4 tint_symbol_1(uint4 buffer[12], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]));
+}
+
+typedef float3x4 tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[12], uint offset) {
+  float3x4 arr[4] = (float3x4[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 48u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(tint_symbol_1(u, 48u));
+  c(asfloat(u[3]).ywxz);
+  d(asfloat(u[3]).ywxz.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_fn.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_fn.wgsl.expected.glsl
new file mode 100644
index 0000000..866be189
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_fn.wgsl.expected.glsl

@@ -0,0 +1,30 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat3x4 inner[4];
+} u;
+
+void a(mat3x4 a_1[4]) {
+}
+
+void b(mat3x4 m) {
+}
+
+void c(vec4 v) {
+}
+
+void d(float f_1) {
+}
+
+void f() {
+  a(u.inner);
+  b(u.inner[1]);
+  c(u.inner[1][0].ywxz);
+  d(u.inner[1][0].ywxz.x);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_fn.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_fn.wgsl.expected.msl
new file mode 100644
index 0000000..cc6a3b7
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_fn.wgsl.expected.msl

@@ -0,0 +1,36 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+void a(tint_array<float3x4, 4> a_1) {
+}
+
+void b(float3x4 m) {
+}
+
+void c(float4 v) {
+}
+
+void d(float f_1) {
+}
+
+kernel void f(const constant tint_array<float3x4, 4>* tint_symbol [[buffer(0)]]) {
+  a(*(tint_symbol));
+  b((*(tint_symbol))[1]);
+  c(float4((*(tint_symbol))[1][0]).ywxz);
+  d(float4((*(tint_symbol))[1][0]).ywxz[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_fn.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_fn.wgsl.expected.spvasm
new file mode 100644
index 0000000..807fe1f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_fn.wgsl.expected.spvasm

@@ -0,0 +1,90 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 52
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %a "a"
+               OpName %a_1 "a_1"
+               OpName %b "b"
+               OpName %m "m"
+               OpName %c "c"
+               OpName %v "v"
+               OpName %d "d"
+               OpName %f_1 "f_1"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %_arr_mat3v4float_uint_4 ArrayStride 48
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%mat3v4float = OpTypeMatrix %v4float 3
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat3v4float_uint_4 = OpTypeArray %mat3v4float %uint_4
+    %u_block = OpTypeStruct %_arr_mat3v4float_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+       %void = OpTypeVoid
+         %10 = OpTypeFunction %void %_arr_mat3v4float_uint_4
+         %15 = OpTypeFunction %void %mat3v4float
+         %19 = OpTypeFunction %void %v4float
+         %23 = OpTypeFunction %void %float
+         %27 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_mat3v4float_uint_4 = OpTypePointer Uniform %_arr_mat3v4float_uint_4
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Uniform_mat3v4float = OpTypePointer Uniform %mat3v4float
+         %42 = OpConstantNull %int
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+          %a = OpFunction %void None %10
+        %a_1 = OpFunctionParameter %_arr_mat3v4float_uint_4
+         %14 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %b = OpFunction %void None %15
+          %m = OpFunctionParameter %mat3v4float
+         %18 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %c = OpFunction %void None %19
+          %v = OpFunctionParameter %v4float
+         %22 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %d = OpFunction %void None %23
+        %f_1 = OpFunctionParameter %float
+         %26 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %27
+         %29 = OpLabel
+         %33 = OpAccessChain %_ptr_Uniform__arr_mat3v4float_uint_4 %u %uint_0
+         %34 = OpLoad %_arr_mat3v4float_uint_4 %33
+         %30 = OpFunctionCall %void %a %34
+         %39 = OpAccessChain %_ptr_Uniform_mat3v4float %u %uint_0 %int_1
+         %40 = OpLoad %mat3v4float %39
+         %35 = OpFunctionCall %void %b %40
+         %44 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %int_1 %42
+         %45 = OpLoad %v4float %44
+         %46 = OpVectorShuffle %v4float %45 %45 1 3 0 2
+         %41 = OpFunctionCall %void %c %46
+         %48 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %int_1 %42
+         %49 = OpLoad %v4float %48
+         %50 = OpVectorShuffle %v4float %49 %49 1 3 0 2
+         %51 = OpCompositeExtract %float %50 0
+         %47 = OpFunctionCall %void %d %51
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_fn.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_fn.wgsl.expected.wgsl
new file mode 100644
index 0000000..813ff50
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_fn.wgsl.expected.wgsl

@@ -0,0 +1,21 @@
+@group(0) @binding(0) var<uniform> u : array<mat3x4<f32>, 4>;
+
+fn a(a : array<mat3x4<f32>, 4>) {
+}
+
+fn b(m : mat3x4<f32>) {
+}
+
+fn c(v : vec4<f32>) {
+}
+
+fn d(f : f32) {
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  a(u);
+  b(u[1]);
+  c(u[1][0].ywxz);
+  d(u[1][0].ywxz.x);
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_private.wgsl b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_private.wgsl
new file mode 100644
index 0000000..db89fff
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_private.wgsl

@@ -0,0 +1,10 @@
+@group(0) @binding(0) var<uniform> u : array<mat3x4<f32>, 4>;
+var<private> p : array<mat3x4<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    p = u;
+    p[1] = u[2];
+    p[1][0] = u[0][1].ywxz;
+    p[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_private.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_private.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..b708074
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_private.wgsl.expected.dxc.hlsl

@@ -0,0 +1,31 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[12];
+};
+static float3x4 p[4] = (float3x4[4])0;
+
+float3x4 tint_symbol_1(uint4 buffer[12], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]));
+}
+
+typedef float3x4 tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[12], uint offset) {
+  float3x4 arr[4] = (float3x4[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 48u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = tint_symbol_1(u, 96u);
+  p[1][0] = asfloat(u[1]).ywxz;
+  p[1][0].x = asfloat(u[1].x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_private.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_private.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..b708074
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_private.wgsl.expected.fxc.hlsl

@@ -0,0 +1,31 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[12];
+};
+static float3x4 p[4] = (float3x4[4])0;
+
+float3x4 tint_symbol_1(uint4 buffer[12], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]));
+}
+
+typedef float3x4 tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[12], uint offset) {
+  float3x4 arr[4] = (float3x4[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 48u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = tint_symbol_1(u, 96u);
+  p[1][0] = asfloat(u[1]).ywxz;
+  p[1][0].x = asfloat(u[1].x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_private.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_private.wgsl.expected.glsl
new file mode 100644
index 0000000..e4bd6a7
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_private.wgsl.expected.glsl

@@ -0,0 +1,19 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat3x4 inner[4];
+} u;
+
+mat3x4 p[4] = mat3x4[4](mat3x4(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), mat3x4(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), mat3x4(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), mat3x4(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f));
+void f() {
+  p = u.inner;
+  p[1] = u.inner[2];
+  p[1][0] = u.inner[0][1].ywxz;
+  p[1][0].x = u.inner[0][1].x;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_private.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_private.wgsl.expected.msl
new file mode 100644
index 0000000..478a68e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_private.wgsl.expected.msl

@@ -0,0 +1,25 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+kernel void f(const constant tint_array<float3x4, 4>* tint_symbol_1 [[buffer(0)]]) {
+  thread tint_array<float3x4, 4> tint_symbol = {};
+  tint_symbol = *(tint_symbol_1);
+  tint_symbol[1] = (*(tint_symbol_1))[2];
+  tint_symbol[1][0] = float4((*(tint_symbol_1))[0][1]).ywxz;
+  tint_symbol[1][0][0] = (*(tint_symbol_1))[0][1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_private.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_private.wgsl.expected.spvasm
new file mode 100644
index 0000000..edf4aae
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_private.wgsl.expected.spvasm

@@ -0,0 +1,68 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 41
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %p "p"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %_arr_mat3v4float_uint_4 ArrayStride 48
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%mat3v4float = OpTypeMatrix %v4float 3
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat3v4float_uint_4 = OpTypeArray %mat3v4float %uint_4
+    %u_block = OpTypeStruct %_arr_mat3v4float_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%_ptr_Private__arr_mat3v4float_uint_4 = OpTypePointer Private %_arr_mat3v4float_uint_4
+         %12 = OpConstantNull %_arr_mat3v4float_uint_4
+          %p = OpVariable %_ptr_Private__arr_mat3v4float_uint_4 Private %12
+       %void = OpTypeVoid
+         %13 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_mat3v4float_uint_4 = OpTypePointer Uniform %_arr_mat3v4float_uint_4
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Private_mat3v4float = OpTypePointer Private %mat3v4float
+      %int_2 = OpConstant %int 2
+%_ptr_Uniform_mat3v4float = OpTypePointer Uniform %mat3v4float
+         %29 = OpConstantNull %int
+%_ptr_Private_v4float = OpTypePointer Private %v4float
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+%_ptr_Private_float = OpTypePointer Private %float
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+          %f = OpFunction %void None %13
+         %16 = OpLabel
+         %19 = OpAccessChain %_ptr_Uniform__arr_mat3v4float_uint_4 %u %uint_0
+         %20 = OpLoad %_arr_mat3v4float_uint_4 %19
+               OpStore %p %20
+         %24 = OpAccessChain %_ptr_Private_mat3v4float %p %int_1
+         %27 = OpAccessChain %_ptr_Uniform_mat3v4float %u %uint_0 %int_2
+         %28 = OpLoad %mat3v4float %27
+               OpStore %24 %28
+         %31 = OpAccessChain %_ptr_Private_v4float %p %int_1 %29
+         %33 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %29 %int_1
+         %34 = OpLoad %v4float %33
+         %35 = OpVectorShuffle %v4float %34 %34 1 3 0 2
+               OpStore %31 %35
+         %37 = OpAccessChain %_ptr_Private_float %p %int_1 %29 %uint_0
+         %39 = OpAccessChain %_ptr_Uniform_float %u %uint_0 %29 %int_1 %uint_0
+         %40 = OpLoad %float %39
+               OpStore %37 %40
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_private.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_private.wgsl.expected.wgsl
new file mode 100644
index 0000000..65e268e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_private.wgsl.expected.wgsl

@@ -0,0 +1,11 @@
+@group(0) @binding(0) var<uniform> u : array<mat3x4<f32>, 4>;
+
+var<private> p : array<mat3x4<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  p = u;
+  p[1] = u[2];
+  p[1][0] = u[0][1].ywxz;
+  p[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_storage.wgsl b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_storage.wgsl
new file mode 100644
index 0000000..908c09b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_storage.wgsl

@@ -0,0 +1,10 @@
+@group(0) @binding(0) var<uniform> u : array<mat3x4<f32>, 4>;
+@group(0) @binding(1) var<storage, read_write> s : array<mat3x4<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    s = u;
+    s[1] = u[2];
+    s[1][0] = u[0][1].ywxz;
+    s[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_storage.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_storage.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..b82dc9c
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_storage.wgsl.expected.dxc.hlsl

@@ -0,0 +1,46 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[12];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, float3x4 value) {
+  buffer.Store4((offset + 0u), asuint(value[0u]));
+  buffer.Store4((offset + 16u), asuint(value[1u]));
+  buffer.Store4((offset + 32u), asuint(value[2u]));
+}
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, float3x4 value[4]) {
+  float3x4 array[4] = value;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      tint_symbol_1(buffer, (offset + (i * 48u)), array[i]);
+    }
+  }
+}
+
+float3x4 tint_symbol_4(uint4 buffer[12], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]));
+}
+
+typedef float3x4 tint_symbol_3_ret[4];
+tint_symbol_3_ret tint_symbol_3(uint4 buffer[12], uint offset) {
+  float3x4 arr[4] = (float3x4[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_4(buffer, (offset + (i_1 * 48u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_3(u, 0u));
+  tint_symbol_1(s, 48u, tint_symbol_4(u, 96u));
+  s.Store4(48u, asuint(asfloat(u[1]).ywxz));
+  s.Store(48u, asuint(asfloat(u[1].x)));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_storage.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_storage.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..b82dc9c
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_storage.wgsl.expected.fxc.hlsl

@@ -0,0 +1,46 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[12];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, float3x4 value) {
+  buffer.Store4((offset + 0u), asuint(value[0u]));
+  buffer.Store4((offset + 16u), asuint(value[1u]));
+  buffer.Store4((offset + 32u), asuint(value[2u]));
+}
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, float3x4 value[4]) {
+  float3x4 array[4] = value;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      tint_symbol_1(buffer, (offset + (i * 48u)), array[i]);
+    }
+  }
+}
+
+float3x4 tint_symbol_4(uint4 buffer[12], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]));
+}
+
+typedef float3x4 tint_symbol_3_ret[4];
+tint_symbol_3_ret tint_symbol_3(uint4 buffer[12], uint offset) {
+  float3x4 arr[4] = (float3x4[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_4(buffer, (offset + (i_1 * 48u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_3(u, 0u));
+  tint_symbol_1(s, 48u, tint_symbol_4(u, 96u));
+  s.Store4(48u, asuint(asfloat(u[1]).ywxz));
+  s.Store(48u, asuint(asfloat(u[1].x)));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_storage.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_storage.wgsl.expected.glsl
new file mode 100644
index 0000000..b366851
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_storage.wgsl.expected.glsl

@@ -0,0 +1,22 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat3x4 inner[4];
+} u;
+
+layout(binding = 1, std430) buffer u_block_ssbo {
+  mat3x4 inner[4];
+} s;
+
+void f() {
+  s.inner = u.inner;
+  s.inner[1] = u.inner[2];
+  s.inner[1][0] = u.inner[0][1].ywxz;
+  s.inner[1][0].x = u.inner[0][1].x;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_storage.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_storage.wgsl.expected.msl
new file mode 100644
index 0000000..d659b7b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_storage.wgsl.expected.msl

@@ -0,0 +1,24 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+kernel void f(device tint_array<float3x4, 4>* tint_symbol [[buffer(1)]], const constant tint_array<float3x4, 4>* tint_symbol_1 [[buffer(0)]]) {
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[2];
+  (*(tint_symbol))[1][0] = float4((*(tint_symbol_1))[0][1]).ywxz;
+  (*(tint_symbol))[1][0][0] = (*(tint_symbol_1))[0][1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_storage.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_storage.wgsl.expected.spvasm
new file mode 100644
index 0000000..ea80ea1
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_storage.wgsl.expected.spvasm

@@ -0,0 +1,71 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 42
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %s "s"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %_arr_mat3v4float_uint_4 ArrayStride 48
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpDecorate %s DescriptorSet 0
+               OpDecorate %s Binding 1
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%mat3v4float = OpTypeMatrix %v4float 3
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat3v4float_uint_4 = OpTypeArray %mat3v4float %uint_4
+    %u_block = OpTypeStruct %_arr_mat3v4float_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%_ptr_StorageBuffer_u_block = OpTypePointer StorageBuffer %u_block
+          %s = OpVariable %_ptr_StorageBuffer_u_block StorageBuffer
+       %void = OpTypeVoid
+         %12 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_StorageBuffer__arr_mat3v4float_uint_4 = OpTypePointer StorageBuffer %_arr_mat3v4float_uint_4
+%_ptr_Uniform__arr_mat3v4float_uint_4 = OpTypePointer Uniform %_arr_mat3v4float_uint_4
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_StorageBuffer_mat3v4float = OpTypePointer StorageBuffer %mat3v4float
+      %int_2 = OpConstant %int 2
+%_ptr_Uniform_mat3v4float = OpTypePointer Uniform %mat3v4float
+         %30 = OpConstantNull %int
+%_ptr_StorageBuffer_v4float = OpTypePointer StorageBuffer %v4float
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+          %f = OpFunction %void None %12
+         %15 = OpLabel
+         %18 = OpAccessChain %_ptr_StorageBuffer__arr_mat3v4float_uint_4 %s %uint_0
+         %20 = OpAccessChain %_ptr_Uniform__arr_mat3v4float_uint_4 %u %uint_0
+         %21 = OpLoad %_arr_mat3v4float_uint_4 %20
+               OpStore %18 %21
+         %25 = OpAccessChain %_ptr_StorageBuffer_mat3v4float %s %uint_0 %int_1
+         %28 = OpAccessChain %_ptr_Uniform_mat3v4float %u %uint_0 %int_2
+         %29 = OpLoad %mat3v4float %28
+               OpStore %25 %29
+         %32 = OpAccessChain %_ptr_StorageBuffer_v4float %s %uint_0 %int_1 %30
+         %34 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %30 %int_1
+         %35 = OpLoad %v4float %34
+         %36 = OpVectorShuffle %v4float %35 %35 1 3 0 2
+               OpStore %32 %36
+         %38 = OpAccessChain %_ptr_StorageBuffer_float %s %uint_0 %int_1 %30 %uint_0
+         %40 = OpAccessChain %_ptr_Uniform_float %u %uint_0 %30 %int_1 %uint_0
+         %41 = OpLoad %float %40
+               OpStore %38 %41
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_storage.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_storage.wgsl.expected.wgsl
new file mode 100644
index 0000000..12a5b2b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_storage.wgsl.expected.wgsl

@@ -0,0 +1,11 @@
+@group(0) @binding(0) var<uniform> u : array<mat3x4<f32>, 4>;
+
+@group(0) @binding(1) var<storage, read_write> s : array<mat3x4<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  s = u;
+  s[1] = u[2];
+  s[1][0] = u[0][1].ywxz;
+  s[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_workgroup.wgsl b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_workgroup.wgsl
new file mode 100644
index 0000000..bf1ee70
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_workgroup.wgsl

@@ -0,0 +1,10 @@
+@group(0) @binding(0) var<uniform> u : array<mat3x4<f32>, 4>;
+var<workgroup> w : array<mat3x4<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    w = u;
+    w[1] = u[2];
+    w[1][0] = u[0][1].ywxz;
+    w[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_workgroup.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_workgroup.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..b27032e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_workgroup.wgsl.expected.dxc.hlsl

@@ -0,0 +1,46 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[12];
+};
+groupshared float3x4 w[4];
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+float3x4 tint_symbol_3(uint4 buffer[12], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]));
+}
+
+typedef float3x4 tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[12], uint offset) {
+  float3x4 arr[4] = (float3x4[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 48u)));
+    }
+  }
+  return arr;
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      const uint i = idx;
+      w[i] = float3x4((0.0f).xxxx, (0.0f).xxxx, (0.0f).xxxx);
+    }
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = tint_symbol_3(u, 96u);
+  w[1][0] = asfloat(u[1]).ywxz;
+  w[1][0].x = asfloat(u[1].x);
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_workgroup.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_workgroup.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..b27032e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_workgroup.wgsl.expected.fxc.hlsl

@@ -0,0 +1,46 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[12];
+};
+groupshared float3x4 w[4];
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+float3x4 tint_symbol_3(uint4 buffer[12], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]));
+}
+
+typedef float3x4 tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[12], uint offset) {
+  float3x4 arr[4] = (float3x4[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 48u)));
+    }
+  }
+  return arr;
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      const uint i = idx;
+      w[i] = float3x4((0.0f).xxxx, (0.0f).xxxx, (0.0f).xxxx);
+    }
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = tint_symbol_3(u, 96u);
+  w[1][0] = asfloat(u[1]).ywxz;
+  w[1][0].x = asfloat(u[1].x);
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_workgroup.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_workgroup.wgsl.expected.glsl
new file mode 100644
index 0000000..79d7797
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_workgroup.wgsl.expected.glsl

@@ -0,0 +1,26 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat3x4 inner[4];
+} u;
+
+shared mat3x4 w[4];
+void f(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      uint i = idx;
+      w[i] = mat3x4(vec4(0.0f), vec4(0.0f), vec4(0.0f));
+    }
+  }
+  barrier();
+  w = u.inner;
+  w[1] = u.inner[2];
+  w[1][0] = u.inner[0][1].ywxz;
+  w[1][0].x = u.inner[0][1].x;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f(gl_LocalInvocationIndex);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_workgroup.wgsl.expected.msl
new file mode 100644
index 0000000..f955a90
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_workgroup.wgsl.expected.msl

@@ -0,0 +1,38 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct tint_symbol_5 {
+  tint_array<float3x4, 4> w;
+};
+
+void f_inner(uint local_invocation_index, threadgroup tint_array<float3x4, 4>* const tint_symbol, const constant tint_array<float3x4, 4>* const tint_symbol_1) {
+  for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+    uint const i = idx;
+    (*(tint_symbol))[i] = float3x4(float4(0.0f), float4(0.0f), float4(0.0f));
+  }
+  threadgroup_barrier(mem_flags::mem_threadgroup);
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[2];
+  (*(tint_symbol))[1][0] = float4((*(tint_symbol_1))[0][1]).ywxz;
+  (*(tint_symbol))[1][0][0] = (*(tint_symbol_1))[0][1][0];
+}
+
+kernel void f(const constant tint_array<float3x4, 4>* tint_symbol_4 [[buffer(0)]], threadgroup tint_symbol_5* tint_symbol_3 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
+  threadgroup tint_array<float3x4, 4>* const tint_symbol_2 = &((*(tint_symbol_3)).w);
+  f_inner(local_invocation_index, tint_symbol_2, tint_symbol_4);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_workgroup.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_workgroup.wgsl.expected.spvasm
new file mode 100644
index 0000000..fb19eed
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_workgroup.wgsl.expected.spvasm

@@ -0,0 +1,115 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 70
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f" %local_invocation_index_1
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %local_invocation_index_1 "local_invocation_index_1"
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %w "w"
+               OpName %f_inner "f_inner"
+               OpName %local_invocation_index "local_invocation_index"
+               OpName %idx "idx"
+               OpName %f "f"
+               OpDecorate %local_invocation_index_1 BuiltIn LocalInvocationIndex
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %_arr_mat3v4float_uint_4 ArrayStride 48
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %uint = OpTypeInt 32 0
+%_ptr_Input_uint = OpTypePointer Input %uint
+%local_invocation_index_1 = OpVariable %_ptr_Input_uint Input
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%mat3v4float = OpTypeMatrix %v4float 3
+     %uint_4 = OpConstant %uint 4
+%_arr_mat3v4float_uint_4 = OpTypeArray %mat3v4float %uint_4
+    %u_block = OpTypeStruct %_arr_mat3v4float_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%_ptr_Workgroup__arr_mat3v4float_uint_4 = OpTypePointer Workgroup %_arr_mat3v4float_uint_4
+          %w = OpVariable %_ptr_Workgroup__arr_mat3v4float_uint_4 Workgroup
+       %void = OpTypeVoid
+         %14 = OpTypeFunction %void %uint
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %21 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Workgroup_mat3v4float = OpTypePointer Workgroup %mat3v4float
+         %35 = OpConstantNull %mat3v4float
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+   %uint_264 = OpConstant %uint 264
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_mat3v4float_uint_4 = OpTypePointer Uniform %_arr_mat3v4float_uint_4
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+      %int_2 = OpConstant %int 2
+%_ptr_Uniform_mat3v4float = OpTypePointer Uniform %mat3v4float
+         %53 = OpConstantNull %int
+%_ptr_Workgroup_v4float = OpTypePointer Workgroup %v4float
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+%_ptr_Workgroup_float = OpTypePointer Workgroup %float
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+         %65 = OpTypeFunction %void
+    %f_inner = OpFunction %void None %14
+%local_invocation_index = OpFunctionParameter %uint
+         %18 = OpLabel
+        %idx = OpVariable %_ptr_Function_uint Function %21
+               OpStore %idx %local_invocation_index
+               OpBranch %22
+         %22 = OpLabel
+               OpLoopMerge %23 %24 None
+               OpBranch %25
+         %25 = OpLabel
+         %27 = OpLoad %uint %idx
+         %28 = OpULessThan %bool %27 %uint_4
+         %26 = OpLogicalNot %bool %28
+               OpSelectionMerge %30 None
+               OpBranchConditional %26 %31 %30
+         %31 = OpLabel
+               OpBranch %23
+         %30 = OpLabel
+         %32 = OpLoad %uint %idx
+         %34 = OpAccessChain %_ptr_Workgroup_mat3v4float %w %32
+               OpStore %34 %35
+               OpBranch %24
+         %24 = OpLabel
+         %36 = OpLoad %uint %idx
+         %38 = OpIAdd %uint %36 %uint_1
+               OpStore %idx %38
+               OpBranch %22
+         %23 = OpLabel
+               OpControlBarrier %uint_2 %uint_2 %uint_264
+         %44 = OpAccessChain %_ptr_Uniform__arr_mat3v4float_uint_4 %u %uint_0
+         %45 = OpLoad %_arr_mat3v4float_uint_4 %44
+               OpStore %w %45
+         %48 = OpAccessChain %_ptr_Workgroup_mat3v4float %w %int_1
+         %51 = OpAccessChain %_ptr_Uniform_mat3v4float %u %uint_0 %int_2
+         %52 = OpLoad %mat3v4float %51
+               OpStore %48 %52
+         %55 = OpAccessChain %_ptr_Workgroup_v4float %w %int_1 %53
+         %57 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %53 %int_1
+         %58 = OpLoad %v4float %57
+         %59 = OpVectorShuffle %v4float %58 %58 1 3 0 2
+               OpStore %55 %59
+         %61 = OpAccessChain %_ptr_Workgroup_float %w %int_1 %53 %uint_0
+         %63 = OpAccessChain %_ptr_Uniform_float %u %uint_0 %53 %int_1 %uint_0
+         %64 = OpLoad %float %63
+               OpStore %61 %64
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %65
+         %67 = OpLabel
+         %69 = OpLoad %uint %local_invocation_index_1
+         %68 = OpFunctionCall %void %f_inner %69
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_workgroup.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_workgroup.wgsl.expected.wgsl
new file mode 100644
index 0000000..da91f6b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat3x4_f32/to_workgroup.wgsl.expected.wgsl

@@ -0,0 +1,11 @@
+@group(0) @binding(0) var<uniform> u : array<mat3x4<f32>, 4>;
+
+var<workgroup> w : array<mat3x4<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  w = u;
+  w[1] = u[2];
+  w[1][0] = u[0][1].ywxz;
+  w[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2/to_private.wgsl b/test/tint/buffer/uniform/std140/array/mat4x2/to_private.wgsl
deleted file mode 100644
index 618416a..0000000
--- a/test/tint/buffer/uniform/std140/array/mat4x2/to_private.wgsl
+++ /dev/null

@@ -1,16 +0,0 @@
-struct S {
-  before : i32,
-  m : mat4x2<f32>,
-  after : i32,
-}
-
-@group(0) @binding(0) var<uniform> u : array<mat4x2<f32>, 4>;
-var<private> p : array<mat4x2<f32>, 4>;
-
-@compute @workgroup_size(1)
-fn f() {
-    p = u;
-    p[1] = u[2];
-    p[1][0] = u[0][1].yx;
-    p[1][0].x = u[0][1].x;
-}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2/to_private.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat4x2/to_private.wgsl.expected.glsl
deleted file mode 100644
index 01b3ece..0000000
--- a/test/tint/buffer/uniform/std140/array/mat4x2/to_private.wgsl.expected.glsl
+++ /dev/null

@@ -1,46 +0,0 @@
-#version 310 es
-
-struct mat4x2_f32 {
-  vec2 col0;
-  vec2 col1;
-  vec2 col2;
-  vec2 col3;
-};
-
-struct S {
-  int before;
-  mat4x2 m;
-  int after;
-};
-
-layout(binding = 0, std140) uniform u_block_std140_ubo {
-  mat4x2_f32 inner[4];
-} u;
-
-mat4x2 p[4] = mat4x2[4](mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f));
-mat4x2 conv_mat4x2_f32(mat4x2_f32 val) {
-  return mat4x2(val.col0, val.col1, val.col2, val.col3);
-}
-
-mat4x2[4] conv_arr4_mat4x2_f32(mat4x2_f32 val[4]) {
-  mat4x2 arr[4] = mat4x2[4](mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f));
-  {
-    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
-      arr[i] = conv_mat4x2_f32(val[i]);
-    }
-  }
-  return arr;
-}
-
-void f() {
-  p = conv_arr4_mat4x2_f32(u.inner);
-  p[1] = conv_mat4x2_f32(u.inner[2u]);
-  p[1][0] = u.inner[0u].col1.yx;
-  p[1][0].x = u.inner[0u].col1[0u];
-}
-
-layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
-void main() {
-  f();
-  return;
-}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2/to_private.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat4x2/to_private.wgsl.expected.msl
deleted file mode 100644
index cd97f91..0000000
--- a/test/tint/buffer/uniform/std140/array/mat4x2/to_private.wgsl.expected.msl
+++ /dev/null

@@ -1,31 +0,0 @@
-#include <metal_stdlib>
-
-using namespace metal;
-
-template<typename T, size_t N>
-struct tint_array {
-    const constant T& operator[](size_t i) const constant { return elements[i]; }
-    device T& operator[](size_t i) device { return elements[i]; }
-    const device T& operator[](size_t i) const device { return elements[i]; }
-    thread T& operator[](size_t i) thread { return elements[i]; }
-    const thread T& operator[](size_t i) const thread { return elements[i]; }
-    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
-    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
-    T elements[N];
-};
-
-struct S {
-  int before;
-  float4x2 m;
-  int after;
-};
-
-kernel void f(const constant tint_array<float4x2, 4>* tint_symbol_1 [[buffer(0)]]) {
-  thread tint_array<float4x2, 4> tint_symbol = {};
-  tint_symbol = *(tint_symbol_1);
-  tint_symbol[1] = (*(tint_symbol_1))[2];
-  tint_symbol[1][0] = float2((*(tint_symbol_1))[0][1]).yx;
-  tint_symbol[1][0][0] = (*(tint_symbol_1))[0][1][0];
-  return;
-}
-

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2/to_private.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat4x2/to_private.wgsl.expected.wgsl
deleted file mode 100644
index 04780dc..0000000
--- a/test/tint/buffer/uniform/std140/array/mat4x2/to_private.wgsl.expected.wgsl
+++ /dev/null

@@ -1,17 +0,0 @@
-struct S {
-  before : i32,
-  m : mat4x2<f32>,
-  after : i32,
-}
-
-@group(0) @binding(0) var<uniform> u : array<mat4x2<f32>, 4>;
-
-var<private> p : array<mat4x2<f32>, 4>;
-
-@compute @workgroup_size(1)
-fn f() {
-  p = u;
-  p[1] = u[2];
-  p[1][0] = u[0][1].yx;
-  p[1][0].x = u[0][1].x;
-}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2/to_storage.wgsl b/test/tint/buffer/uniform/std140/array/mat4x2/to_storage.wgsl
deleted file mode 100644
index fcea4e7..0000000
--- a/test/tint/buffer/uniform/std140/array/mat4x2/to_storage.wgsl
+++ /dev/null

@@ -1,16 +0,0 @@
-struct S {
-  before : i32,
-  m : mat4x2<f32>,
-  after : i32,
-}
-
-@group(0) @binding(0) var<uniform> u : array<mat4x2<f32>, 4>;
-@group(0) @binding(1) var<storage, read_write> s : array<mat4x2<f32>, 4>;
-
-@compute @workgroup_size(1)
-fn f() {
-    s = u;
-    s[1] = u[2];
-    s[1][0] = u[0][1].yx;
-    s[1][0].x = u[0][1].x;
-}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2/to_storage.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat4x2/to_storage.wgsl.expected.glsl
deleted file mode 100644
index b7e9977..0000000
--- a/test/tint/buffer/uniform/std140/array/mat4x2/to_storage.wgsl.expected.glsl
+++ /dev/null

@@ -1,49 +0,0 @@
-#version 310 es
-
-struct mat4x2_f32 {
-  vec2 col0;
-  vec2 col1;
-  vec2 col2;
-  vec2 col3;
-};
-
-struct S {
-  int before;
-  mat4x2 m;
-  int after;
-};
-
-layout(binding = 0, std140) uniform u_block_std140_ubo {
-  mat4x2_f32 inner[4];
-} u;
-
-layout(binding = 1, std430) buffer u_block_ssbo {
-  mat4x2 inner[4];
-} s;
-
-mat4x2 conv_mat4x2_f32(mat4x2_f32 val) {
-  return mat4x2(val.col0, val.col1, val.col2, val.col3);
-}
-
-mat4x2[4] conv_arr4_mat4x2_f32(mat4x2_f32 val[4]) {
-  mat4x2 arr[4] = mat4x2[4](mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f));
-  {
-    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
-      arr[i] = conv_mat4x2_f32(val[i]);
-    }
-  }
-  return arr;
-}
-
-void f() {
-  s.inner = conv_arr4_mat4x2_f32(u.inner);
-  s.inner[1] = conv_mat4x2_f32(u.inner[2u]);
-  s.inner[1][0] = u.inner[0u].col1.yx;
-  s.inner[1][0].x = u.inner[0u].col1[0u];
-}
-
-layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
-void main() {
-  f();
-  return;
-}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2/to_storage.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat4x2/to_storage.wgsl.expected.msl
deleted file mode 100644
index abded74..0000000
--- a/test/tint/buffer/uniform/std140/array/mat4x2/to_storage.wgsl.expected.msl
+++ /dev/null

@@ -1,30 +0,0 @@
-#include <metal_stdlib>
-
-using namespace metal;
-
-template<typename T, size_t N>
-struct tint_array {
-    const constant T& operator[](size_t i) const constant { return elements[i]; }
-    device T& operator[](size_t i) device { return elements[i]; }
-    const device T& operator[](size_t i) const device { return elements[i]; }
-    thread T& operator[](size_t i) thread { return elements[i]; }
-    const thread T& operator[](size_t i) const thread { return elements[i]; }
-    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
-    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
-    T elements[N];
-};
-
-struct S {
-  int before;
-  float4x2 m;
-  int after;
-};
-
-kernel void f(device tint_array<float4x2, 4>* tint_symbol [[buffer(1)]], const constant tint_array<float4x2, 4>* tint_symbol_1 [[buffer(0)]]) {
-  *(tint_symbol) = *(tint_symbol_1);
-  (*(tint_symbol))[1] = (*(tint_symbol_1))[2];
-  (*(tint_symbol))[1][0] = float2((*(tint_symbol_1))[0][1]).yx;
-  (*(tint_symbol))[1][0][0] = (*(tint_symbol_1))[0][1][0];
-  return;
-}
-

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2/to_storage.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat4x2/to_storage.wgsl.expected.wgsl
deleted file mode 100644
index d32f228..0000000
--- a/test/tint/buffer/uniform/std140/array/mat4x2/to_storage.wgsl.expected.wgsl
+++ /dev/null

@@ -1,17 +0,0 @@
-struct S {
-  before : i32,
-  m : mat4x2<f32>,
-  after : i32,
-}
-
-@group(0) @binding(0) var<uniform> u : array<mat4x2<f32>, 4>;
-
-@group(0) @binding(1) var<storage, read_write> s : array<mat4x2<f32>, 4>;
-
-@compute @workgroup_size(1)
-fn f() {
-  s = u;
-  s[1] = u[2];
-  s[1][0] = u[0][1].yx;
-  s[1][0].x = u[0][1].x;
-}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2/to_workgroup.wgsl b/test/tint/buffer/uniform/std140/array/mat4x2/to_workgroup.wgsl
deleted file mode 100644
index b1368c8..0000000
--- a/test/tint/buffer/uniform/std140/array/mat4x2/to_workgroup.wgsl
+++ /dev/null

@@ -1,16 +0,0 @@
-struct S {
-  before : i32,
-  m : mat4x2<f32>,
-  after : i32,
-}
-
-@group(0) @binding(0) var<uniform> u : array<mat4x2<f32>, 4>;
-var<workgroup> w : array<mat4x2<f32>, 4>;
-
-@compute @workgroup_size(1)
-fn f() {
-    w = u;
-    w[1] = u[2];
-    w[1][0] = u[0][1].yx;
-    w[1][0].x = u[0][1].x;
-}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2/to_workgroup.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat4x2/to_workgroup.wgsl.expected.glsl
deleted file mode 100644
index df52a3b..0000000
--- a/test/tint/buffer/uniform/std140/array/mat4x2/to_workgroup.wgsl.expected.glsl
+++ /dev/null

@@ -1,53 +0,0 @@
-#version 310 es
-
-struct mat4x2_f32 {
-  vec2 col0;
-  vec2 col1;
-  vec2 col2;
-  vec2 col3;
-};
-
-struct S {
-  int before;
-  mat4x2 m;
-  int after;
-};
-
-layout(binding = 0, std140) uniform u_block_std140_ubo {
-  mat4x2_f32 inner[4];
-} u;
-
-shared mat4x2 w[4];
-mat4x2 conv_mat4x2_f32(mat4x2_f32 val) {
-  return mat4x2(val.col0, val.col1, val.col2, val.col3);
-}
-
-mat4x2[4] conv_arr4_mat4x2_f32(mat4x2_f32 val[4]) {
-  mat4x2 arr[4] = mat4x2[4](mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f));
-  {
-    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
-      arr[i] = conv_mat4x2_f32(val[i]);
-    }
-  }
-  return arr;
-}
-
-void f(uint local_invocation_index) {
-  {
-    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
-      uint i = idx;
-      w[i] = mat4x2(vec2(0.0f), vec2(0.0f), vec2(0.0f), vec2(0.0f));
-    }
-  }
-  barrier();
-  w = conv_arr4_mat4x2_f32(u.inner);
-  w[1] = conv_mat4x2_f32(u.inner[2u]);
-  w[1][0] = u.inner[0u].col1.yx;
-  w[1][0].x = u.inner[0u].col1[0u];
-}
-
-layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
-void main() {
-  f(gl_LocalInvocationIndex);
-  return;
-}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat4x2/to_workgroup.wgsl.expected.msl
deleted file mode 100644
index e4001f3..0000000
--- a/test/tint/buffer/uniform/std140/array/mat4x2/to_workgroup.wgsl.expected.msl
+++ /dev/null

@@ -1,44 +0,0 @@
-#include <metal_stdlib>
-
-using namespace metal;
-
-template<typename T, size_t N>
-struct tint_array {
-    const constant T& operator[](size_t i) const constant { return elements[i]; }
-    device T& operator[](size_t i) device { return elements[i]; }
-    const device T& operator[](size_t i) const device { return elements[i]; }
-    thread T& operator[](size_t i) thread { return elements[i]; }
-    const thread T& operator[](size_t i) const thread { return elements[i]; }
-    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
-    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
-    T elements[N];
-};
-
-struct tint_symbol_5 {
-  tint_array<float4x2, 4> w;
-};
-
-struct S {
-  int before;
-  float4x2 m;
-  int after;
-};
-
-void f_inner(uint local_invocation_index, threadgroup tint_array<float4x2, 4>* const tint_symbol, const constant tint_array<float4x2, 4>* const tint_symbol_1) {
-  for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
-    uint const i = idx;
-    (*(tint_symbol))[i] = float4x2(float2(0.0f), float2(0.0f), float2(0.0f), float2(0.0f));
-  }
-  threadgroup_barrier(mem_flags::mem_threadgroup);
-  *(tint_symbol) = *(tint_symbol_1);
-  (*(tint_symbol))[1] = (*(tint_symbol_1))[2];
-  (*(tint_symbol))[1][0] = float2((*(tint_symbol_1))[0][1]).yx;
-  (*(tint_symbol))[1][0][0] = (*(tint_symbol_1))[0][1][0];
-}
-
-kernel void f(const constant tint_array<float4x2, 4>* tint_symbol_4 [[buffer(0)]], threadgroup tint_symbol_5* tint_symbol_3 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
-  threadgroup tint_array<float4x2, 4>* const tint_symbol_2 = &((*(tint_symbol_3)).w);
-  f_inner(local_invocation_index, tint_symbol_2, tint_symbol_4);
-  return;
-}
-

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2/to_workgroup.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat4x2/to_workgroup.wgsl.expected.wgsl
deleted file mode 100644
index 9948855..0000000
--- a/test/tint/buffer/uniform/std140/array/mat4x2/to_workgroup.wgsl.expected.wgsl
+++ /dev/null

@@ -1,17 +0,0 @@
-struct S {
-  before : i32,
-  m : mat4x2<f32>,
-  after : i32,
-}
-
-@group(0) @binding(0) var<uniform> u : array<mat4x2<f32>, 4>;
-
-var<workgroup> w : array<mat4x2<f32>, 4>;
-
-@compute @workgroup_size(1)
-fn f() {
-  w = u;
-  w[1] = u[2];
-  w[1][0] = u[0][1].yx;
-  w[1][0].x = u[0][1].x;
-}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2_f16/dynamic_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/array/mat4x2_f16/dynamic_index_via_ptr.wgsl
new file mode 100644
index 0000000..54a275a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x2_f16/dynamic_index_via_ptr.wgsl

@@ -0,0 +1,17 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : array<mat4x2<f16>, 4>;
+
+var<private> counter = 0;
+fn i() -> i32 { counter++; return counter; }
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a       = &a;
+  let p_a_i     = &((*p_a)[i()]);
+  let p_a_i_i   = &((*p_a_i)[i()]);
+
+  let l_a       : array<mat4x2<f16>, 4> = *p_a;
+  let l_a_i     : mat4x2<f16>           = *p_a_i;
+  let l_a_i_i   : vec2<f16>             = *p_a_i_i;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x2_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..c5d26b1
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x2_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,44 @@
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[4];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+matrix<float16_t, 4, 2> tint_symbol_1(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  const uint scalar_offset_3 = ((offset + 12u)) / 4;
+  uint ubo_load_3 = buffer[scalar_offset_3 / 4][scalar_offset_3 % 4];
+  return matrix<float16_t, 4, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))));
+}
+
+typedef matrix<float16_t, 4, 2> tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[4], uint offset) {
+  matrix<float16_t, 4, 2> arr[4] = (matrix<float16_t, 4, 2>[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 16u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_a_i_save = i();
+  const int p_a_i_i_save = i();
+  const matrix<float16_t, 4, 2> l_a[4] = tint_symbol(a, 0u);
+  const matrix<float16_t, 4, 2> l_a_i = tint_symbol_1(a, (16u * uint(p_a_i_save)));
+  const uint scalar_offset_4 = (((16u * uint(p_a_i_save)) + (4u * uint(p_a_i_i_save)))) / 4;
+  uint ubo_load_4 = a[scalar_offset_4 / 4][scalar_offset_4 % 4];
+  const vector<float16_t, 2> l_a_i_i = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_4 & 0xFFFF)), float16_t(f16tof32(ubo_load_4 >> 16)));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x2_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..101f0c8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x2_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,49 @@
+SKIP: FAILED
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[4];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+matrix<float16_t, 4, 2> tint_symbol_1(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  const uint scalar_offset_3 = ((offset + 12u)) / 4;
+  uint ubo_load_3 = buffer[scalar_offset_3 / 4][scalar_offset_3 % 4];
+  return matrix<float16_t, 4, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))));
+}
+
+typedef matrix<float16_t, 4, 2> tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[4], uint offset) {
+  matrix<float16_t, 4, 2> arr[4] = (matrix<float16_t, 4, 2>[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 16u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_a_i_save = i();
+  const int p_a_i_i_save = i();
+  const matrix<float16_t, 4, 2> l_a[4] = tint_symbol(a, 0u);
+  const matrix<float16_t, 4, 2> l_a_i = tint_symbol_1(a, (16u * uint(p_a_i_save)));
+  const uint scalar_offset_4 = (((16u * uint(p_a_i_save)) + (4u * uint(p_a_i_i_save)))) / 4;
+  uint ubo_load_4 = a[scalar_offset_4 / 4][scalar_offset_4 % 4];
+  const vector<float16_t, 2> l_a_i_i = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_4 & 0xFFFF)), float16_t(f16tof32(ubo_load_4 >> 16)));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x00000252EDBAA7F0(11,8-16): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2_f16/dynamic_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat4x2_f16/dynamic_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..fdbb9a7
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x2_f16/dynamic_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,75 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct mat4x2_f16 {
+  f16vec2 col0;
+  f16vec2 col1;
+  f16vec2 col2;
+  f16vec2 col3;
+};
+
+layout(binding = 0, std140) uniform a_block_std140_ubo {
+  mat4x2_f16 inner[4];
+} a;
+
+int counter = 0;
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+f16mat4x2 conv_mat4x2_f16(mat4x2_f16 val) {
+  return f16mat4x2(val.col0, val.col1, val.col2, val.col3);
+}
+
+f16mat4x2[4] conv_arr4_mat4x2_f16(mat4x2_f16 val[4]) {
+  f16mat4x2 arr[4] = f16mat4x2[4](f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_mat4x2_f16(val[i]);
+    }
+  }
+  return arr;
+}
+
+f16vec2 load_a_inner_p0_p1(uint p0, uint p1) {
+  switch(p1) {
+    case 0u: {
+      return a.inner[p0].col0;
+      break;
+    }
+    case 1u: {
+      return a.inner[p0].col1;
+      break;
+    }
+    case 2u: {
+      return a.inner[p0].col2;
+      break;
+    }
+    case 3u: {
+      return a.inner[p0].col3;
+      break;
+    }
+    default: {
+      return f16vec2(0.0hf);
+      break;
+    }
+  }
+}
+
+void f() {
+  f16mat4x2 p_a[4] = conv_arr4_mat4x2_f16(a.inner);
+  int tint_symbol = i();
+  f16mat4x2 p_a_i = conv_mat4x2_f16(a.inner[tint_symbol]);
+  int tint_symbol_1 = i();
+  f16vec2 p_a_i_i = load_a_inner_p0_p1(uint(tint_symbol), uint(tint_symbol_1));
+  f16mat4x2 l_a[4] = conv_arr4_mat4x2_f16(a.inner);
+  f16mat4x2 l_a_i = conv_mat4x2_f16(a.inner[tint_symbol]);
+  f16vec2 l_a_i_i = load_a_inner_p0_p1(uint(tint_symbol), uint(tint_symbol_1));
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2_f16/dynamic_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat4x2_f16/dynamic_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..f850ce9
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x2_f16/dynamic_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,33 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+int i() {
+  thread int tint_symbol_2 = 0;
+  tint_symbol_2 = as_type<int>((as_type<uint>(tint_symbol_2) + as_type<uint>(1)));
+  return tint_symbol_2;
+}
+
+kernel void f(const constant tint_array<half4x2, 4>* tint_symbol_3 [[buffer(0)]]) {
+  int const tint_symbol = i();
+  int const p_a_i_save = tint_symbol;
+  int const tint_symbol_1 = i();
+  int const p_a_i_i_save = tint_symbol_1;
+  tint_array<half4x2, 4> const l_a = *(tint_symbol_3);
+  half4x2 const l_a_i = (*(tint_symbol_3))[p_a_i_save];
+  half2 const l_a_i_i = (*(tint_symbol_3))[p_a_i_save][p_a_i_i_save];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2_f16/dynamic_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat4x2_f16/dynamic_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..a141530
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x2_f16/dynamic_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,181 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 108
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %a_block_std140 "a_block_std140"
+               OpMemberName %a_block_std140 0 "inner"
+               OpName %mat4x2_f16 "mat4x2_f16"
+               OpMemberName %mat4x2_f16 0 "col0"
+               OpMemberName %mat4x2_f16 1 "col1"
+               OpMemberName %mat4x2_f16 2 "col2"
+               OpMemberName %mat4x2_f16 3 "col3"
+               OpName %a "a"
+               OpName %counter "counter"
+               OpName %i "i"
+               OpName %conv_mat4x2_f16 "conv_mat4x2_f16"
+               OpName %val "val"
+               OpName %conv_arr4_mat4x2_f16 "conv_arr4_mat4x2_f16"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i_0 "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_a_inner_p0_p1 "load_a_inner_p0_p1"
+               OpName %p0 "p0"
+               OpName %p1 "p1"
+               OpName %f "f"
+               OpDecorate %a_block_std140 Block
+               OpMemberDecorate %a_block_std140 0 Offset 0
+               OpMemberDecorate %mat4x2_f16 0 Offset 0
+               OpMemberDecorate %mat4x2_f16 1 Offset 4
+               OpMemberDecorate %mat4x2_f16 2 Offset 8
+               OpMemberDecorate %mat4x2_f16 3 Offset 12
+               OpDecorate %_arr_mat4x2_f16_uint_4 ArrayStride 16
+               OpDecorate %a NonWritable
+               OpDecorate %a DescriptorSet 0
+               OpDecorate %a Binding 0
+               OpDecorate %_arr_mat4v2half_uint_4 ArrayStride 16
+       %half = OpTypeFloat 16
+     %v2half = OpTypeVector %half 2
+ %mat4x2_f16 = OpTypeStruct %v2half %v2half %v2half %v2half
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat4x2_f16_uint_4 = OpTypeArray %mat4x2_f16 %uint_4
+%a_block_std140 = OpTypeStruct %_arr_mat4x2_f16_uint_4
+%_ptr_Uniform_a_block_std140 = OpTypePointer Uniform %a_block_std140
+          %a = OpVariable %_ptr_Uniform_a_block_std140 Uniform
+        %int = OpTypeInt 32 1
+         %11 = OpConstantNull %int
+%_ptr_Private_int = OpTypePointer Private %int
+    %counter = OpVariable %_ptr_Private_int Private %11
+         %14 = OpTypeFunction %int
+      %int_1 = OpConstant %int 1
+ %mat4v2half = OpTypeMatrix %v2half 4
+         %21 = OpTypeFunction %mat4v2half %mat4x2_f16
+%_arr_mat4v2half_uint_4 = OpTypeArray %mat4v2half %uint_4
+         %31 = OpTypeFunction %_arr_mat4v2half_uint_4 %_arr_mat4x2_f16_uint_4
+%_ptr_Function__arr_mat4v2half_uint_4 = OpTypePointer Function %_arr_mat4v2half_uint_4
+         %38 = OpConstantNull %_arr_mat4v2half_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %41 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_mat4x2_f16_uint_4 = OpTypePointer Function %_arr_mat4x2_f16_uint_4
+         %54 = OpConstantNull %_arr_mat4x2_f16_uint_4
+%_ptr_Function_mat4v2half = OpTypePointer Function %mat4v2half
+%_ptr_Function_mat4x2_f16 = OpTypePointer Function %mat4x2_f16
+     %uint_1 = OpConstant %uint 1
+         %67 = OpTypeFunction %v2half %uint %uint
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half
+     %uint_2 = OpConstant %uint 2
+     %uint_3 = OpConstant %uint 3
+         %90 = OpConstantNull %v2half
+       %void = OpTypeVoid
+         %91 = OpTypeFunction %void
+%_ptr_Uniform__arr_mat4x2_f16_uint_4 = OpTypePointer Uniform %_arr_mat4x2_f16_uint_4
+%_ptr_Uniform_mat4x2_f16 = OpTypePointer Uniform %mat4x2_f16
+          %i = OpFunction %int None %14
+         %16 = OpLabel
+         %17 = OpLoad %int %counter
+         %19 = OpIAdd %int %17 %int_1
+               OpStore %counter %19
+         %20 = OpLoad %int %counter
+               OpReturnValue %20
+               OpFunctionEnd
+%conv_mat4x2_f16 = OpFunction %mat4v2half None %21
+        %val = OpFunctionParameter %mat4x2_f16
+         %25 = OpLabel
+         %26 = OpCompositeExtract %v2half %val 0
+         %27 = OpCompositeExtract %v2half %val 1
+         %28 = OpCompositeExtract %v2half %val 2
+         %29 = OpCompositeExtract %v2half %val 3
+         %30 = OpCompositeConstruct %mat4v2half %26 %27 %28 %29
+               OpReturnValue %30
+               OpFunctionEnd
+%conv_arr4_mat4x2_f16 = OpFunction %_arr_mat4v2half_uint_4 None %31
+      %val_0 = OpFunctionParameter %_arr_mat4x2_f16_uint_4
+         %35 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_mat4v2half_uint_4 Function %38
+        %i_0 = OpVariable %_ptr_Function_uint Function %41
+%var_for_index = OpVariable %_ptr_Function__arr_mat4x2_f16_uint_4 Function %54
+               OpBranch %42
+         %42 = OpLabel
+               OpLoopMerge %43 %44 None
+               OpBranch %45
+         %45 = OpLabel
+         %47 = OpLoad %uint %i_0
+         %48 = OpULessThan %bool %47 %uint_4
+         %46 = OpLogicalNot %bool %48
+               OpSelectionMerge %50 None
+               OpBranchConditional %46 %51 %50
+         %51 = OpLabel
+               OpBranch %43
+         %50 = OpLabel
+               OpStore %var_for_index %val_0
+         %55 = OpLoad %uint %i_0
+         %57 = OpAccessChain %_ptr_Function_mat4v2half %arr %55
+         %59 = OpLoad %uint %i_0
+         %61 = OpAccessChain %_ptr_Function_mat4x2_f16 %var_for_index %59
+         %62 = OpLoad %mat4x2_f16 %61
+         %58 = OpFunctionCall %mat4v2half %conv_mat4x2_f16 %62
+               OpStore %57 %58
+               OpBranch %44
+         %44 = OpLabel
+         %63 = OpLoad %uint %i_0
+         %65 = OpIAdd %uint %63 %uint_1
+               OpStore %i_0 %65
+               OpBranch %42
+         %43 = OpLabel
+         %66 = OpLoad %_arr_mat4v2half_uint_4 %arr
+               OpReturnValue %66
+               OpFunctionEnd
+%load_a_inner_p0_p1 = OpFunction %v2half None %67
+         %p0 = OpFunctionParameter %uint
+         %p1 = OpFunctionParameter %uint
+         %71 = OpLabel
+               OpSelectionMerge %72 None
+               OpSwitch %p1 %73 0 %74 1 %75 2 %76 3 %77
+         %74 = OpLabel
+         %80 = OpAccessChain %_ptr_Uniform_v2half %a %uint_0 %p0 %uint_0
+         %81 = OpLoad %v2half %80
+               OpReturnValue %81
+         %75 = OpLabel
+         %82 = OpAccessChain %_ptr_Uniform_v2half %a %uint_0 %p0 %uint_1
+         %83 = OpLoad %v2half %82
+               OpReturnValue %83
+         %76 = OpLabel
+         %85 = OpAccessChain %_ptr_Uniform_v2half %a %uint_0 %p0 %uint_2
+         %86 = OpLoad %v2half %85
+               OpReturnValue %86
+         %77 = OpLabel
+         %88 = OpAccessChain %_ptr_Uniform_v2half %a %uint_0 %p0 %uint_3
+         %89 = OpLoad %v2half %88
+               OpReturnValue %89
+         %73 = OpLabel
+               OpReturnValue %90
+         %72 = OpLabel
+               OpReturnValue %90
+               OpFunctionEnd
+          %f = OpFunction %void None %91
+         %94 = OpLabel
+         %95 = OpFunctionCall %int %i
+         %96 = OpFunctionCall %int %i
+         %99 = OpAccessChain %_ptr_Uniform__arr_mat4x2_f16_uint_4 %a %uint_0
+        %100 = OpLoad %_arr_mat4x2_f16_uint_4 %99
+         %97 = OpFunctionCall %_arr_mat4v2half_uint_4 %conv_arr4_mat4x2_f16 %100
+        %103 = OpAccessChain %_ptr_Uniform_mat4x2_f16 %a %uint_0 %95
+        %104 = OpLoad %mat4x2_f16 %103
+        %101 = OpFunctionCall %mat4v2half %conv_mat4x2_f16 %104
+        %106 = OpBitcast %uint %95
+        %107 = OpBitcast %uint %96
+        %105 = OpFunctionCall %v2half %load_a_inner_p0_p1 %106 %107
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2_f16/dynamic_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat4x2_f16/dynamic_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..d888ce9
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x2_f16/dynamic_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,20 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : array<mat4x2<f16>, 4>;
+
+var<private> counter = 0;
+
+fn i() -> i32 {
+  counter++;
+  return counter;
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &(a);
+  let p_a_i = &((*(p_a))[i()]);
+  let p_a_i_i = &((*(p_a_i))[i()]);
+  let l_a : array<mat4x2<f16>, 4> = *(p_a);
+  let l_a_i : mat4x2<f16> = *(p_a_i);
+  let l_a_i_i : vec2<f16> = *(p_a_i_i);
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2_f16/static_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/array/mat4x2_f16/static_index_via_ptr.wgsl
new file mode 100644
index 0000000..c4faaa8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x2_f16/static_index_via_ptr.wgsl

@@ -0,0 +1,14 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : array<mat4x2<f16>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a       = &a;
+  let p_a_2     = &((*p_a)[2]);
+  let p_a_2_1   = &((*p_a_2)[1]);
+
+  let l_a       : array<mat4x2<f16>, 4> = *p_a;
+  let l_a_i     : mat4x2<f16>           = *p_a_2;
+  let l_a_i_i   : vec2<f16>             = *p_a_2_1;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x2_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..0ffe38c
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x2_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,35 @@
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[4];
+};
+
+matrix<float16_t, 4, 2> tint_symbol_1(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  const uint scalar_offset_3 = ((offset + 12u)) / 4;
+  uint ubo_load_3 = buffer[scalar_offset_3 / 4][scalar_offset_3 % 4];
+  return matrix<float16_t, 4, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))));
+}
+
+typedef matrix<float16_t, 4, 2> tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[4], uint offset) {
+  matrix<float16_t, 4, 2> arr[4] = (matrix<float16_t, 4, 2>[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 16u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 4, 2> l_a[4] = tint_symbol(a, 0u);
+  const matrix<float16_t, 4, 2> l_a_i = tint_symbol_1(a, 32u);
+  uint ubo_load_4 = a[2].y;
+  const vector<float16_t, 2> l_a_i_i = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_4 & 0xFFFF)), float16_t(f16tof32(ubo_load_4 >> 16)));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x2_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..1a318b5
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x2_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,40 @@
+SKIP: FAILED
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[4];
+};
+
+matrix<float16_t, 4, 2> tint_symbol_1(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  const uint scalar_offset_3 = ((offset + 12u)) / 4;
+  uint ubo_load_3 = buffer[scalar_offset_3 / 4][scalar_offset_3 % 4];
+  return matrix<float16_t, 4, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))));
+}
+
+typedef matrix<float16_t, 4, 2> tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[4], uint offset) {
+  matrix<float16_t, 4, 2> arr[4] = (matrix<float16_t, 4, 2>[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 16u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 4, 2> l_a[4] = tint_symbol(a, 0u);
+  const matrix<float16_t, 4, 2> l_a_i = tint_symbol_1(a, 32u);
+  uint ubo_load_4 = a[2].y;
+  const vector<float16_t, 2> l_a_i_i = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_4 & 0xFFFF)), float16_t(f16tof32(ubo_load_4 >> 16)));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000014EAAC1C520(5,8-16): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2_f16/static_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat4x2_f16/static_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..1f206c5
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x2_f16/static_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,42 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct mat4x2_f16 {
+  f16vec2 col0;
+  f16vec2 col1;
+  f16vec2 col2;
+  f16vec2 col3;
+};
+
+layout(binding = 0, std140) uniform a_block_std140_ubo {
+  mat4x2_f16 inner[4];
+} a;
+
+f16mat4x2 conv_mat4x2_f16(mat4x2_f16 val) {
+  return f16mat4x2(val.col0, val.col1, val.col2, val.col3);
+}
+
+f16mat4x2[4] conv_arr4_mat4x2_f16(mat4x2_f16 val[4]) {
+  f16mat4x2 arr[4] = f16mat4x2[4](f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_mat4x2_f16(val[i]);
+    }
+  }
+  return arr;
+}
+
+void f() {
+  f16mat4x2 p_a[4] = conv_arr4_mat4x2_f16(a.inner);
+  f16mat4x2 p_a_2 = conv_mat4x2_f16(a.inner[2u]);
+  f16vec2 p_a_2_1 = a.inner[2u].col1;
+  f16mat4x2 l_a[4] = conv_arr4_mat4x2_f16(a.inner);
+  f16mat4x2 l_a_i = conv_mat4x2_f16(a.inner[2u]);
+  f16vec2 l_a_i_i = a.inner[2u].col1;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2_f16/static_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat4x2_f16/static_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..b804719
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x2_f16/static_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,23 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+kernel void f(const constant tint_array<half4x2, 4>* tint_symbol [[buffer(0)]]) {
+  tint_array<half4x2, 4> const l_a = *(tint_symbol);
+  half4x2 const l_a_i = (*(tint_symbol))[2];
+  half2 const l_a_i_i = (*(tint_symbol))[2][1];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2_f16/static_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat4x2_f16/static_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..03a69bc
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x2_f16/static_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,129 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 73
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %a_block_std140 "a_block_std140"
+               OpMemberName %a_block_std140 0 "inner"
+               OpName %mat4x2_f16 "mat4x2_f16"
+               OpMemberName %mat4x2_f16 0 "col0"
+               OpMemberName %mat4x2_f16 1 "col1"
+               OpMemberName %mat4x2_f16 2 "col2"
+               OpMemberName %mat4x2_f16 3 "col3"
+               OpName %a "a"
+               OpName %conv_mat4x2_f16 "conv_mat4x2_f16"
+               OpName %val "val"
+               OpName %conv_arr4_mat4x2_f16 "conv_arr4_mat4x2_f16"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %f "f"
+               OpDecorate %a_block_std140 Block
+               OpMemberDecorate %a_block_std140 0 Offset 0
+               OpMemberDecorate %mat4x2_f16 0 Offset 0
+               OpMemberDecorate %mat4x2_f16 1 Offset 4
+               OpMemberDecorate %mat4x2_f16 2 Offset 8
+               OpMemberDecorate %mat4x2_f16 3 Offset 12
+               OpDecorate %_arr_mat4x2_f16_uint_4 ArrayStride 16
+               OpDecorate %a NonWritable
+               OpDecorate %a DescriptorSet 0
+               OpDecorate %a Binding 0
+               OpDecorate %_arr_mat4v2half_uint_4 ArrayStride 16
+       %half = OpTypeFloat 16
+     %v2half = OpTypeVector %half 2
+ %mat4x2_f16 = OpTypeStruct %v2half %v2half %v2half %v2half
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat4x2_f16_uint_4 = OpTypeArray %mat4x2_f16 %uint_4
+%a_block_std140 = OpTypeStruct %_arr_mat4x2_f16_uint_4
+%_ptr_Uniform_a_block_std140 = OpTypePointer Uniform %a_block_std140
+          %a = OpVariable %_ptr_Uniform_a_block_std140 Uniform
+ %mat4v2half = OpTypeMatrix %v2half 4
+         %10 = OpTypeFunction %mat4v2half %mat4x2_f16
+%_arr_mat4v2half_uint_4 = OpTypeArray %mat4v2half %uint_4
+         %20 = OpTypeFunction %_arr_mat4v2half_uint_4 %_arr_mat4x2_f16_uint_4
+%_ptr_Function__arr_mat4v2half_uint_4 = OpTypePointer Function %_arr_mat4v2half_uint_4
+         %27 = OpConstantNull %_arr_mat4v2half_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %30 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_mat4x2_f16_uint_4 = OpTypePointer Function %_arr_mat4x2_f16_uint_4
+         %43 = OpConstantNull %_arr_mat4x2_f16_uint_4
+%_ptr_Function_mat4v2half = OpTypePointer Function %mat4v2half
+%_ptr_Function_mat4x2_f16 = OpTypePointer Function %mat4x2_f16
+     %uint_1 = OpConstant %uint 1
+       %void = OpTypeVoid
+         %56 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_mat4x2_f16_uint_4 = OpTypePointer Uniform %_arr_mat4x2_f16_uint_4
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_mat4x2_f16 = OpTypePointer Uniform %mat4x2_f16
+%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half
+%conv_mat4x2_f16 = OpFunction %mat4v2half None %10
+        %val = OpFunctionParameter %mat4x2_f16
+         %14 = OpLabel
+         %15 = OpCompositeExtract %v2half %val 0
+         %16 = OpCompositeExtract %v2half %val 1
+         %17 = OpCompositeExtract %v2half %val 2
+         %18 = OpCompositeExtract %v2half %val 3
+         %19 = OpCompositeConstruct %mat4v2half %15 %16 %17 %18
+               OpReturnValue %19
+               OpFunctionEnd
+%conv_arr4_mat4x2_f16 = OpFunction %_arr_mat4v2half_uint_4 None %20
+      %val_0 = OpFunctionParameter %_arr_mat4x2_f16_uint_4
+         %24 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_mat4v2half_uint_4 Function %27
+          %i = OpVariable %_ptr_Function_uint Function %30
+%var_for_index = OpVariable %_ptr_Function__arr_mat4x2_f16_uint_4 Function %43
+               OpBranch %31
+         %31 = OpLabel
+               OpLoopMerge %32 %33 None
+               OpBranch %34
+         %34 = OpLabel
+         %36 = OpLoad %uint %i
+         %37 = OpULessThan %bool %36 %uint_4
+         %35 = OpLogicalNot %bool %37
+               OpSelectionMerge %39 None
+               OpBranchConditional %35 %40 %39
+         %40 = OpLabel
+               OpBranch %32
+         %39 = OpLabel
+               OpStore %var_for_index %val_0
+         %44 = OpLoad %uint %i
+         %46 = OpAccessChain %_ptr_Function_mat4v2half %arr %44
+         %48 = OpLoad %uint %i
+         %50 = OpAccessChain %_ptr_Function_mat4x2_f16 %var_for_index %48
+         %51 = OpLoad %mat4x2_f16 %50
+         %47 = OpFunctionCall %mat4v2half %conv_mat4x2_f16 %51
+               OpStore %46 %47
+               OpBranch %33
+         %33 = OpLabel
+         %52 = OpLoad %uint %i
+         %54 = OpIAdd %uint %52 %uint_1
+               OpStore %i %54
+               OpBranch %31
+         %32 = OpLabel
+         %55 = OpLoad %_arr_mat4v2half_uint_4 %arr
+               OpReturnValue %55
+               OpFunctionEnd
+          %f = OpFunction %void None %56
+         %59 = OpLabel
+         %63 = OpAccessChain %_ptr_Uniform__arr_mat4x2_f16_uint_4 %a %uint_0
+         %64 = OpLoad %_arr_mat4x2_f16_uint_4 %63
+         %60 = OpFunctionCall %_arr_mat4v2half_uint_4 %conv_arr4_mat4x2_f16 %64
+         %68 = OpAccessChain %_ptr_Uniform_mat4x2_f16 %a %uint_0 %uint_2
+         %69 = OpLoad %mat4x2_f16 %68
+         %65 = OpFunctionCall %mat4v2half %conv_mat4x2_f16 %69
+         %71 = OpAccessChain %_ptr_Uniform_v2half %a %uint_0 %uint_2 %uint_1
+         %72 = OpLoad %v2half %71
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2_f16/static_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat4x2_f16/static_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..cbfb856
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x2_f16/static_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,13 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : array<mat4x2<f16>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &(a);
+  let p_a_2 = &((*(p_a))[2]);
+  let p_a_2_1 = &((*(p_a_2))[1]);
+  let l_a : array<mat4x2<f16>, 4> = *(p_a);
+  let l_a_i : mat4x2<f16> = *(p_a_2);
+  let l_a_i_i : vec2<f16> = *(p_a_2_1);
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_builtin.wgsl b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_builtin.wgsl
new file mode 100644
index 0000000..ff810e1
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_builtin.wgsl

@@ -0,0 +1,10 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : array<mat4x2<f16>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    let t = transpose(u[2]);
+    let l = length(u[0][1].yx);
+    let a = abs(u[0][1].yx.x);
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_builtin.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_builtin.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..cddeb05
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_builtin.wgsl.expected.dxc.hlsl

@@ -0,0 +1,25 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[4];
+};
+
+matrix<float16_t, 4, 2> tint_symbol(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  const uint scalar_offset_3 = ((offset + 12u)) / 4;
+  uint ubo_load_3 = buffer[scalar_offset_3 / 4][scalar_offset_3 % 4];
+  return matrix<float16_t, 4, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 2, 4> t = transpose(tint_symbol(u, 32u));
+  uint ubo_load_4 = u[0].y;
+  const float16_t l = length(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_4 & 0xFFFF)), float16_t(f16tof32(ubo_load_4 >> 16))).yx);
+  uint ubo_load_5 = u[0].y;
+  const float16_t a = abs(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_5 & 0xFFFF)), float16_t(f16tof32(ubo_load_5 >> 16))).yx.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_builtin.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_builtin.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..5531d39
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_builtin.wgsl.expected.fxc.hlsl

@@ -0,0 +1,30 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[4];
+};
+
+matrix<float16_t, 4, 2> tint_symbol(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  const uint scalar_offset_3 = ((offset + 12u)) / 4;
+  uint ubo_load_3 = buffer[scalar_offset_3 / 4][scalar_offset_3 % 4];
+  return matrix<float16_t, 4, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 2, 4> t = transpose(tint_symbol(u, 32u));
+  uint ubo_load_4 = u[0].y;
+  const float16_t l = length(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_4 & 0xFFFF)), float16_t(f16tof32(ubo_load_4 >> 16))).yx);
+  uint ubo_load_5 = u[0].y;
+  const float16_t a = abs(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_5 & 0xFFFF)), float16_t(f16tof32(ubo_load_5 >> 16))).yx.x);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000025103259540(5,8-16): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_builtin.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_builtin.wgsl.expected.glsl
new file mode 100644
index 0000000..36a1082
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_builtin.wgsl.expected.glsl

@@ -0,0 +1,29 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct mat4x2_f16 {
+  f16vec2 col0;
+  f16vec2 col1;
+  f16vec2 col2;
+  f16vec2 col3;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  mat4x2_f16 inner[4];
+} u;
+
+f16mat4x2 conv_mat4x2_f16(mat4x2_f16 val) {
+  return f16mat4x2(val.col0, val.col1, val.col2, val.col3);
+}
+
+void f() {
+  f16mat2x4 t = transpose(conv_mat4x2_f16(u.inner[2u]));
+  float16_t l = length(u.inner[0u].col1.yx);
+  float16_t a = abs(u.inner[0u].col1.yx[0u]);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_builtin.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_builtin.wgsl.expected.msl
new file mode 100644
index 0000000..58f8a98
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_builtin.wgsl.expected.msl

@@ -0,0 +1,23 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+kernel void f(const constant tint_array<half4x2, 4>* tint_symbol [[buffer(0)]]) {
+  half2x4 const t = transpose((*(tint_symbol))[2]);
+  half const l = length(half2((*(tint_symbol))[0][1]).yx);
+  half const a = fabs(half2((*(tint_symbol))[0][1]).yx[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_builtin.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_builtin.wgsl.expected.spvasm
new file mode 100644
index 0000000..78be241
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_builtin.wgsl.expected.spvasm

@@ -0,0 +1,83 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 46
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+         %34 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %mat4x2_f16 "mat4x2_f16"
+               OpMemberName %mat4x2_f16 0 "col0"
+               OpMemberName %mat4x2_f16 1 "col1"
+               OpMemberName %mat4x2_f16 2 "col2"
+               OpMemberName %mat4x2_f16 3 "col3"
+               OpName %u "u"
+               OpName %conv_mat4x2_f16 "conv_mat4x2_f16"
+               OpName %val "val"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %mat4x2_f16 0 Offset 0
+               OpMemberDecorate %mat4x2_f16 1 Offset 4
+               OpMemberDecorate %mat4x2_f16 2 Offset 8
+               OpMemberDecorate %mat4x2_f16 3 Offset 12
+               OpDecorate %_arr_mat4x2_f16_uint_4 ArrayStride 16
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %half = OpTypeFloat 16
+     %v2half = OpTypeVector %half 2
+ %mat4x2_f16 = OpTypeStruct %v2half %v2half %v2half %v2half
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat4x2_f16_uint_4 = OpTypeArray %mat4x2_f16 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_mat4x2_f16_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat4v2half = OpTypeMatrix %v2half 4
+         %10 = OpTypeFunction %mat4v2half %mat4x2_f16
+       %void = OpTypeVoid
+         %20 = OpTypeFunction %void
+     %v4half = OpTypeVector %half 4
+ %mat2v4half = OpTypeMatrix %v4half 2
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_mat4x2_f16 = OpTypePointer Uniform %mat4x2_f16
+         %35 = OpConstantNull %uint
+     %uint_1 = OpConstant %uint 1
+%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half
+%conv_mat4x2_f16 = OpFunction %mat4v2half None %10
+        %val = OpFunctionParameter %mat4x2_f16
+         %14 = OpLabel
+         %15 = OpCompositeExtract %v2half %val 0
+         %16 = OpCompositeExtract %v2half %val 1
+         %17 = OpCompositeExtract %v2half %val 2
+         %18 = OpCompositeExtract %v2half %val 3
+         %19 = OpCompositeConstruct %mat4v2half %15 %16 %17 %18
+               OpReturnValue %19
+               OpFunctionEnd
+          %f = OpFunction %void None %20
+         %23 = OpLabel
+         %31 = OpAccessChain %_ptr_Uniform_mat4x2_f16 %u %uint_0 %uint_2
+         %32 = OpLoad %mat4x2_f16 %31
+         %27 = OpFunctionCall %mat4v2half %conv_mat4x2_f16 %32
+         %24 = OpTranspose %mat2v4half %27
+         %38 = OpAccessChain %_ptr_Uniform_v2half %u %uint_0 %35 %uint_1
+         %39 = OpLoad %v2half %38
+         %40 = OpVectorShuffle %v2half %39 %39 1 0
+         %33 = OpExtInst %half %34 Length %40
+         %42 = OpAccessChain %_ptr_Uniform_v2half %u %uint_0 %35 %uint_1
+         %43 = OpLoad %v2half %42
+         %44 = OpVectorShuffle %v2half %43 %43 1 0
+         %45 = OpCompositeExtract %half %44 0
+         %41 = OpExtInst %half %34 FAbs %45
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_builtin.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_builtin.wgsl.expected.wgsl
new file mode 100644
index 0000000..de84569
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_builtin.wgsl.expected.wgsl

@@ -0,0 +1,10 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : array<mat4x2<f16>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let t = transpose(u[2]);
+  let l = length(u[0][1].yx);
+  let a = abs(u[0][1].yx.x);
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_fn.wgsl b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_fn.wgsl
new file mode 100644
index 0000000..09c953b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_fn.wgsl

@@ -0,0 +1,16 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : array<mat4x2<f16>, 4>;
+
+fn a(a : array<mat4x2<f16>, 4>) {}
+fn b(m : mat4x2<f16>) {}
+fn c(v : vec2<f16>) {}
+fn d(f : f16) {}
+
+@compute @workgroup_size(1)
+fn f() {
+    a(u);
+    b(u[1]);
+    c(u[1][0].yx);
+    d(u[1][0].yx.x);
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_fn.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_fn.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..3895e31
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_fn.wgsl.expected.dxc.hlsl

@@ -0,0 +1,49 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[4];
+};
+
+void a(matrix<float16_t, 4, 2> a_1[4]) {
+}
+
+void b(matrix<float16_t, 4, 2> m) {
+}
+
+void c(vector<float16_t, 2> v) {
+}
+
+void d(float16_t f_1) {
+}
+
+matrix<float16_t, 4, 2> tint_symbol_1(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  const uint scalar_offset_3 = ((offset + 12u)) / 4;
+  uint ubo_load_3 = buffer[scalar_offset_3 / 4][scalar_offset_3 % 4];
+  return matrix<float16_t, 4, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))));
+}
+
+typedef matrix<float16_t, 4, 2> tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[4], uint offset) {
+  matrix<float16_t, 4, 2> arr[4] = (matrix<float16_t, 4, 2>[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 16u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(tint_symbol_1(u, 16u));
+  uint ubo_load_4 = u[1].x;
+  c(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_4 & 0xFFFF)), float16_t(f16tof32(ubo_load_4 >> 16))).yx);
+  uint ubo_load_5 = u[1].x;
+  d(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_5 & 0xFFFF)), float16_t(f16tof32(ubo_load_5 >> 16))).yx.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_fn.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_fn.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..3a1afc5
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_fn.wgsl.expected.fxc.hlsl

@@ -0,0 +1,57 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[4];
+};
+
+void a(matrix<float16_t, 4, 2> a_1[4]) {
+}
+
+void b(matrix<float16_t, 4, 2> m) {
+}
+
+void c(vector<float16_t, 2> v) {
+}
+
+void d(float16_t f_1) {
+}
+
+matrix<float16_t, 4, 2> tint_symbol_1(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  const uint scalar_offset_3 = ((offset + 12u)) / 4;
+  uint ubo_load_3 = buffer[scalar_offset_3 / 4][scalar_offset_3 % 4];
+  return matrix<float16_t, 4, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))));
+}
+
+typedef matrix<float16_t, 4, 2> tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[4], uint offset) {
+  matrix<float16_t, 4, 2> arr[4] = (matrix<float16_t, 4, 2>[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 16u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(tint_symbol_1(u, 16u));
+  uint ubo_load_4 = u[1].x;
+  c(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_4 & 0xFFFF)), float16_t(f16tof32(ubo_load_4 >> 16))).yx);
+  uint ubo_load_5 = u[1].x;
+  d(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_5 & 0xFFFF)), float16_t(f16tof32(ubo_load_5 >> 16))).yx.x);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000029B724CE850(5,15-23): error X3000: syntax error: unexpected token 'float16_t'
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000029B724CE850(8,15-23): error X3000: syntax error: unexpected token 'float16_t'
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000029B724CE850(11,15-23): error X3000: syntax error: unexpected token 'float16_t'
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000029B724CE850(14,8-16): error X3000: unrecognized identifier 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_fn.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_fn.wgsl.expected.glsl
new file mode 100644
index 0000000..df1283c
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_fn.wgsl.expected.glsl

@@ -0,0 +1,52 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct mat4x2_f16 {
+  f16vec2 col0;
+  f16vec2 col1;
+  f16vec2 col2;
+  f16vec2 col3;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  mat4x2_f16 inner[4];
+} u;
+
+void a(f16mat4x2 a_1[4]) {
+}
+
+void b(f16mat4x2 m) {
+}
+
+void c(f16vec2 v) {
+}
+
+void d(float16_t f_1) {
+}
+
+f16mat4x2 conv_mat4x2_f16(mat4x2_f16 val) {
+  return f16mat4x2(val.col0, val.col1, val.col2, val.col3);
+}
+
+f16mat4x2[4] conv_arr4_mat4x2_f16(mat4x2_f16 val[4]) {
+  f16mat4x2 arr[4] = f16mat4x2[4](f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_mat4x2_f16(val[i]);
+    }
+  }
+  return arr;
+}
+
+void f() {
+  a(conv_arr4_mat4x2_f16(u.inner));
+  b(conv_mat4x2_f16(u.inner[1u]));
+  c(u.inner[1u].col0.yx);
+  d(u.inner[1u].col0.yx[0u]);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_fn.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_fn.wgsl.expected.msl
new file mode 100644
index 0000000..66b4cf5
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_fn.wgsl.expected.msl

@@ -0,0 +1,36 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+void a(tint_array<half4x2, 4> a_1) {
+}
+
+void b(half4x2 m) {
+}
+
+void c(half2 v) {
+}
+
+void d(half f_1) {
+}
+
+kernel void f(const constant tint_array<half4x2, 4>* tint_symbol [[buffer(0)]]) {
+  a(*(tint_symbol));
+  b((*(tint_symbol))[1]);
+  c(half2((*(tint_symbol))[1][0]).yx);
+  d(half2((*(tint_symbol))[1][0]).yx[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_fn.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_fn.wgsl.expected.spvasm
new file mode 100644
index 0000000..ee9bd13
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_fn.wgsl.expected.spvasm

@@ -0,0 +1,169 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 97
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %mat4x2_f16 "mat4x2_f16"
+               OpMemberName %mat4x2_f16 0 "col0"
+               OpMemberName %mat4x2_f16 1 "col1"
+               OpMemberName %mat4x2_f16 2 "col2"
+               OpMemberName %mat4x2_f16 3 "col3"
+               OpName %u "u"
+               OpName %a "a"
+               OpName %a_1 "a_1"
+               OpName %b "b"
+               OpName %m "m"
+               OpName %c "c"
+               OpName %v "v"
+               OpName %d "d"
+               OpName %f_1 "f_1"
+               OpName %conv_mat4x2_f16 "conv_mat4x2_f16"
+               OpName %val "val"
+               OpName %conv_arr4_mat4x2_f16 "conv_arr4_mat4x2_f16"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %mat4x2_f16 0 Offset 0
+               OpMemberDecorate %mat4x2_f16 1 Offset 4
+               OpMemberDecorate %mat4x2_f16 2 Offset 8
+               OpMemberDecorate %mat4x2_f16 3 Offset 12
+               OpDecorate %_arr_mat4x2_f16_uint_4 ArrayStride 16
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpDecorate %_arr_mat4v2half_uint_4 ArrayStride 16
+       %half = OpTypeFloat 16
+     %v2half = OpTypeVector %half 2
+ %mat4x2_f16 = OpTypeStruct %v2half %v2half %v2half %v2half
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat4x2_f16_uint_4 = OpTypeArray %mat4x2_f16 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_mat4x2_f16_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+       %void = OpTypeVoid
+ %mat4v2half = OpTypeMatrix %v2half 4
+%_arr_mat4v2half_uint_4 = OpTypeArray %mat4v2half %uint_4
+         %10 = OpTypeFunction %void %_arr_mat4v2half_uint_4
+         %17 = OpTypeFunction %void %mat4v2half
+         %21 = OpTypeFunction %void %v2half
+         %25 = OpTypeFunction %void %half
+         %29 = OpTypeFunction %mat4v2half %mat4x2_f16
+         %38 = OpTypeFunction %_arr_mat4v2half_uint_4 %_arr_mat4x2_f16_uint_4
+%_ptr_Function__arr_mat4v2half_uint_4 = OpTypePointer Function %_arr_mat4v2half_uint_4
+         %44 = OpConstantNull %_arr_mat4v2half_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %47 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_mat4x2_f16_uint_4 = OpTypePointer Function %_arr_mat4x2_f16_uint_4
+         %60 = OpConstantNull %_arr_mat4x2_f16_uint_4
+%_ptr_Function_mat4v2half = OpTypePointer Function %mat4v2half
+%_ptr_Function_mat4x2_f16 = OpTypePointer Function %mat4x2_f16
+     %uint_1 = OpConstant %uint 1
+         %73 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_mat4x2_f16_uint_4 = OpTypePointer Uniform %_arr_mat4x2_f16_uint_4
+%_ptr_Uniform_mat4x2_f16 = OpTypePointer Uniform %mat4x2_f16
+%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half
+          %a = OpFunction %void None %10
+        %a_1 = OpFunctionParameter %_arr_mat4v2half_uint_4
+         %16 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %b = OpFunction %void None %17
+          %m = OpFunctionParameter %mat4v2half
+         %20 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %c = OpFunction %void None %21
+          %v = OpFunctionParameter %v2half
+         %24 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %d = OpFunction %void None %25
+        %f_1 = OpFunctionParameter %half
+         %28 = OpLabel
+               OpReturn
+               OpFunctionEnd
+%conv_mat4x2_f16 = OpFunction %mat4v2half None %29
+        %val = OpFunctionParameter %mat4x2_f16
+         %32 = OpLabel
+         %33 = OpCompositeExtract %v2half %val 0
+         %34 = OpCompositeExtract %v2half %val 1
+         %35 = OpCompositeExtract %v2half %val 2
+         %36 = OpCompositeExtract %v2half %val 3
+         %37 = OpCompositeConstruct %mat4v2half %33 %34 %35 %36
+               OpReturnValue %37
+               OpFunctionEnd
+%conv_arr4_mat4x2_f16 = OpFunction %_arr_mat4v2half_uint_4 None %38
+      %val_0 = OpFunctionParameter %_arr_mat4x2_f16_uint_4
+         %41 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_mat4v2half_uint_4 Function %44
+          %i = OpVariable %_ptr_Function_uint Function %47
+%var_for_index = OpVariable %_ptr_Function__arr_mat4x2_f16_uint_4 Function %60
+               OpBranch %48
+         %48 = OpLabel
+               OpLoopMerge %49 %50 None
+               OpBranch %51
+         %51 = OpLabel
+         %53 = OpLoad %uint %i
+         %54 = OpULessThan %bool %53 %uint_4
+         %52 = OpLogicalNot %bool %54
+               OpSelectionMerge %56 None
+               OpBranchConditional %52 %57 %56
+         %57 = OpLabel
+               OpBranch %49
+         %56 = OpLabel
+               OpStore %var_for_index %val_0
+         %61 = OpLoad %uint %i
+         %63 = OpAccessChain %_ptr_Function_mat4v2half %arr %61
+         %65 = OpLoad %uint %i
+         %67 = OpAccessChain %_ptr_Function_mat4x2_f16 %var_for_index %65
+         %68 = OpLoad %mat4x2_f16 %67
+         %64 = OpFunctionCall %mat4v2half %conv_mat4x2_f16 %68
+               OpStore %63 %64
+               OpBranch %50
+         %50 = OpLabel
+         %69 = OpLoad %uint %i
+         %71 = OpIAdd %uint %69 %uint_1
+               OpStore %i %71
+               OpBranch %48
+         %49 = OpLabel
+         %72 = OpLoad %_arr_mat4v2half_uint_4 %arr
+               OpReturnValue %72
+               OpFunctionEnd
+          %f = OpFunction %void None %73
+         %75 = OpLabel
+         %80 = OpAccessChain %_ptr_Uniform__arr_mat4x2_f16_uint_4 %u %uint_0
+         %81 = OpLoad %_arr_mat4x2_f16_uint_4 %80
+         %77 = OpFunctionCall %_arr_mat4v2half_uint_4 %conv_arr4_mat4x2_f16 %81
+         %76 = OpFunctionCall %void %a %77
+         %85 = OpAccessChain %_ptr_Uniform_mat4x2_f16 %u %uint_0 %uint_1
+         %86 = OpLoad %mat4x2_f16 %85
+         %83 = OpFunctionCall %mat4v2half %conv_mat4x2_f16 %86
+         %82 = OpFunctionCall %void %b %83
+         %89 = OpAccessChain %_ptr_Uniform_v2half %u %uint_0 %uint_1 %uint_0
+         %90 = OpLoad %v2half %89
+         %91 = OpVectorShuffle %v2half %90 %90 1 0
+         %87 = OpFunctionCall %void %c %91
+         %93 = OpAccessChain %_ptr_Uniform_v2half %u %uint_0 %uint_1 %uint_0
+         %94 = OpLoad %v2half %93
+         %95 = OpVectorShuffle %v2half %94 %94 1 0
+         %96 = OpCompositeExtract %half %95 0
+         %92 = OpFunctionCall %void %d %96
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_fn.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_fn.wgsl.expected.wgsl
new file mode 100644
index 0000000..3d5ca68
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_fn.wgsl.expected.wgsl

@@ -0,0 +1,23 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : array<mat4x2<f16>, 4>;
+
+fn a(a : array<mat4x2<f16>, 4>) {
+}
+
+fn b(m : mat4x2<f16>) {
+}
+
+fn c(v : vec2<f16>) {
+}
+
+fn d(f : f16) {
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  a(u);
+  b(u[1]);
+  c(u[1][0].yx);
+  d(u[1][0].yx.x);
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_private.wgsl b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_private.wgsl
new file mode 100644
index 0000000..54d8d8f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_private.wgsl

@@ -0,0 +1,12 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : array<mat4x2<f16>, 4>;
+var<private> p : array<mat4x2<f16>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    p = u;
+    p[1] = u[2];
+    p[1][0] = u[0][1].yx;
+    p[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_private.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_private.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..f9a1fc80
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_private.wgsl.expected.dxc.hlsl

@@ -0,0 +1,37 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[4];
+};
+static matrix<float16_t, 4, 2> p[4] = (matrix<float16_t, 4, 2>[4])0;
+
+matrix<float16_t, 4, 2> tint_symbol_1(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  const uint scalar_offset_3 = ((offset + 12u)) / 4;
+  uint ubo_load_3 = buffer[scalar_offset_3 / 4][scalar_offset_3 % 4];
+  return matrix<float16_t, 4, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))));
+}
+
+typedef matrix<float16_t, 4, 2> tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[4], uint offset) {
+  matrix<float16_t, 4, 2> arr[4] = (matrix<float16_t, 4, 2>[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 16u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = tint_symbol_1(u, 32u);
+  uint ubo_load_4 = u[0].y;
+  p[1][0] = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_4 & 0xFFFF)), float16_t(f16tof32(ubo_load_4 >> 16))).yx;
+  p[1][0].x = float16_t(f16tof32(((u[0].y) & 0xFFFF)));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_private.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_private.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..e57ed87
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_private.wgsl.expected.fxc.hlsl

@@ -0,0 +1,42 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[4];
+};
+static matrix<float16_t, 4, 2> p[4] = (matrix<float16_t, 4, 2>[4])0;
+
+matrix<float16_t, 4, 2> tint_symbol_1(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  const uint scalar_offset_3 = ((offset + 12u)) / 4;
+  uint ubo_load_3 = buffer[scalar_offset_3 / 4][scalar_offset_3 % 4];
+  return matrix<float16_t, 4, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))));
+}
+
+typedef matrix<float16_t, 4, 2> tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[4], uint offset) {
+  matrix<float16_t, 4, 2> arr[4] = (matrix<float16_t, 4, 2>[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 16u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = tint_symbol_1(u, 32u);
+  uint ubo_load_4 = u[0].y;
+  p[1][0] = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_4 & 0xFFFF)), float16_t(f16tof32(ubo_load_4 >> 16))).yx;
+  p[1][0].x = float16_t(f16tof32(((u[0].y) & 0xFFFF)));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000015CB4D295E0(4,15-23): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_private.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_private.wgsl.expected.glsl
new file mode 100644
index 0000000..d842b0e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_private.wgsl.expected.glsl

@@ -0,0 +1,41 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct mat4x2_f16 {
+  f16vec2 col0;
+  f16vec2 col1;
+  f16vec2 col2;
+  f16vec2 col3;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  mat4x2_f16 inner[4];
+} u;
+
+f16mat4x2 p[4] = f16mat4x2[4](f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf));
+f16mat4x2 conv_mat4x2_f16(mat4x2_f16 val) {
+  return f16mat4x2(val.col0, val.col1, val.col2, val.col3);
+}
+
+f16mat4x2[4] conv_arr4_mat4x2_f16(mat4x2_f16 val[4]) {
+  f16mat4x2 arr[4] = f16mat4x2[4](f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_mat4x2_f16(val[i]);
+    }
+  }
+  return arr;
+}
+
+void f() {
+  p = conv_arr4_mat4x2_f16(u.inner);
+  p[1] = conv_mat4x2_f16(u.inner[2u]);
+  p[1][0] = u.inner[0u].col1.yx;
+  p[1][0].x = u.inner[0u].col1[0u];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_private.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_private.wgsl.expected.msl
new file mode 100644
index 0000000..0b1f07b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_private.wgsl.expected.msl

@@ -0,0 +1,25 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+kernel void f(const constant tint_array<half4x2, 4>* tint_symbol_1 [[buffer(0)]]) {
+  thread tint_array<half4x2, 4> tint_symbol = {};
+  tint_symbol = *(tint_symbol_1);
+  tint_symbol[1] = (*(tint_symbol_1))[2];
+  tint_symbol[1][0] = half2((*(tint_symbol_1))[0][1]).yx;
+  tint_symbol[1][0][0] = (*(tint_symbol_1))[0][1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_private.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_private.wgsl.expected.spvasm
new file mode 100644
index 0000000..415b8aa
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_private.wgsl.expected.spvasm

@@ -0,0 +1,149 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 88
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %mat4x2_f16 "mat4x2_f16"
+               OpMemberName %mat4x2_f16 0 "col0"
+               OpMemberName %mat4x2_f16 1 "col1"
+               OpMemberName %mat4x2_f16 2 "col2"
+               OpMemberName %mat4x2_f16 3 "col3"
+               OpName %u "u"
+               OpName %p "p"
+               OpName %conv_mat4x2_f16 "conv_mat4x2_f16"
+               OpName %val "val"
+               OpName %conv_arr4_mat4x2_f16 "conv_arr4_mat4x2_f16"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %mat4x2_f16 0 Offset 0
+               OpMemberDecorate %mat4x2_f16 1 Offset 4
+               OpMemberDecorate %mat4x2_f16 2 Offset 8
+               OpMemberDecorate %mat4x2_f16 3 Offset 12
+               OpDecorate %_arr_mat4x2_f16_uint_4 ArrayStride 16
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpDecorate %_arr_mat4v2half_uint_4 ArrayStride 16
+       %half = OpTypeFloat 16
+     %v2half = OpTypeVector %half 2
+ %mat4x2_f16 = OpTypeStruct %v2half %v2half %v2half %v2half
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat4x2_f16_uint_4 = OpTypeArray %mat4x2_f16 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_mat4x2_f16_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat4v2half = OpTypeMatrix %v2half 4
+%_arr_mat4v2half_uint_4 = OpTypeArray %mat4v2half %uint_4
+%_ptr_Private__arr_mat4v2half_uint_4 = OpTypePointer Private %_arr_mat4v2half_uint_4
+         %14 = OpConstantNull %_arr_mat4v2half_uint_4
+          %p = OpVariable %_ptr_Private__arr_mat4v2half_uint_4 Private %14
+         %15 = OpTypeFunction %mat4v2half %mat4x2_f16
+         %24 = OpTypeFunction %_arr_mat4v2half_uint_4 %_arr_mat4x2_f16_uint_4
+%_ptr_Function__arr_mat4v2half_uint_4 = OpTypePointer Function %_arr_mat4v2half_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %32 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_mat4x2_f16_uint_4 = OpTypePointer Function %_arr_mat4x2_f16_uint_4
+         %45 = OpConstantNull %_arr_mat4x2_f16_uint_4
+%_ptr_Function_mat4v2half = OpTypePointer Function %mat4v2half
+%_ptr_Function_mat4x2_f16 = OpTypePointer Function %mat4x2_f16
+     %uint_1 = OpConstant %uint 1
+       %void = OpTypeVoid
+         %58 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_mat4x2_f16_uint_4 = OpTypePointer Uniform %_arr_mat4x2_f16_uint_4
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Private_mat4v2half = OpTypePointer Private %mat4v2half
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_mat4x2_f16 = OpTypePointer Uniform %mat4x2_f16
+         %76 = OpConstantNull %int
+%_ptr_Private_v2half = OpTypePointer Private %v2half
+%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half
+%_ptr_Private_half = OpTypePointer Private %half
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+%conv_mat4x2_f16 = OpFunction %mat4v2half None %15
+        %val = OpFunctionParameter %mat4x2_f16
+         %18 = OpLabel
+         %19 = OpCompositeExtract %v2half %val 0
+         %20 = OpCompositeExtract %v2half %val 1
+         %21 = OpCompositeExtract %v2half %val 2
+         %22 = OpCompositeExtract %v2half %val 3
+         %23 = OpCompositeConstruct %mat4v2half %19 %20 %21 %22
+               OpReturnValue %23
+               OpFunctionEnd
+%conv_arr4_mat4x2_f16 = OpFunction %_arr_mat4v2half_uint_4 None %24
+      %val_0 = OpFunctionParameter %_arr_mat4x2_f16_uint_4
+         %27 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_mat4v2half_uint_4 Function %14
+          %i = OpVariable %_ptr_Function_uint Function %32
+%var_for_index = OpVariable %_ptr_Function__arr_mat4x2_f16_uint_4 Function %45
+               OpBranch %33
+         %33 = OpLabel
+               OpLoopMerge %34 %35 None
+               OpBranch %36
+         %36 = OpLabel
+         %38 = OpLoad %uint %i
+         %39 = OpULessThan %bool %38 %uint_4
+         %37 = OpLogicalNot %bool %39
+               OpSelectionMerge %41 None
+               OpBranchConditional %37 %42 %41
+         %42 = OpLabel
+               OpBranch %34
+         %41 = OpLabel
+               OpStore %var_for_index %val_0
+         %46 = OpLoad %uint %i
+         %48 = OpAccessChain %_ptr_Function_mat4v2half %arr %46
+         %50 = OpLoad %uint %i
+         %52 = OpAccessChain %_ptr_Function_mat4x2_f16 %var_for_index %50
+         %53 = OpLoad %mat4x2_f16 %52
+         %49 = OpFunctionCall %mat4v2half %conv_mat4x2_f16 %53
+               OpStore %48 %49
+               OpBranch %35
+         %35 = OpLabel
+         %54 = OpLoad %uint %i
+         %56 = OpIAdd %uint %54 %uint_1
+               OpStore %i %56
+               OpBranch %33
+         %34 = OpLabel
+         %57 = OpLoad %_arr_mat4v2half_uint_4 %arr
+               OpReturnValue %57
+               OpFunctionEnd
+          %f = OpFunction %void None %58
+         %61 = OpLabel
+         %65 = OpAccessChain %_ptr_Uniform__arr_mat4x2_f16_uint_4 %u %uint_0
+         %66 = OpLoad %_arr_mat4x2_f16_uint_4 %65
+         %62 = OpFunctionCall %_arr_mat4v2half_uint_4 %conv_arr4_mat4x2_f16 %66
+               OpStore %p %62
+         %70 = OpAccessChain %_ptr_Private_mat4v2half %p %int_1
+         %74 = OpAccessChain %_ptr_Uniform_mat4x2_f16 %u %uint_0 %uint_2
+         %75 = OpLoad %mat4x2_f16 %74
+         %71 = OpFunctionCall %mat4v2half %conv_mat4x2_f16 %75
+               OpStore %70 %71
+         %78 = OpAccessChain %_ptr_Private_v2half %p %int_1 %76
+         %80 = OpAccessChain %_ptr_Uniform_v2half %u %uint_0 %32 %uint_1
+         %81 = OpLoad %v2half %80
+         %82 = OpVectorShuffle %v2half %81 %81 1 0
+               OpStore %78 %82
+         %84 = OpAccessChain %_ptr_Private_half %p %int_1 %76 %uint_0
+         %86 = OpAccessChain %_ptr_Uniform_half %u %uint_0 %32 %uint_1 %32
+         %87 = OpLoad %half %86
+               OpStore %84 %87
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_private.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_private.wgsl.expected.wgsl
new file mode 100644
index 0000000..e59b04b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_private.wgsl.expected.wgsl

@@ -0,0 +1,13 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : array<mat4x2<f16>, 4>;
+
+var<private> p : array<mat4x2<f16>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  p = u;
+  p[1] = u[2];
+  p[1][0] = u[0][1].yx;
+  p[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_storage.wgsl b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_storage.wgsl
new file mode 100644
index 0000000..05a7730
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_storage.wgsl

@@ -0,0 +1,12 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : array<mat4x2<f16>, 4>;
+@group(0) @binding(1) var<storage, read_write> s : array<mat4x2<f16>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    s = u;
+    s[1] = u[2];
+    s[1][0] = u[0][1].yx;
+    s[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_storage.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_storage.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..8097133
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_storage.wgsl.expected.dxc.hlsl

@@ -0,0 +1,53 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[4];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 4, 2> value) {
+  buffer.Store<vector<float16_t, 2> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 4u), value[1u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 8u), value[2u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 12u), value[3u]);
+}
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 4, 2> value[4]) {
+  matrix<float16_t, 4, 2> array[4] = value;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      tint_symbol_1(buffer, (offset + (i * 16u)), array[i]);
+    }
+  }
+}
+
+matrix<float16_t, 4, 2> tint_symbol_4(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  const uint scalar_offset_3 = ((offset + 12u)) / 4;
+  uint ubo_load_3 = buffer[scalar_offset_3 / 4][scalar_offset_3 % 4];
+  return matrix<float16_t, 4, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))));
+}
+
+typedef matrix<float16_t, 4, 2> tint_symbol_3_ret[4];
+tint_symbol_3_ret tint_symbol_3(uint4 buffer[4], uint offset) {
+  matrix<float16_t, 4, 2> arr[4] = (matrix<float16_t, 4, 2>[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_4(buffer, (offset + (i_1 * 16u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_3(u, 0u));
+  tint_symbol_1(s, 16u, tint_symbol_4(u, 32u));
+  uint ubo_load_4 = u[0].y;
+  s.Store<vector<float16_t, 2> >(16u, vector<float16_t, 2>(float16_t(f16tof32(ubo_load_4 & 0xFFFF)), float16_t(f16tof32(ubo_load_4 >> 16))).yx);
+  s.Store<float16_t>(16u, float16_t(f16tof32(((u[0].y) & 0xFFFF))));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_storage.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_storage.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..489bfd1
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_storage.wgsl.expected.fxc.hlsl

@@ -0,0 +1,59 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[4];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 4, 2> value) {
+  buffer.Store<vector<float16_t, 2> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 4u), value[1u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 8u), value[2u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 12u), value[3u]);
+}
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 4, 2> value[4]) {
+  matrix<float16_t, 4, 2> array[4] = value;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      tint_symbol_1(buffer, (offset + (i * 16u)), array[i]);
+    }
+  }
+}
+
+matrix<float16_t, 4, 2> tint_symbol_4(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  const uint scalar_offset_3 = ((offset + 12u)) / 4;
+  uint ubo_load_3 = buffer[scalar_offset_3 / 4][scalar_offset_3 % 4];
+  return matrix<float16_t, 4, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))));
+}
+
+typedef matrix<float16_t, 4, 2> tint_symbol_3_ret[4];
+tint_symbol_3_ret tint_symbol_3(uint4 buffer[4], uint offset) {
+  matrix<float16_t, 4, 2> arr[4] = (matrix<float16_t, 4, 2>[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_4(buffer, (offset + (i_1 * 16u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_3(u, 0u));
+  tint_symbol_1(s, 16u, tint_symbol_4(u, 32u));
+  uint ubo_load_4 = u[0].y;
+  s.Store<vector<float16_t, 2> >(16u, vector<float16_t, 2>(float16_t(f16tof32(ubo_load_4 & 0xFFFF)), float16_t(f16tof32(ubo_load_4 >> 16))).yx);
+  s.Store<float16_t>(16u, float16_t(f16tof32(((u[0].y) & 0xFFFF))));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001F88B180520(6,68-76): error X3000: syntax error: unexpected token 'float16_t'
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001F88B180520(7,3-14): error X3018: invalid subscript 'Store'
+

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_storage.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_storage.wgsl.expected.glsl
new file mode 100644
index 0000000..8913fc4
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_storage.wgsl.expected.glsl

@@ -0,0 +1,44 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct mat4x2_f16 {
+  f16vec2 col0;
+  f16vec2 col1;
+  f16vec2 col2;
+  f16vec2 col3;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  mat4x2_f16 inner[4];
+} u;
+
+layout(binding = 1, std430) buffer u_block_ssbo {
+  f16mat4x2 inner[4];
+} s;
+
+f16mat4x2 conv_mat4x2_f16(mat4x2_f16 val) {
+  return f16mat4x2(val.col0, val.col1, val.col2, val.col3);
+}
+
+f16mat4x2[4] conv_arr4_mat4x2_f16(mat4x2_f16 val[4]) {
+  f16mat4x2 arr[4] = f16mat4x2[4](f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_mat4x2_f16(val[i]);
+    }
+  }
+  return arr;
+}
+
+void f() {
+  s.inner = conv_arr4_mat4x2_f16(u.inner);
+  s.inner[1] = conv_mat4x2_f16(u.inner[2u]);
+  s.inner[1][0] = u.inner[0u].col1.yx;
+  s.inner[1][0].x = u.inner[0u].col1[0u];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_storage.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_storage.wgsl.expected.msl
new file mode 100644
index 0000000..2b7f2a2
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_storage.wgsl.expected.msl

@@ -0,0 +1,24 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+kernel void f(device tint_array<half4x2, 4>* tint_symbol [[buffer(1)]], const constant tint_array<half4x2, 4>* tint_symbol_1 [[buffer(0)]]) {
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[2];
+  (*(tint_symbol))[1][0] = half2((*(tint_symbol_1))[0][1]).yx;
+  (*(tint_symbol))[1][0][0] = (*(tint_symbol_1))[0][1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_storage.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_storage.wgsl.expected.spvasm
new file mode 100644
index 0000000..0981e80
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_storage.wgsl.expected.spvasm

@@ -0,0 +1,160 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 91
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %mat4x2_f16 "mat4x2_f16"
+               OpMemberName %mat4x2_f16 0 "col0"
+               OpMemberName %mat4x2_f16 1 "col1"
+               OpMemberName %mat4x2_f16 2 "col2"
+               OpMemberName %mat4x2_f16 3 "col3"
+               OpName %u "u"
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %s "s"
+               OpName %conv_mat4x2_f16 "conv_mat4x2_f16"
+               OpName %val "val"
+               OpName %conv_arr4_mat4x2_f16 "conv_arr4_mat4x2_f16"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %mat4x2_f16 0 Offset 0
+               OpMemberDecorate %mat4x2_f16 1 Offset 4
+               OpMemberDecorate %mat4x2_f16 2 Offset 8
+               OpMemberDecorate %mat4x2_f16 3 Offset 12
+               OpDecorate %_arr_mat4x2_f16_uint_4 ArrayStride 16
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 4
+               OpDecorate %_arr_mat4v2half_uint_4 ArrayStride 16
+               OpDecorate %s DescriptorSet 0
+               OpDecorate %s Binding 1
+       %half = OpTypeFloat 16
+     %v2half = OpTypeVector %half 2
+ %mat4x2_f16 = OpTypeStruct %v2half %v2half %v2half %v2half
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat4x2_f16_uint_4 = OpTypeArray %mat4x2_f16 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_mat4x2_f16_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat4v2half = OpTypeMatrix %v2half 4
+%_arr_mat4v2half_uint_4 = OpTypeArray %mat4v2half %uint_4
+    %u_block = OpTypeStruct %_arr_mat4v2half_uint_4
+%_ptr_StorageBuffer_u_block = OpTypePointer StorageBuffer %u_block
+          %s = OpVariable %_ptr_StorageBuffer_u_block StorageBuffer
+         %15 = OpTypeFunction %mat4v2half %mat4x2_f16
+         %24 = OpTypeFunction %_arr_mat4v2half_uint_4 %_arr_mat4x2_f16_uint_4
+%_ptr_Function__arr_mat4v2half_uint_4 = OpTypePointer Function %_arr_mat4v2half_uint_4
+         %30 = OpConstantNull %_arr_mat4v2half_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %33 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_mat4x2_f16_uint_4 = OpTypePointer Function %_arr_mat4x2_f16_uint_4
+         %46 = OpConstantNull %_arr_mat4x2_f16_uint_4
+%_ptr_Function_mat4v2half = OpTypePointer Function %mat4v2half
+%_ptr_Function_mat4x2_f16 = OpTypePointer Function %mat4x2_f16
+     %uint_1 = OpConstant %uint 1
+       %void = OpTypeVoid
+         %59 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_StorageBuffer__arr_mat4v2half_uint_4 = OpTypePointer StorageBuffer %_arr_mat4v2half_uint_4
+%_ptr_Uniform__arr_mat4x2_f16_uint_4 = OpTypePointer Uniform %_arr_mat4x2_f16_uint_4
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_StorageBuffer_mat4v2half = OpTypePointer StorageBuffer %mat4v2half
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_mat4x2_f16 = OpTypePointer Uniform %mat4x2_f16
+         %79 = OpConstantNull %int
+%_ptr_StorageBuffer_v2half = OpTypePointer StorageBuffer %v2half
+%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half
+%_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+%conv_mat4x2_f16 = OpFunction %mat4v2half None %15
+        %val = OpFunctionParameter %mat4x2_f16
+         %18 = OpLabel
+         %19 = OpCompositeExtract %v2half %val 0
+         %20 = OpCompositeExtract %v2half %val 1
+         %21 = OpCompositeExtract %v2half %val 2
+         %22 = OpCompositeExtract %v2half %val 3
+         %23 = OpCompositeConstruct %mat4v2half %19 %20 %21 %22
+               OpReturnValue %23
+               OpFunctionEnd
+%conv_arr4_mat4x2_f16 = OpFunction %_arr_mat4v2half_uint_4 None %24
+      %val_0 = OpFunctionParameter %_arr_mat4x2_f16_uint_4
+         %27 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_mat4v2half_uint_4 Function %30
+          %i = OpVariable %_ptr_Function_uint Function %33
+%var_for_index = OpVariable %_ptr_Function__arr_mat4x2_f16_uint_4 Function %46
+               OpBranch %34
+         %34 = OpLabel
+               OpLoopMerge %35 %36 None
+               OpBranch %37
+         %37 = OpLabel
+         %39 = OpLoad %uint %i
+         %40 = OpULessThan %bool %39 %uint_4
+         %38 = OpLogicalNot %bool %40
+               OpSelectionMerge %42 None
+               OpBranchConditional %38 %43 %42
+         %43 = OpLabel
+               OpBranch %35
+         %42 = OpLabel
+               OpStore %var_for_index %val_0
+         %47 = OpLoad %uint %i
+         %49 = OpAccessChain %_ptr_Function_mat4v2half %arr %47
+         %51 = OpLoad %uint %i
+         %53 = OpAccessChain %_ptr_Function_mat4x2_f16 %var_for_index %51
+         %54 = OpLoad %mat4x2_f16 %53
+         %50 = OpFunctionCall %mat4v2half %conv_mat4x2_f16 %54
+               OpStore %49 %50
+               OpBranch %36
+         %36 = OpLabel
+         %55 = OpLoad %uint %i
+         %57 = OpIAdd %uint %55 %uint_1
+               OpStore %i %57
+               OpBranch %34
+         %35 = OpLabel
+         %58 = OpLoad %_arr_mat4v2half_uint_4 %arr
+               OpReturnValue %58
+               OpFunctionEnd
+          %f = OpFunction %void None %59
+         %62 = OpLabel
+         %65 = OpAccessChain %_ptr_StorageBuffer__arr_mat4v2half_uint_4 %s %uint_0
+         %68 = OpAccessChain %_ptr_Uniform__arr_mat4x2_f16_uint_4 %u %uint_0
+         %69 = OpLoad %_arr_mat4x2_f16_uint_4 %68
+         %66 = OpFunctionCall %_arr_mat4v2half_uint_4 %conv_arr4_mat4x2_f16 %69
+               OpStore %65 %66
+         %73 = OpAccessChain %_ptr_StorageBuffer_mat4v2half %s %uint_0 %int_1
+         %77 = OpAccessChain %_ptr_Uniform_mat4x2_f16 %u %uint_0 %uint_2
+         %78 = OpLoad %mat4x2_f16 %77
+         %74 = OpFunctionCall %mat4v2half %conv_mat4x2_f16 %78
+               OpStore %73 %74
+         %81 = OpAccessChain %_ptr_StorageBuffer_v2half %s %uint_0 %int_1 %79
+         %83 = OpAccessChain %_ptr_Uniform_v2half %u %uint_0 %33 %uint_1
+         %84 = OpLoad %v2half %83
+         %85 = OpVectorShuffle %v2half %84 %84 1 0
+               OpStore %81 %85
+         %87 = OpAccessChain %_ptr_StorageBuffer_half %s %uint_0 %int_1 %79 %uint_0
+         %89 = OpAccessChain %_ptr_Uniform_half %u %uint_0 %33 %uint_1 %33
+         %90 = OpLoad %half %89
+               OpStore %87 %90
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_storage.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_storage.wgsl.expected.wgsl
new file mode 100644
index 0000000..c80638d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_storage.wgsl.expected.wgsl

@@ -0,0 +1,13 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : array<mat4x2<f16>, 4>;
+
+@group(0) @binding(1) var<storage, read_write> s : array<mat4x2<f16>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  s = u;
+  s[1] = u[2];
+  s[1][0] = u[0][1].yx;
+  s[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_workgroup.wgsl b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_workgroup.wgsl
new file mode 100644
index 0000000..7b129e0
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_workgroup.wgsl

@@ -0,0 +1,12 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : array<mat4x2<f16>, 4>;
+var<workgroup> w : array<mat4x2<f16>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    w = u;
+    w[1] = u[2];
+    w[1][0] = u[0][1].yx;
+    w[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_workgroup.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_workgroup.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..f67e431
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_workgroup.wgsl.expected.dxc.hlsl

@@ -0,0 +1,52 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[4];
+};
+groupshared matrix<float16_t, 4, 2> w[4];
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+matrix<float16_t, 4, 2> tint_symbol_3(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  const uint scalar_offset_3 = ((offset + 12u)) / 4;
+  uint ubo_load_3 = buffer[scalar_offset_3 / 4][scalar_offset_3 % 4];
+  return matrix<float16_t, 4, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))));
+}
+
+typedef matrix<float16_t, 4, 2> tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[4], uint offset) {
+  matrix<float16_t, 4, 2> arr[4] = (matrix<float16_t, 4, 2>[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 16u)));
+    }
+  }
+  return arr;
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      const uint i = idx;
+      w[i] = matrix<float16_t, 4, 2>((float16_t(0.0h)).xx, (float16_t(0.0h)).xx, (float16_t(0.0h)).xx, (float16_t(0.0h)).xx);
+    }
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = tint_symbol_3(u, 32u);
+  uint ubo_load_4 = u[0].y;
+  w[1][0] = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_4 & 0xFFFF)), float16_t(f16tof32(ubo_load_4 >> 16))).yx;
+  w[1][0].x = float16_t(f16tof32(((u[0].y) & 0xFFFF)));
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_workgroup.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_workgroup.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..03d7569
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_workgroup.wgsl.expected.fxc.hlsl

@@ -0,0 +1,57 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[4];
+};
+groupshared matrix<float16_t, 4, 2> w[4];
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+matrix<float16_t, 4, 2> tint_symbol_3(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  const uint scalar_offset_3 = ((offset + 12u)) / 4;
+  uint ubo_load_3 = buffer[scalar_offset_3 / 4][scalar_offset_3 % 4];
+  return matrix<float16_t, 4, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))));
+}
+
+typedef matrix<float16_t, 4, 2> tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[4], uint offset) {
+  matrix<float16_t, 4, 2> arr[4] = (matrix<float16_t, 4, 2>[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 16u)));
+    }
+  }
+  return arr;
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      const uint i = idx;
+      w[i] = matrix<float16_t, 4, 2>((float16_t(0.0h)).xx, (float16_t(0.0h)).xx, (float16_t(0.0h)).xx, (float16_t(0.0h)).xx);
+    }
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = tint_symbol_3(u, 32u);
+  uint ubo_load_4 = u[0].y;
+  w[1][0] = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_4 & 0xFFFF)), float16_t(f16tof32(ubo_load_4 >> 16))).yx;
+  w[1][0].x = float16_t(f16tof32(((u[0].y) & 0xFFFF)));
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000020D4DFDDE90(4,20-28): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_workgroup.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_workgroup.wgsl.expected.glsl
new file mode 100644
index 0000000..b0b1940
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_workgroup.wgsl.expected.glsl

@@ -0,0 +1,48 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct mat4x2_f16 {
+  f16vec2 col0;
+  f16vec2 col1;
+  f16vec2 col2;
+  f16vec2 col3;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  mat4x2_f16 inner[4];
+} u;
+
+shared f16mat4x2 w[4];
+f16mat4x2 conv_mat4x2_f16(mat4x2_f16 val) {
+  return f16mat4x2(val.col0, val.col1, val.col2, val.col3);
+}
+
+f16mat4x2[4] conv_arr4_mat4x2_f16(mat4x2_f16 val[4]) {
+  f16mat4x2 arr[4] = f16mat4x2[4](f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_mat4x2_f16(val[i]);
+    }
+  }
+  return arr;
+}
+
+void f(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      uint i = idx;
+      w[i] = f16mat4x2(f16vec2(0.0hf), f16vec2(0.0hf), f16vec2(0.0hf), f16vec2(0.0hf));
+    }
+  }
+  barrier();
+  w = conv_arr4_mat4x2_f16(u.inner);
+  w[1] = conv_mat4x2_f16(u.inner[2u]);
+  w[1][0] = u.inner[0u].col1.yx;
+  w[1][0].x = u.inner[0u].col1[0u];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f(gl_LocalInvocationIndex);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_workgroup.wgsl.expected.msl
new file mode 100644
index 0000000..2fe9b38
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_workgroup.wgsl.expected.msl

@@ -0,0 +1,38 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct tint_symbol_5 {
+  tint_array<half4x2, 4> w;
+};
+
+void f_inner(uint local_invocation_index, threadgroup tint_array<half4x2, 4>* const tint_symbol, const constant tint_array<half4x2, 4>* const tint_symbol_1) {
+  for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+    uint const i = idx;
+    (*(tint_symbol))[i] = half4x2(half2(0.0h), half2(0.0h), half2(0.0h), half2(0.0h));
+  }
+  threadgroup_barrier(mem_flags::mem_threadgroup);
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[2];
+  (*(tint_symbol))[1][0] = half2((*(tint_symbol_1))[0][1]).yx;
+  (*(tint_symbol))[1][0][0] = (*(tint_symbol_1))[0][1][0];
+}
+
+kernel void f(const constant tint_array<half4x2, 4>* tint_symbol_4 [[buffer(0)]], threadgroup tint_symbol_5* tint_symbol_3 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
+  threadgroup tint_array<half4x2, 4>* const tint_symbol_2 = &((*(tint_symbol_3)).w);
+  f_inner(local_invocation_index, tint_symbol_2, tint_symbol_4);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_workgroup.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_workgroup.wgsl.expected.spvasm
new file mode 100644
index 0000000..dfe569a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_workgroup.wgsl.expected.spvasm

@@ -0,0 +1,192 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 113
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f" %local_invocation_index_1
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %local_invocation_index_1 "local_invocation_index_1"
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %mat4x2_f16 "mat4x2_f16"
+               OpMemberName %mat4x2_f16 0 "col0"
+               OpMemberName %mat4x2_f16 1 "col1"
+               OpMemberName %mat4x2_f16 2 "col2"
+               OpMemberName %mat4x2_f16 3 "col3"
+               OpName %u "u"
+               OpName %w "w"
+               OpName %conv_mat4x2_f16 "conv_mat4x2_f16"
+               OpName %val "val"
+               OpName %conv_arr4_mat4x2_f16 "conv_arr4_mat4x2_f16"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %f_inner "f_inner"
+               OpName %local_invocation_index "local_invocation_index"
+               OpName %idx "idx"
+               OpName %f "f"
+               OpDecorate %local_invocation_index_1 BuiltIn LocalInvocationIndex
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %mat4x2_f16 0 Offset 0
+               OpMemberDecorate %mat4x2_f16 1 Offset 4
+               OpMemberDecorate %mat4x2_f16 2 Offset 8
+               OpMemberDecorate %mat4x2_f16 3 Offset 12
+               OpDecorate %_arr_mat4x2_f16_uint_4 ArrayStride 16
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpDecorate %_arr_mat4v2half_uint_4 ArrayStride 16
+       %uint = OpTypeInt 32 0
+%_ptr_Input_uint = OpTypePointer Input %uint
+%local_invocation_index_1 = OpVariable %_ptr_Input_uint Input
+       %half = OpTypeFloat 16
+     %v2half = OpTypeVector %half 2
+ %mat4x2_f16 = OpTypeStruct %v2half %v2half %v2half %v2half
+     %uint_4 = OpConstant %uint 4
+%_arr_mat4x2_f16_uint_4 = OpTypeArray %mat4x2_f16 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_mat4x2_f16_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat4v2half = OpTypeMatrix %v2half 4
+%_arr_mat4v2half_uint_4 = OpTypeArray %mat4v2half %uint_4
+%_ptr_Workgroup__arr_mat4v2half_uint_4 = OpTypePointer Workgroup %_arr_mat4v2half_uint_4
+          %w = OpVariable %_ptr_Workgroup__arr_mat4v2half_uint_4 Workgroup
+         %16 = OpTypeFunction %mat4v2half %mat4x2_f16
+         %25 = OpTypeFunction %_arr_mat4v2half_uint_4 %_arr_mat4x2_f16_uint_4
+%_ptr_Function__arr_mat4v2half_uint_4 = OpTypePointer Function %_arr_mat4v2half_uint_4
+         %31 = OpConstantNull %_arr_mat4v2half_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %34 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_mat4x2_f16_uint_4 = OpTypePointer Function %_arr_mat4x2_f16_uint_4
+         %47 = OpConstantNull %_arr_mat4x2_f16_uint_4
+%_ptr_Function_mat4v2half = OpTypePointer Function %mat4v2half
+%_ptr_Function_mat4x2_f16 = OpTypePointer Function %mat4x2_f16
+     %uint_1 = OpConstant %uint 1
+       %void = OpTypeVoid
+         %60 = OpTypeFunction %void %uint
+%_ptr_Workgroup_mat4v2half = OpTypePointer Workgroup %mat4v2half
+         %78 = OpConstantNull %mat4v2half
+     %uint_2 = OpConstant %uint 2
+   %uint_264 = OpConstant %uint 264
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_mat4x2_f16_uint_4 = OpTypePointer Uniform %_arr_mat4x2_f16_uint_4
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Uniform_mat4x2_f16 = OpTypePointer Uniform %mat4x2_f16
+         %96 = OpConstantNull %int
+%_ptr_Workgroup_v2half = OpTypePointer Workgroup %v2half
+%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half
+%_ptr_Workgroup_half = OpTypePointer Workgroup %half
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+        %108 = OpTypeFunction %void
+%conv_mat4x2_f16 = OpFunction %mat4v2half None %16
+        %val = OpFunctionParameter %mat4x2_f16
+         %19 = OpLabel
+         %20 = OpCompositeExtract %v2half %val 0
+         %21 = OpCompositeExtract %v2half %val 1
+         %22 = OpCompositeExtract %v2half %val 2
+         %23 = OpCompositeExtract %v2half %val 3
+         %24 = OpCompositeConstruct %mat4v2half %20 %21 %22 %23
+               OpReturnValue %24
+               OpFunctionEnd
+%conv_arr4_mat4x2_f16 = OpFunction %_arr_mat4v2half_uint_4 None %25
+      %val_0 = OpFunctionParameter %_arr_mat4x2_f16_uint_4
+         %28 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_mat4v2half_uint_4 Function %31
+          %i = OpVariable %_ptr_Function_uint Function %34
+%var_for_index = OpVariable %_ptr_Function__arr_mat4x2_f16_uint_4 Function %47
+               OpBranch %35
+         %35 = OpLabel
+               OpLoopMerge %36 %37 None
+               OpBranch %38
+         %38 = OpLabel
+         %40 = OpLoad %uint %i
+         %41 = OpULessThan %bool %40 %uint_4
+         %39 = OpLogicalNot %bool %41
+               OpSelectionMerge %43 None
+               OpBranchConditional %39 %44 %43
+         %44 = OpLabel
+               OpBranch %36
+         %43 = OpLabel
+               OpStore %var_for_index %val_0
+         %48 = OpLoad %uint %i
+         %50 = OpAccessChain %_ptr_Function_mat4v2half %arr %48
+         %52 = OpLoad %uint %i
+         %54 = OpAccessChain %_ptr_Function_mat4x2_f16 %var_for_index %52
+         %55 = OpLoad %mat4x2_f16 %54
+         %51 = OpFunctionCall %mat4v2half %conv_mat4x2_f16 %55
+               OpStore %50 %51
+               OpBranch %37
+         %37 = OpLabel
+         %56 = OpLoad %uint %i
+         %58 = OpIAdd %uint %56 %uint_1
+               OpStore %i %58
+               OpBranch %35
+         %36 = OpLabel
+         %59 = OpLoad %_arr_mat4v2half_uint_4 %arr
+               OpReturnValue %59
+               OpFunctionEnd
+    %f_inner = OpFunction %void None %60
+%local_invocation_index = OpFunctionParameter %uint
+         %64 = OpLabel
+        %idx = OpVariable %_ptr_Function_uint Function %34
+               OpStore %idx %local_invocation_index
+               OpBranch %66
+         %66 = OpLabel
+               OpLoopMerge %67 %68 None
+               OpBranch %69
+         %69 = OpLabel
+         %71 = OpLoad %uint %idx
+         %72 = OpULessThan %bool %71 %uint_4
+         %70 = OpLogicalNot %bool %72
+               OpSelectionMerge %73 None
+               OpBranchConditional %70 %74 %73
+         %74 = OpLabel
+               OpBranch %67
+         %73 = OpLabel
+         %75 = OpLoad %uint %idx
+         %77 = OpAccessChain %_ptr_Workgroup_mat4v2half %w %75
+               OpStore %77 %78
+               OpBranch %68
+         %68 = OpLabel
+         %79 = OpLoad %uint %idx
+         %80 = OpIAdd %uint %79 %uint_1
+               OpStore %idx %80
+               OpBranch %66
+         %67 = OpLabel
+               OpControlBarrier %uint_2 %uint_2 %uint_264
+         %87 = OpAccessChain %_ptr_Uniform__arr_mat4x2_f16_uint_4 %u %uint_0
+         %88 = OpLoad %_arr_mat4x2_f16_uint_4 %87
+         %84 = OpFunctionCall %_arr_mat4v2half_uint_4 %conv_arr4_mat4x2_f16 %88
+               OpStore %w %84
+         %91 = OpAccessChain %_ptr_Workgroup_mat4v2half %w %int_1
+         %94 = OpAccessChain %_ptr_Uniform_mat4x2_f16 %u %uint_0 %uint_2
+         %95 = OpLoad %mat4x2_f16 %94
+         %92 = OpFunctionCall %mat4v2half %conv_mat4x2_f16 %95
+               OpStore %91 %92
+         %98 = OpAccessChain %_ptr_Workgroup_v2half %w %int_1 %96
+        %100 = OpAccessChain %_ptr_Uniform_v2half %u %uint_0 %34 %uint_1
+        %101 = OpLoad %v2half %100
+        %102 = OpVectorShuffle %v2half %101 %101 1 0
+               OpStore %98 %102
+        %104 = OpAccessChain %_ptr_Workgroup_half %w %int_1 %96 %uint_0
+        %106 = OpAccessChain %_ptr_Uniform_half %u %uint_0 %34 %uint_1 %34
+        %107 = OpLoad %half %106
+               OpStore %104 %107
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %108
+        %110 = OpLabel
+        %112 = OpLoad %uint %local_invocation_index_1
+        %111 = OpFunctionCall %void %f_inner %112
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_workgroup.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_workgroup.wgsl.expected.wgsl
new file mode 100644
index 0000000..811c638
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x2_f16/to_workgroup.wgsl.expected.wgsl

@@ -0,0 +1,13 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : array<mat4x2<f16>, 4>;
+
+var<workgroup> w : array<mat4x2<f16>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  w = u;
+  w[1] = u[2];
+  w[1][0] = u[0][1].yx;
+  w[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2/dynamic_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/array/mat4x2_f32/dynamic_index_via_ptr.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/array/mat4x2/dynamic_index_via_ptr.wgsl
rename to test/tint/buffer/uniform/std140/array/mat4x2_f32/dynamic_index_via_ptr.wgsl


diff --git a/test/tint/buffer/uniform/std140/array/mat4x2/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x2_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/array/mat4x2/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
rename to test/tint/buffer/uniform/std140/array/mat4x2_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/array/mat4x2/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x2_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/array/mat4x2/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
rename to test/tint/buffer/uniform/std140/array/mat4x2_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/array/mat4x2/dynamic_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat4x2_f32/dynamic_index_via_ptr.wgsl.expected.glsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/array/mat4x2/dynamic_index_via_ptr.wgsl.expected.glsl
rename to test/tint/buffer/uniform/std140/array/mat4x2_f32/dynamic_index_via_ptr.wgsl.expected.glsl


diff --git a/test/tint/buffer/uniform/std140/array/mat4x2/dynamic_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat4x2_f32/dynamic_index_via_ptr.wgsl.expected.msl
similarity index 100%
rename from test/tint/buffer/uniform/std140/array/mat4x2/dynamic_index_via_ptr.wgsl.expected.msl
rename to test/tint/buffer/uniform/std140/array/mat4x2_f32/dynamic_index_via_ptr.wgsl.expected.msl


diff --git a/test/tint/buffer/uniform/std140/array/mat4x2/dynamic_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat4x2_f32/dynamic_index_via_ptr.wgsl.expected.spvasm
similarity index 100%
rename from test/tint/buffer/uniform/std140/array/mat4x2/dynamic_index_via_ptr.wgsl.expected.spvasm
rename to test/tint/buffer/uniform/std140/array/mat4x2_f32/dynamic_index_via_ptr.wgsl.expected.spvasm


diff --git a/test/tint/buffer/uniform/std140/array/mat4x2/dynamic_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat4x2_f32/dynamic_index_via_ptr.wgsl.expected.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/array/mat4x2/dynamic_index_via_ptr.wgsl.expected.wgsl
rename to test/tint/buffer/uniform/std140/array/mat4x2_f32/dynamic_index_via_ptr.wgsl.expected.wgsl


diff --git a/test/tint/buffer/uniform/std140/array/mat4x2/static_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/array/mat4x2_f32/static_index_via_ptr.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/array/mat4x2/static_index_via_ptr.wgsl
rename to test/tint/buffer/uniform/std140/array/mat4x2_f32/static_index_via_ptr.wgsl


diff --git a/test/tint/buffer/uniform/std140/array/mat4x2/static_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x2_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/array/mat4x2/static_index_via_ptr.wgsl.expected.dxc.hlsl
rename to test/tint/buffer/uniform/std140/array/mat4x2_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/array/mat4x2/static_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x2_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/array/mat4x2/static_index_via_ptr.wgsl.expected.fxc.hlsl
rename to test/tint/buffer/uniform/std140/array/mat4x2_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/array/mat4x2/static_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat4x2_f32/static_index_via_ptr.wgsl.expected.glsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/array/mat4x2/static_index_via_ptr.wgsl.expected.glsl
rename to test/tint/buffer/uniform/std140/array/mat4x2_f32/static_index_via_ptr.wgsl.expected.glsl


diff --git a/test/tint/buffer/uniform/std140/array/mat4x2/static_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat4x2_f32/static_index_via_ptr.wgsl.expected.msl
similarity index 100%
rename from test/tint/buffer/uniform/std140/array/mat4x2/static_index_via_ptr.wgsl.expected.msl
rename to test/tint/buffer/uniform/std140/array/mat4x2_f32/static_index_via_ptr.wgsl.expected.msl


diff --git a/test/tint/buffer/uniform/std140/array/mat4x2/static_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat4x2_f32/static_index_via_ptr.wgsl.expected.spvasm
similarity index 100%
rename from test/tint/buffer/uniform/std140/array/mat4x2/static_index_via_ptr.wgsl.expected.spvasm
rename to test/tint/buffer/uniform/std140/array/mat4x2_f32/static_index_via_ptr.wgsl.expected.spvasm


diff --git a/test/tint/buffer/uniform/std140/array/mat4x2/static_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat4x2_f32/static_index_via_ptr.wgsl.expected.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/array/mat4x2/static_index_via_ptr.wgsl.expected.wgsl
rename to test/tint/buffer/uniform/std140/array/mat4x2_f32/static_index_via_ptr.wgsl.expected.wgsl


diff --git a/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_builtin.wgsl b/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_builtin.wgsl
new file mode 100644
index 0000000..174ac02
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_builtin.wgsl

@@ -0,0 +1,8 @@
+@group(0) @binding(0) var<uniform> u : array<mat4x2<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    let t = transpose(u[2]);
+    let l = length(u[0][1].yx);
+    let a = abs(u[0][1].yx.x);
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_builtin.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_builtin.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..2e395ab
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_builtin.wgsl.expected.dxc.hlsl

@@ -0,0 +1,23 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[8];
+};
+
+float4x2 tint_symbol(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_3 / 4];
+  return float4x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)), asfloat(((scalar_offset_3 & 2) ? ubo_load_3.zw : ubo_load_3.xy)));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float2x4 t = transpose(tint_symbol(u, 64u));
+  const float l = length(asfloat(u[0].zw).yx);
+  const float a = abs(asfloat(u[0].zw).yx.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_builtin.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_builtin.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..2e395ab
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_builtin.wgsl.expected.fxc.hlsl

@@ -0,0 +1,23 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[8];
+};
+
+float4x2 tint_symbol(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_3 / 4];
+  return float4x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)), asfloat(((scalar_offset_3 & 2) ? ubo_load_3.zw : ubo_load_3.xy)));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float2x4 t = transpose(tint_symbol(u, 64u));
+  const float l = length(asfloat(u[0].zw).yx);
+  const float a = abs(asfloat(u[0].zw).yx.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_builtin.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_builtin.wgsl.expected.glsl
new file mode 100644
index 0000000..df5f137
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_builtin.wgsl.expected.glsl

@@ -0,0 +1,28 @@
+#version 310 es
+
+struct mat4x2_f32 {
+  vec2 col0;
+  vec2 col1;
+  vec2 col2;
+  vec2 col3;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  mat4x2_f32 inner[4];
+} u;
+
+mat4x2 conv_mat4x2_f32(mat4x2_f32 val) {
+  return mat4x2(val.col0, val.col1, val.col2, val.col3);
+}
+
+void f() {
+  mat2x4 t = transpose(conv_mat4x2_f32(u.inner[2u]));
+  float l = length(u.inner[0u].col1.yx);
+  float a = abs(u.inner[0u].col1.yx[0u]);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_builtin.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_builtin.wgsl.expected.msl
new file mode 100644
index 0000000..b8aac7e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_builtin.wgsl.expected.msl

@@ -0,0 +1,23 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+kernel void f(const constant tint_array<float4x2, 4>* tint_symbol [[buffer(0)]]) {
+  float2x4 const t = transpose((*(tint_symbol))[2]);
+  float const l = length(float2((*(tint_symbol))[0][1]).yx);
+  float const a = fabs(float2((*(tint_symbol))[0][1]).yx[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_builtin.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_builtin.wgsl.expected.spvasm
new file mode 100644
index 0000000..a8609dc
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_builtin.wgsl.expected.spvasm

@@ -0,0 +1,79 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 46
+; Schema: 0
+               OpCapability Shader
+         %34 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %mat4x2_f32 "mat4x2_f32"
+               OpMemberName %mat4x2_f32 0 "col0"
+               OpMemberName %mat4x2_f32 1 "col1"
+               OpMemberName %mat4x2_f32 2 "col2"
+               OpMemberName %mat4x2_f32 3 "col3"
+               OpName %u "u"
+               OpName %conv_mat4x2_f32 "conv_mat4x2_f32"
+               OpName %val "val"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %mat4x2_f32 0 Offset 0
+               OpMemberDecorate %mat4x2_f32 1 Offset 8
+               OpMemberDecorate %mat4x2_f32 2 Offset 16
+               OpMemberDecorate %mat4x2_f32 3 Offset 24
+               OpDecorate %_arr_mat4x2_f32_uint_4 ArrayStride 32
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+      %float = OpTypeFloat 32
+    %v2float = OpTypeVector %float 2
+ %mat4x2_f32 = OpTypeStruct %v2float %v2float %v2float %v2float
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat4x2_f32_uint_4 = OpTypeArray %mat4x2_f32 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_mat4x2_f32_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+%mat4v2float = OpTypeMatrix %v2float 4
+         %10 = OpTypeFunction %mat4v2float %mat4x2_f32
+       %void = OpTypeVoid
+         %20 = OpTypeFunction %void
+    %v4float = OpTypeVector %float 4
+%mat2v4float = OpTypeMatrix %v4float 2
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_mat4x2_f32 = OpTypePointer Uniform %mat4x2_f32
+         %35 = OpConstantNull %uint
+     %uint_1 = OpConstant %uint 1
+%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
+%conv_mat4x2_f32 = OpFunction %mat4v2float None %10
+        %val = OpFunctionParameter %mat4x2_f32
+         %14 = OpLabel
+         %15 = OpCompositeExtract %v2float %val 0
+         %16 = OpCompositeExtract %v2float %val 1
+         %17 = OpCompositeExtract %v2float %val 2
+         %18 = OpCompositeExtract %v2float %val 3
+         %19 = OpCompositeConstruct %mat4v2float %15 %16 %17 %18
+               OpReturnValue %19
+               OpFunctionEnd
+          %f = OpFunction %void None %20
+         %23 = OpLabel
+         %31 = OpAccessChain %_ptr_Uniform_mat4x2_f32 %u %uint_0 %uint_2
+         %32 = OpLoad %mat4x2_f32 %31
+         %27 = OpFunctionCall %mat4v2float %conv_mat4x2_f32 %32
+         %24 = OpTranspose %mat2v4float %27
+         %38 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %35 %uint_1
+         %39 = OpLoad %v2float %38
+         %40 = OpVectorShuffle %v2float %39 %39 1 0
+         %33 = OpExtInst %float %34 Length %40
+         %42 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %35 %uint_1
+         %43 = OpLoad %v2float %42
+         %44 = OpVectorShuffle %v2float %43 %43 1 0
+         %45 = OpCompositeExtract %float %44 0
+         %41 = OpExtInst %float %34 FAbs %45
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_builtin.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_builtin.wgsl.expected.wgsl
new file mode 100644
index 0000000..67aafb6
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_builtin.wgsl.expected.wgsl

@@ -0,0 +1,8 @@
+@group(0) @binding(0) var<uniform> u : array<mat4x2<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let t = transpose(u[2]);
+  let l = length(u[0][1].yx);
+  let a = abs(u[0][1].yx.x);
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2/to_fn.wgsl b/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_fn.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/array/mat4x2/to_fn.wgsl
rename to test/tint/buffer/uniform/std140/array/mat4x2_f32/to_fn.wgsl


diff --git a/test/tint/buffer/uniform/std140/array/mat4x2/to_fn.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_fn.wgsl.expected.dxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/array/mat4x2/to_fn.wgsl.expected.dxc.hlsl
rename to test/tint/buffer/uniform/std140/array/mat4x2_f32/to_fn.wgsl.expected.dxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/array/mat4x2/to_fn.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_fn.wgsl.expected.fxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/array/mat4x2/to_fn.wgsl.expected.fxc.hlsl
rename to test/tint/buffer/uniform/std140/array/mat4x2_f32/to_fn.wgsl.expected.fxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/array/mat4x2/to_fn.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_fn.wgsl.expected.glsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/array/mat4x2/to_fn.wgsl.expected.glsl
rename to test/tint/buffer/uniform/std140/array/mat4x2_f32/to_fn.wgsl.expected.glsl


diff --git a/test/tint/buffer/uniform/std140/array/mat4x2/to_fn.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_fn.wgsl.expected.msl
similarity index 100%
rename from test/tint/buffer/uniform/std140/array/mat4x2/to_fn.wgsl.expected.msl
rename to test/tint/buffer/uniform/std140/array/mat4x2_f32/to_fn.wgsl.expected.msl


diff --git a/test/tint/buffer/uniform/std140/array/mat4x2/to_fn.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_fn.wgsl.expected.spvasm
similarity index 100%
rename from test/tint/buffer/uniform/std140/array/mat4x2/to_fn.wgsl.expected.spvasm
rename to test/tint/buffer/uniform/std140/array/mat4x2_f32/to_fn.wgsl.expected.spvasm


diff --git a/test/tint/buffer/uniform/std140/array/mat4x2/to_fn.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_fn.wgsl.expected.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/array/mat4x2/to_fn.wgsl.expected.wgsl
rename to test/tint/buffer/uniform/std140/array/mat4x2_f32/to_fn.wgsl.expected.wgsl


diff --git a/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_private.wgsl b/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_private.wgsl
new file mode 100644
index 0000000..eb36f48
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_private.wgsl

@@ -0,0 +1,10 @@
+@group(0) @binding(0) var<uniform> u : array<mat4x2<f32>, 4>;
+var<private> p : array<mat4x2<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    p = u;
+    p[1] = u[2];
+    p[1][0] = u[0][1].yx;
+    p[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2/to_private.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_private.wgsl.expected.dxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/array/mat4x2/to_private.wgsl.expected.dxc.hlsl
rename to test/tint/buffer/uniform/std140/array/mat4x2_f32/to_private.wgsl.expected.dxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/array/mat4x2/to_private.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_private.wgsl.expected.fxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/array/mat4x2/to_private.wgsl.expected.fxc.hlsl
rename to test/tint/buffer/uniform/std140/array/mat4x2_f32/to_private.wgsl.expected.fxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_private.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_private.wgsl.expected.glsl
new file mode 100644
index 0000000..533c5dd
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_private.wgsl.expected.glsl

@@ -0,0 +1,40 @@
+#version 310 es
+
+struct mat4x2_f32 {
+  vec2 col0;
+  vec2 col1;
+  vec2 col2;
+  vec2 col3;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  mat4x2_f32 inner[4];
+} u;
+
+mat4x2 p[4] = mat4x2[4](mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f));
+mat4x2 conv_mat4x2_f32(mat4x2_f32 val) {
+  return mat4x2(val.col0, val.col1, val.col2, val.col3);
+}
+
+mat4x2[4] conv_arr4_mat4x2_f32(mat4x2_f32 val[4]) {
+  mat4x2 arr[4] = mat4x2[4](mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_mat4x2_f32(val[i]);
+    }
+  }
+  return arr;
+}
+
+void f() {
+  p = conv_arr4_mat4x2_f32(u.inner);
+  p[1] = conv_mat4x2_f32(u.inner[2u]);
+  p[1][0] = u.inner[0u].col1.yx;
+  p[1][0].x = u.inner[0u].col1[0u];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_private.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_private.wgsl.expected.msl
new file mode 100644
index 0000000..03a47ff
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_private.wgsl.expected.msl

@@ -0,0 +1,25 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+kernel void f(const constant tint_array<float4x2, 4>* tint_symbol_1 [[buffer(0)]]) {
+  thread tint_array<float4x2, 4> tint_symbol = {};
+  tint_symbol = *(tint_symbol_1);
+  tint_symbol[1] = (*(tint_symbol_1))[2];
+  tint_symbol[1][0] = float2((*(tint_symbol_1))[0][1]).yx;
+  tint_symbol[1][0][0] = (*(tint_symbol_1))[0][1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2/to_private.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_private.wgsl.expected.spvasm
similarity index 100%
rename from test/tint/buffer/uniform/std140/array/mat4x2/to_private.wgsl.expected.spvasm
rename to test/tint/buffer/uniform/std140/array/mat4x2_f32/to_private.wgsl.expected.spvasm


diff --git a/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_private.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_private.wgsl.expected.wgsl
new file mode 100644
index 0000000..01d5033
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_private.wgsl.expected.wgsl

@@ -0,0 +1,11 @@
+@group(0) @binding(0) var<uniform> u : array<mat4x2<f32>, 4>;
+
+var<private> p : array<mat4x2<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  p = u;
+  p[1] = u[2];
+  p[1][0] = u[0][1].yx;
+  p[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_storage.wgsl b/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_storage.wgsl
new file mode 100644
index 0000000..82a1f0b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_storage.wgsl

@@ -0,0 +1,10 @@
+@group(0) @binding(0) var<uniform> u : array<mat4x2<f32>, 4>;
+@group(0) @binding(1) var<storage, read_write> s : array<mat4x2<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    s = u;
+    s[1] = u[2];
+    s[1][0] = u[0][1].yx;
+    s[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2/to_storage.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_storage.wgsl.expected.dxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/array/mat4x2/to_storage.wgsl.expected.dxc.hlsl
rename to test/tint/buffer/uniform/std140/array/mat4x2_f32/to_storage.wgsl.expected.dxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/array/mat4x2/to_storage.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_storage.wgsl.expected.fxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/array/mat4x2/to_storage.wgsl.expected.fxc.hlsl
rename to test/tint/buffer/uniform/std140/array/mat4x2_f32/to_storage.wgsl.expected.fxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_storage.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_storage.wgsl.expected.glsl
new file mode 100644
index 0000000..d2ac282
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_storage.wgsl.expected.glsl

@@ -0,0 +1,43 @@
+#version 310 es
+
+struct mat4x2_f32 {
+  vec2 col0;
+  vec2 col1;
+  vec2 col2;
+  vec2 col3;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  mat4x2_f32 inner[4];
+} u;
+
+layout(binding = 1, std430) buffer u_block_ssbo {
+  mat4x2 inner[4];
+} s;
+
+mat4x2 conv_mat4x2_f32(mat4x2_f32 val) {
+  return mat4x2(val.col0, val.col1, val.col2, val.col3);
+}
+
+mat4x2[4] conv_arr4_mat4x2_f32(mat4x2_f32 val[4]) {
+  mat4x2 arr[4] = mat4x2[4](mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_mat4x2_f32(val[i]);
+    }
+  }
+  return arr;
+}
+
+void f() {
+  s.inner = conv_arr4_mat4x2_f32(u.inner);
+  s.inner[1] = conv_mat4x2_f32(u.inner[2u]);
+  s.inner[1][0] = u.inner[0u].col1.yx;
+  s.inner[1][0].x = u.inner[0u].col1[0u];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_storage.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_storage.wgsl.expected.msl
new file mode 100644
index 0000000..a3aff13
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_storage.wgsl.expected.msl

@@ -0,0 +1,24 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+kernel void f(device tint_array<float4x2, 4>* tint_symbol [[buffer(1)]], const constant tint_array<float4x2, 4>* tint_symbol_1 [[buffer(0)]]) {
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[2];
+  (*(tint_symbol))[1][0] = float2((*(tint_symbol_1))[0][1]).yx;
+  (*(tint_symbol))[1][0][0] = (*(tint_symbol_1))[0][1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2/to_storage.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_storage.wgsl.expected.spvasm
similarity index 100%
rename from test/tint/buffer/uniform/std140/array/mat4x2/to_storage.wgsl.expected.spvasm
rename to test/tint/buffer/uniform/std140/array/mat4x2_f32/to_storage.wgsl.expected.spvasm


diff --git a/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_storage.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_storage.wgsl.expected.wgsl
new file mode 100644
index 0000000..b4549c7
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_storage.wgsl.expected.wgsl

@@ -0,0 +1,11 @@
+@group(0) @binding(0) var<uniform> u : array<mat4x2<f32>, 4>;
+
+@group(0) @binding(1) var<storage, read_write> s : array<mat4x2<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  s = u;
+  s[1] = u[2];
+  s[1][0] = u[0][1].yx;
+  s[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_workgroup.wgsl b/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_workgroup.wgsl
new file mode 100644
index 0000000..5d68d23
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_workgroup.wgsl

@@ -0,0 +1,10 @@
+@group(0) @binding(0) var<uniform> u : array<mat4x2<f32>, 4>;
+var<workgroup> w : array<mat4x2<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    w = u;
+    w[1] = u[2];
+    w[1][0] = u[0][1].yx;
+    w[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2/to_workgroup.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_workgroup.wgsl.expected.dxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/array/mat4x2/to_workgroup.wgsl.expected.dxc.hlsl
rename to test/tint/buffer/uniform/std140/array/mat4x2_f32/to_workgroup.wgsl.expected.dxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/array/mat4x2/to_workgroup.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_workgroup.wgsl.expected.fxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/array/mat4x2/to_workgroup.wgsl.expected.fxc.hlsl
rename to test/tint/buffer/uniform/std140/array/mat4x2_f32/to_workgroup.wgsl.expected.fxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_workgroup.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_workgroup.wgsl.expected.glsl
new file mode 100644
index 0000000..092559f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_workgroup.wgsl.expected.glsl

@@ -0,0 +1,47 @@
+#version 310 es
+
+struct mat4x2_f32 {
+  vec2 col0;
+  vec2 col1;
+  vec2 col2;
+  vec2 col3;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  mat4x2_f32 inner[4];
+} u;
+
+shared mat4x2 w[4];
+mat4x2 conv_mat4x2_f32(mat4x2_f32 val) {
+  return mat4x2(val.col0, val.col1, val.col2, val.col3);
+}
+
+mat4x2[4] conv_arr4_mat4x2_f32(mat4x2_f32 val[4]) {
+  mat4x2 arr[4] = mat4x2[4](mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_mat4x2_f32(val[i]);
+    }
+  }
+  return arr;
+}
+
+void f(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      uint i = idx;
+      w[i] = mat4x2(vec2(0.0f), vec2(0.0f), vec2(0.0f), vec2(0.0f));
+    }
+  }
+  barrier();
+  w = conv_arr4_mat4x2_f32(u.inner);
+  w[1] = conv_mat4x2_f32(u.inner[2u]);
+  w[1][0] = u.inner[0u].col1.yx;
+  w[1][0].x = u.inner[0u].col1[0u];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f(gl_LocalInvocationIndex);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_workgroup.wgsl.expected.msl
new file mode 100644
index 0000000..9a7cc49
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_workgroup.wgsl.expected.msl

@@ -0,0 +1,38 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct tint_symbol_5 {
+  tint_array<float4x2, 4> w;
+};
+
+void f_inner(uint local_invocation_index, threadgroup tint_array<float4x2, 4>* const tint_symbol, const constant tint_array<float4x2, 4>* const tint_symbol_1) {
+  for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+    uint const i = idx;
+    (*(tint_symbol))[i] = float4x2(float2(0.0f), float2(0.0f), float2(0.0f), float2(0.0f));
+  }
+  threadgroup_barrier(mem_flags::mem_threadgroup);
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[2];
+  (*(tint_symbol))[1][0] = float2((*(tint_symbol_1))[0][1]).yx;
+  (*(tint_symbol))[1][0][0] = (*(tint_symbol_1))[0][1][0];
+}
+
+kernel void f(const constant tint_array<float4x2, 4>* tint_symbol_4 [[buffer(0)]], threadgroup tint_symbol_5* tint_symbol_3 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
+  threadgroup tint_array<float4x2, 4>* const tint_symbol_2 = &((*(tint_symbol_3)).w);
+  f_inner(local_invocation_index, tint_symbol_2, tint_symbol_4);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat4x2/to_workgroup.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_workgroup.wgsl.expected.spvasm
similarity index 100%
rename from test/tint/buffer/uniform/std140/array/mat4x2/to_workgroup.wgsl.expected.spvasm
rename to test/tint/buffer/uniform/std140/array/mat4x2_f32/to_workgroup.wgsl.expected.spvasm


diff --git a/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_workgroup.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_workgroup.wgsl.expected.wgsl
new file mode 100644
index 0000000..f45b673
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x2_f32/to_workgroup.wgsl.expected.wgsl

@@ -0,0 +1,11 @@
+@group(0) @binding(0) var<uniform> u : array<mat4x2<f32>, 4>;
+
+var<workgroup> w : array<mat4x2<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  w = u;
+  w[1] = u[2];
+  w[1][0] = u[0][1].yx;
+  w[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f16/dynamic_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/array/mat4x3_f16/dynamic_index_via_ptr.wgsl
new file mode 100644
index 0000000..4ae7b5b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/dynamic_index_via_ptr.wgsl

@@ -0,0 +1,17 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : array<mat4x3<f16>, 4>;
+
+var<private> counter = 0;
+fn i() -> i32 { counter++; return counter; }
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a       = &a;
+  let p_a_i     = &((*p_a)[i()]);
+  let p_a_i_i   = &((*p_a_i)[i()]);
+
+  let l_a       : array<mat4x3<f16>, 4> = *p_a;
+  let l_a_i     : mat4x3<f16>           = *p_a_i;
+  let l_a_i_i   : vec3<f16>             = *p_a_i_i;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x3_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..87b7680
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,59 @@
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[8];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+matrix<float16_t, 4, 3> tint_symbol_1(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  return matrix<float16_t, 4, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]), vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));
+}
+
+typedef matrix<float16_t, 4, 3> tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[8], uint offset) {
+  matrix<float16_t, 4, 3> arr[4] = (matrix<float16_t, 4, 3>[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 32u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_a_i_save = i();
+  const int p_a_i_i_save = i();
+  const matrix<float16_t, 4, 3> l_a[4] = tint_symbol(a, 0u);
+  const matrix<float16_t, 4, 3> l_a_i = tint_symbol_1(a, (32u * uint(p_a_i_save)));
+  const uint scalar_offset_4 = (((32u * uint(p_a_i_save)) + (8u * uint(p_a_i_i_save)))) / 4;
+  uint4 ubo_load_9 = a[scalar_offset_4 / 4];
+  uint2 ubo_load_8 = ((scalar_offset_4 & 2) ? ubo_load_9.zw : ubo_load_9.xy);
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  float16_t ubo_load_8_y = f16tof32(ubo_load_8[0] >> 16);
+  const vector<float16_t, 3> l_a_i_i = vector<float16_t, 3>(ubo_load_8_xz[0], ubo_load_8_y, ubo_load_8_xz[1]);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x3_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..eed8899
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,64 @@
+SKIP: FAILED
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[8];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+matrix<float16_t, 4, 3> tint_symbol_1(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  return matrix<float16_t, 4, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]), vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));
+}
+
+typedef matrix<float16_t, 4, 3> tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[8], uint offset) {
+  matrix<float16_t, 4, 3> arr[4] = (matrix<float16_t, 4, 3>[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 32u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_a_i_save = i();
+  const int p_a_i_i_save = i();
+  const matrix<float16_t, 4, 3> l_a[4] = tint_symbol(a, 0u);
+  const matrix<float16_t, 4, 3> l_a_i = tint_symbol_1(a, (32u * uint(p_a_i_save)));
+  const uint scalar_offset_4 = (((32u * uint(p_a_i_save)) + (8u * uint(p_a_i_i_save)))) / 4;
+  uint4 ubo_load_9 = a[scalar_offset_4 / 4];
+  uint2 ubo_load_8 = ((scalar_offset_4 & 2) ? ubo_load_9.zw : ubo_load_9.xy);
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  float16_t ubo_load_8_y = f16tof32(ubo_load_8[0] >> 16);
+  const vector<float16_t, 3> l_a_i_i = vector<float16_t, 3>(ubo_load_8_xz[0], ubo_load_8_y, ubo_load_8_xz[1]);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x00000180EC89DCE0(11,8-16): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f16/dynamic_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat4x3_f16/dynamic_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..36f90b4
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/dynamic_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,75 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct mat4x3_f16 {
+  f16vec3 col0;
+  f16vec3 col1;
+  f16vec3 col2;
+  f16vec3 col3;
+};
+
+layout(binding = 0, std140) uniform a_block_std140_ubo {
+  mat4x3_f16 inner[4];
+} a;
+
+int counter = 0;
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+f16mat4x3 conv_mat4x3_f16(mat4x3_f16 val) {
+  return f16mat4x3(val.col0, val.col1, val.col2, val.col3);
+}
+
+f16mat4x3[4] conv_arr4_mat4x3_f16(mat4x3_f16 val[4]) {
+  f16mat4x3 arr[4] = f16mat4x3[4](f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_mat4x3_f16(val[i]);
+    }
+  }
+  return arr;
+}
+
+f16vec3 load_a_inner_p0_p1(uint p0, uint p1) {
+  switch(p1) {
+    case 0u: {
+      return a.inner[p0].col0;
+      break;
+    }
+    case 1u: {
+      return a.inner[p0].col1;
+      break;
+    }
+    case 2u: {
+      return a.inner[p0].col2;
+      break;
+    }
+    case 3u: {
+      return a.inner[p0].col3;
+      break;
+    }
+    default: {
+      return f16vec3(0.0hf);
+      break;
+    }
+  }
+}
+
+void f() {
+  f16mat4x3 p_a[4] = conv_arr4_mat4x3_f16(a.inner);
+  int tint_symbol = i();
+  f16mat4x3 p_a_i = conv_mat4x3_f16(a.inner[tint_symbol]);
+  int tint_symbol_1 = i();
+  f16vec3 p_a_i_i = load_a_inner_p0_p1(uint(tint_symbol), uint(tint_symbol_1));
+  f16mat4x3 l_a[4] = conv_arr4_mat4x3_f16(a.inner);
+  f16mat4x3 l_a_i = conv_mat4x3_f16(a.inner[tint_symbol]);
+  f16vec3 l_a_i_i = load_a_inner_p0_p1(uint(tint_symbol), uint(tint_symbol_1));
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f16/dynamic_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat4x3_f16/dynamic_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..5f57b03
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/dynamic_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,33 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+int i() {
+  thread int tint_symbol_2 = 0;
+  tint_symbol_2 = as_type<int>((as_type<uint>(tint_symbol_2) + as_type<uint>(1)));
+  return tint_symbol_2;
+}
+
+kernel void f(const constant tint_array<half4x3, 4>* tint_symbol_3 [[buffer(0)]]) {
+  int const tint_symbol = i();
+  int const p_a_i_save = tint_symbol;
+  int const tint_symbol_1 = i();
+  int const p_a_i_i_save = tint_symbol_1;
+  tint_array<half4x3, 4> const l_a = *(tint_symbol_3);
+  half4x3 const l_a_i = (*(tint_symbol_3))[p_a_i_save];
+  half3 const l_a_i_i = (*(tint_symbol_3))[p_a_i_save][p_a_i_i_save];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f16/dynamic_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat4x3_f16/dynamic_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..fd218f4
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/dynamic_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,181 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 108
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %a_block_std140 "a_block_std140"
+               OpMemberName %a_block_std140 0 "inner"
+               OpName %mat4x3_f16 "mat4x3_f16"
+               OpMemberName %mat4x3_f16 0 "col0"
+               OpMemberName %mat4x3_f16 1 "col1"
+               OpMemberName %mat4x3_f16 2 "col2"
+               OpMemberName %mat4x3_f16 3 "col3"
+               OpName %a "a"
+               OpName %counter "counter"
+               OpName %i "i"
+               OpName %conv_mat4x3_f16 "conv_mat4x3_f16"
+               OpName %val "val"
+               OpName %conv_arr4_mat4x3_f16 "conv_arr4_mat4x3_f16"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i_0 "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_a_inner_p0_p1 "load_a_inner_p0_p1"
+               OpName %p0 "p0"
+               OpName %p1 "p1"
+               OpName %f "f"
+               OpDecorate %a_block_std140 Block
+               OpMemberDecorate %a_block_std140 0 Offset 0
+               OpMemberDecorate %mat4x3_f16 0 Offset 0
+               OpMemberDecorate %mat4x3_f16 1 Offset 8
+               OpMemberDecorate %mat4x3_f16 2 Offset 16
+               OpMemberDecorate %mat4x3_f16 3 Offset 24
+               OpDecorate %_arr_mat4x3_f16_uint_4 ArrayStride 32
+               OpDecorate %a NonWritable
+               OpDecorate %a DescriptorSet 0
+               OpDecorate %a Binding 0
+               OpDecorate %_arr_mat4v3half_uint_4 ArrayStride 32
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+ %mat4x3_f16 = OpTypeStruct %v3half %v3half %v3half %v3half
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat4x3_f16_uint_4 = OpTypeArray %mat4x3_f16 %uint_4
+%a_block_std140 = OpTypeStruct %_arr_mat4x3_f16_uint_4
+%_ptr_Uniform_a_block_std140 = OpTypePointer Uniform %a_block_std140
+          %a = OpVariable %_ptr_Uniform_a_block_std140 Uniform
+        %int = OpTypeInt 32 1
+         %11 = OpConstantNull %int
+%_ptr_Private_int = OpTypePointer Private %int
+    %counter = OpVariable %_ptr_Private_int Private %11
+         %14 = OpTypeFunction %int
+      %int_1 = OpConstant %int 1
+ %mat4v3half = OpTypeMatrix %v3half 4
+         %21 = OpTypeFunction %mat4v3half %mat4x3_f16
+%_arr_mat4v3half_uint_4 = OpTypeArray %mat4v3half %uint_4
+         %31 = OpTypeFunction %_arr_mat4v3half_uint_4 %_arr_mat4x3_f16_uint_4
+%_ptr_Function__arr_mat4v3half_uint_4 = OpTypePointer Function %_arr_mat4v3half_uint_4
+         %38 = OpConstantNull %_arr_mat4v3half_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %41 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_mat4x3_f16_uint_4 = OpTypePointer Function %_arr_mat4x3_f16_uint_4
+         %54 = OpConstantNull %_arr_mat4x3_f16_uint_4
+%_ptr_Function_mat4v3half = OpTypePointer Function %mat4v3half
+%_ptr_Function_mat4x3_f16 = OpTypePointer Function %mat4x3_f16
+     %uint_1 = OpConstant %uint 1
+         %67 = OpTypeFunction %v3half %uint %uint
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+     %uint_2 = OpConstant %uint 2
+     %uint_3 = OpConstant %uint 3
+         %90 = OpConstantNull %v3half
+       %void = OpTypeVoid
+         %91 = OpTypeFunction %void
+%_ptr_Uniform__arr_mat4x3_f16_uint_4 = OpTypePointer Uniform %_arr_mat4x3_f16_uint_4
+%_ptr_Uniform_mat4x3_f16 = OpTypePointer Uniform %mat4x3_f16
+          %i = OpFunction %int None %14
+         %16 = OpLabel
+         %17 = OpLoad %int %counter
+         %19 = OpIAdd %int %17 %int_1
+               OpStore %counter %19
+         %20 = OpLoad %int %counter
+               OpReturnValue %20
+               OpFunctionEnd
+%conv_mat4x3_f16 = OpFunction %mat4v3half None %21
+        %val = OpFunctionParameter %mat4x3_f16
+         %25 = OpLabel
+         %26 = OpCompositeExtract %v3half %val 0
+         %27 = OpCompositeExtract %v3half %val 1
+         %28 = OpCompositeExtract %v3half %val 2
+         %29 = OpCompositeExtract %v3half %val 3
+         %30 = OpCompositeConstruct %mat4v3half %26 %27 %28 %29
+               OpReturnValue %30
+               OpFunctionEnd
+%conv_arr4_mat4x3_f16 = OpFunction %_arr_mat4v3half_uint_4 None %31
+      %val_0 = OpFunctionParameter %_arr_mat4x3_f16_uint_4
+         %35 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_mat4v3half_uint_4 Function %38
+        %i_0 = OpVariable %_ptr_Function_uint Function %41
+%var_for_index = OpVariable %_ptr_Function__arr_mat4x3_f16_uint_4 Function %54
+               OpBranch %42
+         %42 = OpLabel
+               OpLoopMerge %43 %44 None
+               OpBranch %45
+         %45 = OpLabel
+         %47 = OpLoad %uint %i_0
+         %48 = OpULessThan %bool %47 %uint_4
+         %46 = OpLogicalNot %bool %48
+               OpSelectionMerge %50 None
+               OpBranchConditional %46 %51 %50
+         %51 = OpLabel
+               OpBranch %43
+         %50 = OpLabel
+               OpStore %var_for_index %val_0
+         %55 = OpLoad %uint %i_0
+         %57 = OpAccessChain %_ptr_Function_mat4v3half %arr %55
+         %59 = OpLoad %uint %i_0
+         %61 = OpAccessChain %_ptr_Function_mat4x3_f16 %var_for_index %59
+         %62 = OpLoad %mat4x3_f16 %61
+         %58 = OpFunctionCall %mat4v3half %conv_mat4x3_f16 %62
+               OpStore %57 %58
+               OpBranch %44
+         %44 = OpLabel
+         %63 = OpLoad %uint %i_0
+         %65 = OpIAdd %uint %63 %uint_1
+               OpStore %i_0 %65
+               OpBranch %42
+         %43 = OpLabel
+         %66 = OpLoad %_arr_mat4v3half_uint_4 %arr
+               OpReturnValue %66
+               OpFunctionEnd
+%load_a_inner_p0_p1 = OpFunction %v3half None %67
+         %p0 = OpFunctionParameter %uint
+         %p1 = OpFunctionParameter %uint
+         %71 = OpLabel
+               OpSelectionMerge %72 None
+               OpSwitch %p1 %73 0 %74 1 %75 2 %76 3 %77
+         %74 = OpLabel
+         %80 = OpAccessChain %_ptr_Uniform_v3half %a %uint_0 %p0 %uint_0
+         %81 = OpLoad %v3half %80
+               OpReturnValue %81
+         %75 = OpLabel
+         %82 = OpAccessChain %_ptr_Uniform_v3half %a %uint_0 %p0 %uint_1
+         %83 = OpLoad %v3half %82
+               OpReturnValue %83
+         %76 = OpLabel
+         %85 = OpAccessChain %_ptr_Uniform_v3half %a %uint_0 %p0 %uint_2
+         %86 = OpLoad %v3half %85
+               OpReturnValue %86
+         %77 = OpLabel
+         %88 = OpAccessChain %_ptr_Uniform_v3half %a %uint_0 %p0 %uint_3
+         %89 = OpLoad %v3half %88
+               OpReturnValue %89
+         %73 = OpLabel
+               OpReturnValue %90
+         %72 = OpLabel
+               OpReturnValue %90
+               OpFunctionEnd
+          %f = OpFunction %void None %91
+         %94 = OpLabel
+         %95 = OpFunctionCall %int %i
+         %96 = OpFunctionCall %int %i
+         %99 = OpAccessChain %_ptr_Uniform__arr_mat4x3_f16_uint_4 %a %uint_0
+        %100 = OpLoad %_arr_mat4x3_f16_uint_4 %99
+         %97 = OpFunctionCall %_arr_mat4v3half_uint_4 %conv_arr4_mat4x3_f16 %100
+        %103 = OpAccessChain %_ptr_Uniform_mat4x3_f16 %a %uint_0 %95
+        %104 = OpLoad %mat4x3_f16 %103
+        %101 = OpFunctionCall %mat4v3half %conv_mat4x3_f16 %104
+        %106 = OpBitcast %uint %95
+        %107 = OpBitcast %uint %96
+        %105 = OpFunctionCall %v3half %load_a_inner_p0_p1 %106 %107
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f16/dynamic_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat4x3_f16/dynamic_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..c1ce99f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/dynamic_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,20 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : array<mat4x3<f16>, 4>;
+
+var<private> counter = 0;
+
+fn i() -> i32 {
+  counter++;
+  return counter;
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &(a);
+  let p_a_i = &((*(p_a))[i()]);
+  let p_a_i_i = &((*(p_a_i))[i()]);
+  let l_a : array<mat4x3<f16>, 4> = *(p_a);
+  let l_a_i : mat4x3<f16> = *(p_a_i);
+  let l_a_i_i : vec3<f16> = *(p_a_i_i);
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f16/static_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/array/mat4x3_f16/static_index_via_ptr.wgsl
new file mode 100644
index 0000000..43527ff
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/static_index_via_ptr.wgsl

@@ -0,0 +1,14 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : array<mat4x3<f16>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a       = &a;
+  let p_a_2     = &((*p_a)[2]);
+  let p_a_2_1   = &((*p_a_2)[1]);
+
+  let l_a       : array<mat4x3<f16>, 4> = *p_a;
+  let l_a_i     : mat4x3<f16>           = *p_a_2;
+  let l_a_i_i   : vec3<f16>             = *p_a_2_1;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x3_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..53dc367
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,49 @@
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[8];
+};
+
+matrix<float16_t, 4, 3> tint_symbol_1(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  return matrix<float16_t, 4, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]), vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));
+}
+
+typedef matrix<float16_t, 4, 3> tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[8], uint offset) {
+  matrix<float16_t, 4, 3> arr[4] = (matrix<float16_t, 4, 3>[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 32u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 4, 3> l_a[4] = tint_symbol(a, 0u);
+  const matrix<float16_t, 4, 3> l_a_i = tint_symbol_1(a, 64u);
+  uint2 ubo_load_8 = a[4].zw;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  float16_t ubo_load_8_y = f16tof32(ubo_load_8[0] >> 16);
+  const vector<float16_t, 3> l_a_i_i = vector<float16_t, 3>(ubo_load_8_xz[0], ubo_load_8_y, ubo_load_8_xz[1]);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x3_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..acc35b5
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,54 @@
+SKIP: FAILED
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[8];
+};
+
+matrix<float16_t, 4, 3> tint_symbol_1(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  return matrix<float16_t, 4, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]), vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));
+}
+
+typedef matrix<float16_t, 4, 3> tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[8], uint offset) {
+  matrix<float16_t, 4, 3> arr[4] = (matrix<float16_t, 4, 3>[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 32u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 4, 3> l_a[4] = tint_symbol(a, 0u);
+  const matrix<float16_t, 4, 3> l_a_i = tint_symbol_1(a, 64u);
+  uint2 ubo_load_8 = a[4].zw;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  float16_t ubo_load_8_y = f16tof32(ubo_load_8[0] >> 16);
+  const vector<float16_t, 3> l_a_i_i = vector<float16_t, 3>(ubo_load_8_xz[0], ubo_load_8_y, ubo_load_8_xz[1]);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000021B182DE800(5,8-16): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f16/static_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat4x3_f16/static_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..41093e5
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/static_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,42 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct mat4x3_f16 {
+  f16vec3 col0;
+  f16vec3 col1;
+  f16vec3 col2;
+  f16vec3 col3;
+};
+
+layout(binding = 0, std140) uniform a_block_std140_ubo {
+  mat4x3_f16 inner[4];
+} a;
+
+f16mat4x3 conv_mat4x3_f16(mat4x3_f16 val) {
+  return f16mat4x3(val.col0, val.col1, val.col2, val.col3);
+}
+
+f16mat4x3[4] conv_arr4_mat4x3_f16(mat4x3_f16 val[4]) {
+  f16mat4x3 arr[4] = f16mat4x3[4](f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_mat4x3_f16(val[i]);
+    }
+  }
+  return arr;
+}
+
+void f() {
+  f16mat4x3 p_a[4] = conv_arr4_mat4x3_f16(a.inner);
+  f16mat4x3 p_a_2 = conv_mat4x3_f16(a.inner[2u]);
+  f16vec3 p_a_2_1 = a.inner[2u].col1;
+  f16mat4x3 l_a[4] = conv_arr4_mat4x3_f16(a.inner);
+  f16mat4x3 l_a_i = conv_mat4x3_f16(a.inner[2u]);
+  f16vec3 l_a_i_i = a.inner[2u].col1;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f16/static_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat4x3_f16/static_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..b7999ab
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/static_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,23 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+kernel void f(const constant tint_array<half4x3, 4>* tint_symbol [[buffer(0)]]) {
+  tint_array<half4x3, 4> const l_a = *(tint_symbol);
+  half4x3 const l_a_i = (*(tint_symbol))[2];
+  half3 const l_a_i_i = (*(tint_symbol))[2][1];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f16/static_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat4x3_f16/static_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..589a551
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/static_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,129 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 73
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %a_block_std140 "a_block_std140"
+               OpMemberName %a_block_std140 0 "inner"
+               OpName %mat4x3_f16 "mat4x3_f16"
+               OpMemberName %mat4x3_f16 0 "col0"
+               OpMemberName %mat4x3_f16 1 "col1"
+               OpMemberName %mat4x3_f16 2 "col2"
+               OpMemberName %mat4x3_f16 3 "col3"
+               OpName %a "a"
+               OpName %conv_mat4x3_f16 "conv_mat4x3_f16"
+               OpName %val "val"
+               OpName %conv_arr4_mat4x3_f16 "conv_arr4_mat4x3_f16"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %f "f"
+               OpDecorate %a_block_std140 Block
+               OpMemberDecorate %a_block_std140 0 Offset 0
+               OpMemberDecorate %mat4x3_f16 0 Offset 0
+               OpMemberDecorate %mat4x3_f16 1 Offset 8
+               OpMemberDecorate %mat4x3_f16 2 Offset 16
+               OpMemberDecorate %mat4x3_f16 3 Offset 24
+               OpDecorate %_arr_mat4x3_f16_uint_4 ArrayStride 32
+               OpDecorate %a NonWritable
+               OpDecorate %a DescriptorSet 0
+               OpDecorate %a Binding 0
+               OpDecorate %_arr_mat4v3half_uint_4 ArrayStride 32
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+ %mat4x3_f16 = OpTypeStruct %v3half %v3half %v3half %v3half
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat4x3_f16_uint_4 = OpTypeArray %mat4x3_f16 %uint_4
+%a_block_std140 = OpTypeStruct %_arr_mat4x3_f16_uint_4
+%_ptr_Uniform_a_block_std140 = OpTypePointer Uniform %a_block_std140
+          %a = OpVariable %_ptr_Uniform_a_block_std140 Uniform
+ %mat4v3half = OpTypeMatrix %v3half 4
+         %10 = OpTypeFunction %mat4v3half %mat4x3_f16
+%_arr_mat4v3half_uint_4 = OpTypeArray %mat4v3half %uint_4
+         %20 = OpTypeFunction %_arr_mat4v3half_uint_4 %_arr_mat4x3_f16_uint_4
+%_ptr_Function__arr_mat4v3half_uint_4 = OpTypePointer Function %_arr_mat4v3half_uint_4
+         %27 = OpConstantNull %_arr_mat4v3half_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %30 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_mat4x3_f16_uint_4 = OpTypePointer Function %_arr_mat4x3_f16_uint_4
+         %43 = OpConstantNull %_arr_mat4x3_f16_uint_4
+%_ptr_Function_mat4v3half = OpTypePointer Function %mat4v3half
+%_ptr_Function_mat4x3_f16 = OpTypePointer Function %mat4x3_f16
+     %uint_1 = OpConstant %uint 1
+       %void = OpTypeVoid
+         %56 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_mat4x3_f16_uint_4 = OpTypePointer Uniform %_arr_mat4x3_f16_uint_4
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_mat4x3_f16 = OpTypePointer Uniform %mat4x3_f16
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+%conv_mat4x3_f16 = OpFunction %mat4v3half None %10
+        %val = OpFunctionParameter %mat4x3_f16
+         %14 = OpLabel
+         %15 = OpCompositeExtract %v3half %val 0
+         %16 = OpCompositeExtract %v3half %val 1
+         %17 = OpCompositeExtract %v3half %val 2
+         %18 = OpCompositeExtract %v3half %val 3
+         %19 = OpCompositeConstruct %mat4v3half %15 %16 %17 %18
+               OpReturnValue %19
+               OpFunctionEnd
+%conv_arr4_mat4x3_f16 = OpFunction %_arr_mat4v3half_uint_4 None %20
+      %val_0 = OpFunctionParameter %_arr_mat4x3_f16_uint_4
+         %24 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_mat4v3half_uint_4 Function %27
+          %i = OpVariable %_ptr_Function_uint Function %30
+%var_for_index = OpVariable %_ptr_Function__arr_mat4x3_f16_uint_4 Function %43
+               OpBranch %31
+         %31 = OpLabel
+               OpLoopMerge %32 %33 None
+               OpBranch %34
+         %34 = OpLabel
+         %36 = OpLoad %uint %i
+         %37 = OpULessThan %bool %36 %uint_4
+         %35 = OpLogicalNot %bool %37
+               OpSelectionMerge %39 None
+               OpBranchConditional %35 %40 %39
+         %40 = OpLabel
+               OpBranch %32
+         %39 = OpLabel
+               OpStore %var_for_index %val_0
+         %44 = OpLoad %uint %i
+         %46 = OpAccessChain %_ptr_Function_mat4v3half %arr %44
+         %48 = OpLoad %uint %i
+         %50 = OpAccessChain %_ptr_Function_mat4x3_f16 %var_for_index %48
+         %51 = OpLoad %mat4x3_f16 %50
+         %47 = OpFunctionCall %mat4v3half %conv_mat4x3_f16 %51
+               OpStore %46 %47
+               OpBranch %33
+         %33 = OpLabel
+         %52 = OpLoad %uint %i
+         %54 = OpIAdd %uint %52 %uint_1
+               OpStore %i %54
+               OpBranch %31
+         %32 = OpLabel
+         %55 = OpLoad %_arr_mat4v3half_uint_4 %arr
+               OpReturnValue %55
+               OpFunctionEnd
+          %f = OpFunction %void None %56
+         %59 = OpLabel
+         %63 = OpAccessChain %_ptr_Uniform__arr_mat4x3_f16_uint_4 %a %uint_0
+         %64 = OpLoad %_arr_mat4x3_f16_uint_4 %63
+         %60 = OpFunctionCall %_arr_mat4v3half_uint_4 %conv_arr4_mat4x3_f16 %64
+         %68 = OpAccessChain %_ptr_Uniform_mat4x3_f16 %a %uint_0 %uint_2
+         %69 = OpLoad %mat4x3_f16 %68
+         %65 = OpFunctionCall %mat4v3half %conv_mat4x3_f16 %69
+         %71 = OpAccessChain %_ptr_Uniform_v3half %a %uint_0 %uint_2 %uint_1
+         %72 = OpLoad %v3half %71
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f16/static_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat4x3_f16/static_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..7be7438
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/static_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,13 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : array<mat4x3<f16>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &(a);
+  let p_a_2 = &((*(p_a))[2]);
+  let p_a_2_1 = &((*(p_a_2))[1]);
+  let l_a : array<mat4x3<f16>, 4> = *(p_a);
+  let l_a_i : mat4x3<f16> = *(p_a_2);
+  let l_a_i_i : vec3<f16> = *(p_a_2_1);
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_builtin.wgsl b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_builtin.wgsl
new file mode 100644
index 0000000..b61759f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_builtin.wgsl

@@ -0,0 +1,10 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : array<mat4x3<f16>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    let t = transpose(u[2]);
+    let l = length(u[0][1].zxy);
+    let a = abs(u[0][1].zxy.x);
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_builtin.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_builtin.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..fe691e9
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_builtin.wgsl.expected.dxc.hlsl

@@ -0,0 +1,41 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[8];
+};
+
+matrix<float16_t, 4, 3> tint_symbol(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  return matrix<float16_t, 4, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]), vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 3, 4> t = transpose(tint_symbol(u, 64u));
+  uint2 ubo_load_8 = u[0].zw;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  float16_t ubo_load_8_y = f16tof32(ubo_load_8[0] >> 16);
+  const float16_t l = length(vector<float16_t, 3>(ubo_load_8_xz[0], ubo_load_8_y, ubo_load_8_xz[1]).zxy);
+  uint2 ubo_load_9 = u[0].zw;
+  vector<float16_t, 2> ubo_load_9_xz = vector<float16_t, 2>(f16tof32(ubo_load_9 & 0xFFFF));
+  float16_t ubo_load_9_y = f16tof32(ubo_load_9[0] >> 16);
+  const float16_t a = abs(vector<float16_t, 3>(ubo_load_9_xz[0], ubo_load_9_y, ubo_load_9_xz[1]).zxy.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_builtin.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_builtin.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..aa2cb40
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_builtin.wgsl.expected.fxc.hlsl

@@ -0,0 +1,46 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[8];
+};
+
+matrix<float16_t, 4, 3> tint_symbol(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  return matrix<float16_t, 4, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]), vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 3, 4> t = transpose(tint_symbol(u, 64u));
+  uint2 ubo_load_8 = u[0].zw;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  float16_t ubo_load_8_y = f16tof32(ubo_load_8[0] >> 16);
+  const float16_t l = length(vector<float16_t, 3>(ubo_load_8_xz[0], ubo_load_8_y, ubo_load_8_xz[1]).zxy);
+  uint2 ubo_load_9 = u[0].zw;
+  vector<float16_t, 2> ubo_load_9_xz = vector<float16_t, 2>(f16tof32(ubo_load_9 & 0xFFFF));
+  float16_t ubo_load_9_y = f16tof32(ubo_load_9[0] >> 16);
+  const float16_t a = abs(vector<float16_t, 3>(ubo_load_9_xz[0], ubo_load_9_y, ubo_load_9_xz[1]).zxy.x);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000002C9B1C5A400(5,8-16): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_builtin.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_builtin.wgsl.expected.glsl
new file mode 100644
index 0000000..da81276
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_builtin.wgsl.expected.glsl

@@ -0,0 +1,29 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct mat4x3_f16 {
+  f16vec3 col0;
+  f16vec3 col1;
+  f16vec3 col2;
+  f16vec3 col3;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  mat4x3_f16 inner[4];
+} u;
+
+f16mat4x3 conv_mat4x3_f16(mat4x3_f16 val) {
+  return f16mat4x3(val.col0, val.col1, val.col2, val.col3);
+}
+
+void f() {
+  f16mat3x4 t = transpose(conv_mat4x3_f16(u.inner[2u]));
+  float16_t l = length(u.inner[0u].col1.zxy);
+  float16_t a = abs(u.inner[0u].col1.zxy[0u]);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_builtin.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_builtin.wgsl.expected.msl
new file mode 100644
index 0000000..39b9c1c
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_builtin.wgsl.expected.msl

@@ -0,0 +1,23 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+kernel void f(const constant tint_array<half4x3, 4>* tint_symbol [[buffer(0)]]) {
+  half3x4 const t = transpose((*(tint_symbol))[2]);
+  half const l = length(half3((*(tint_symbol))[0][1]).zxy);
+  half const a = fabs(half3((*(tint_symbol))[0][1]).zxy[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_builtin.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_builtin.wgsl.expected.spvasm
new file mode 100644
index 0000000..272eab8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_builtin.wgsl.expected.spvasm

@@ -0,0 +1,83 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 46
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+         %34 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %mat4x3_f16 "mat4x3_f16"
+               OpMemberName %mat4x3_f16 0 "col0"
+               OpMemberName %mat4x3_f16 1 "col1"
+               OpMemberName %mat4x3_f16 2 "col2"
+               OpMemberName %mat4x3_f16 3 "col3"
+               OpName %u "u"
+               OpName %conv_mat4x3_f16 "conv_mat4x3_f16"
+               OpName %val "val"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %mat4x3_f16 0 Offset 0
+               OpMemberDecorate %mat4x3_f16 1 Offset 8
+               OpMemberDecorate %mat4x3_f16 2 Offset 16
+               OpMemberDecorate %mat4x3_f16 3 Offset 24
+               OpDecorate %_arr_mat4x3_f16_uint_4 ArrayStride 32
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+ %mat4x3_f16 = OpTypeStruct %v3half %v3half %v3half %v3half
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat4x3_f16_uint_4 = OpTypeArray %mat4x3_f16 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_mat4x3_f16_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat4v3half = OpTypeMatrix %v3half 4
+         %10 = OpTypeFunction %mat4v3half %mat4x3_f16
+       %void = OpTypeVoid
+         %20 = OpTypeFunction %void
+     %v4half = OpTypeVector %half 4
+ %mat3v4half = OpTypeMatrix %v4half 3
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_mat4x3_f16 = OpTypePointer Uniform %mat4x3_f16
+         %35 = OpConstantNull %uint
+     %uint_1 = OpConstant %uint 1
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+%conv_mat4x3_f16 = OpFunction %mat4v3half None %10
+        %val = OpFunctionParameter %mat4x3_f16
+         %14 = OpLabel
+         %15 = OpCompositeExtract %v3half %val 0
+         %16 = OpCompositeExtract %v3half %val 1
+         %17 = OpCompositeExtract %v3half %val 2
+         %18 = OpCompositeExtract %v3half %val 3
+         %19 = OpCompositeConstruct %mat4v3half %15 %16 %17 %18
+               OpReturnValue %19
+               OpFunctionEnd
+          %f = OpFunction %void None %20
+         %23 = OpLabel
+         %31 = OpAccessChain %_ptr_Uniform_mat4x3_f16 %u %uint_0 %uint_2
+         %32 = OpLoad %mat4x3_f16 %31
+         %27 = OpFunctionCall %mat4v3half %conv_mat4x3_f16 %32
+         %24 = OpTranspose %mat3v4half %27
+         %38 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0 %35 %uint_1
+         %39 = OpLoad %v3half %38
+         %40 = OpVectorShuffle %v3half %39 %39 2 0 1
+         %33 = OpExtInst %half %34 Length %40
+         %42 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0 %35 %uint_1
+         %43 = OpLoad %v3half %42
+         %44 = OpVectorShuffle %v3half %43 %43 2 0 1
+         %45 = OpCompositeExtract %half %44 0
+         %41 = OpExtInst %half %34 FAbs %45
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_builtin.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_builtin.wgsl.expected.wgsl
new file mode 100644
index 0000000..3be1c07
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_builtin.wgsl.expected.wgsl

@@ -0,0 +1,10 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : array<mat4x3<f16>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let t = transpose(u[2]);
+  let l = length(u[0][1].zxy);
+  let a = abs(u[0][1].zxy.x);
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_fn.wgsl b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_fn.wgsl
new file mode 100644
index 0000000..b66dc37
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_fn.wgsl

@@ -0,0 +1,16 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : array<mat4x3<f16>, 4>;
+
+fn a(a : array<mat4x3<f16>, 4>) {}
+fn b(m : mat4x3<f16>) {}
+fn c(v : vec3<f16>) {}
+fn d(f : f16) {}
+
+@compute @workgroup_size(1)
+fn f() {
+    a(u);
+    b(u[1]);
+    c(u[1][0].zxy);
+    d(u[1][0].zxy.x);
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_fn.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_fn.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..2768c99
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_fn.wgsl.expected.dxc.hlsl

@@ -0,0 +1,65 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[8];
+};
+
+void a(matrix<float16_t, 4, 3> a_1[4]) {
+}
+
+void b(matrix<float16_t, 4, 3> m) {
+}
+
+void c(vector<float16_t, 3> v) {
+}
+
+void d(float16_t f_1) {
+}
+
+matrix<float16_t, 4, 3> tint_symbol_1(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  return matrix<float16_t, 4, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]), vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));
+}
+
+typedef matrix<float16_t, 4, 3> tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[8], uint offset) {
+  matrix<float16_t, 4, 3> arr[4] = (matrix<float16_t, 4, 3>[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 32u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(tint_symbol_1(u, 32u));
+  uint2 ubo_load_8 = u[2].xy;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  float16_t ubo_load_8_y = f16tof32(ubo_load_8[0] >> 16);
+  c(vector<float16_t, 3>(ubo_load_8_xz[0], ubo_load_8_y, ubo_load_8_xz[1]).zxy);
+  uint2 ubo_load_9 = u[2].xy;
+  vector<float16_t, 2> ubo_load_9_xz = vector<float16_t, 2>(f16tof32(ubo_load_9 & 0xFFFF));
+  float16_t ubo_load_9_y = f16tof32(ubo_load_9[0] >> 16);
+  d(vector<float16_t, 3>(ubo_load_9_xz[0], ubo_load_9_y, ubo_load_9_xz[1]).zxy.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_fn.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_fn.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..88a9d54
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_fn.wgsl.expected.fxc.hlsl

@@ -0,0 +1,73 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[8];
+};
+
+void a(matrix<float16_t, 4, 3> a_1[4]) {
+}
+
+void b(matrix<float16_t, 4, 3> m) {
+}
+
+void c(vector<float16_t, 3> v) {
+}
+
+void d(float16_t f_1) {
+}
+
+matrix<float16_t, 4, 3> tint_symbol_1(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  return matrix<float16_t, 4, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]), vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));
+}
+
+typedef matrix<float16_t, 4, 3> tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[8], uint offset) {
+  matrix<float16_t, 4, 3> arr[4] = (matrix<float16_t, 4, 3>[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 32u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(tint_symbol_1(u, 32u));
+  uint2 ubo_load_8 = u[2].xy;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  float16_t ubo_load_8_y = f16tof32(ubo_load_8[0] >> 16);
+  c(vector<float16_t, 3>(ubo_load_8_xz[0], ubo_load_8_y, ubo_load_8_xz[1]).zxy);
+  uint2 ubo_load_9 = u[2].xy;
+  vector<float16_t, 2> ubo_load_9_xz = vector<float16_t, 2>(f16tof32(ubo_load_9 & 0xFFFF));
+  float16_t ubo_load_9_y = f16tof32(ubo_load_9[0] >> 16);
+  d(vector<float16_t, 3>(ubo_load_9_xz[0], ubo_load_9_y, ubo_load_9_xz[1]).zxy.x);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x00000296784C0E70(5,15-23): error X3000: syntax error: unexpected token 'float16_t'
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x00000296784C0E70(8,15-23): error X3000: syntax error: unexpected token 'float16_t'
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x00000296784C0E70(11,15-23): error X3000: syntax error: unexpected token 'float16_t'
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x00000296784C0E70(14,8-16): error X3000: unrecognized identifier 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_fn.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_fn.wgsl.expected.glsl
new file mode 100644
index 0000000..fee3b35
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_fn.wgsl.expected.glsl

@@ -0,0 +1,52 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct mat4x3_f16 {
+  f16vec3 col0;
+  f16vec3 col1;
+  f16vec3 col2;
+  f16vec3 col3;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  mat4x3_f16 inner[4];
+} u;
+
+void a(f16mat4x3 a_1[4]) {
+}
+
+void b(f16mat4x3 m) {
+}
+
+void c(f16vec3 v) {
+}
+
+void d(float16_t f_1) {
+}
+
+f16mat4x3 conv_mat4x3_f16(mat4x3_f16 val) {
+  return f16mat4x3(val.col0, val.col1, val.col2, val.col3);
+}
+
+f16mat4x3[4] conv_arr4_mat4x3_f16(mat4x3_f16 val[4]) {
+  f16mat4x3 arr[4] = f16mat4x3[4](f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_mat4x3_f16(val[i]);
+    }
+  }
+  return arr;
+}
+
+void f() {
+  a(conv_arr4_mat4x3_f16(u.inner));
+  b(conv_mat4x3_f16(u.inner[1u]));
+  c(u.inner[1u].col0.zxy);
+  d(u.inner[1u].col0.zxy[0u]);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_fn.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_fn.wgsl.expected.msl
new file mode 100644
index 0000000..ed20c74
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_fn.wgsl.expected.msl

@@ -0,0 +1,36 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+void a(tint_array<half4x3, 4> a_1) {
+}
+
+void b(half4x3 m) {
+}
+
+void c(half3 v) {
+}
+
+void d(half f_1) {
+}
+
+kernel void f(const constant tint_array<half4x3, 4>* tint_symbol [[buffer(0)]]) {
+  a(*(tint_symbol));
+  b((*(tint_symbol))[1]);
+  c(half3((*(tint_symbol))[1][0]).zxy);
+  d(half3((*(tint_symbol))[1][0]).zxy[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_fn.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_fn.wgsl.expected.spvasm
new file mode 100644
index 0000000..8e4fc69
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_fn.wgsl.expected.spvasm

@@ -0,0 +1,169 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 97
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %mat4x3_f16 "mat4x3_f16"
+               OpMemberName %mat4x3_f16 0 "col0"
+               OpMemberName %mat4x3_f16 1 "col1"
+               OpMemberName %mat4x3_f16 2 "col2"
+               OpMemberName %mat4x3_f16 3 "col3"
+               OpName %u "u"
+               OpName %a "a"
+               OpName %a_1 "a_1"
+               OpName %b "b"
+               OpName %m "m"
+               OpName %c "c"
+               OpName %v "v"
+               OpName %d "d"
+               OpName %f_1 "f_1"
+               OpName %conv_mat4x3_f16 "conv_mat4x3_f16"
+               OpName %val "val"
+               OpName %conv_arr4_mat4x3_f16 "conv_arr4_mat4x3_f16"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %mat4x3_f16 0 Offset 0
+               OpMemberDecorate %mat4x3_f16 1 Offset 8
+               OpMemberDecorate %mat4x3_f16 2 Offset 16
+               OpMemberDecorate %mat4x3_f16 3 Offset 24
+               OpDecorate %_arr_mat4x3_f16_uint_4 ArrayStride 32
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpDecorate %_arr_mat4v3half_uint_4 ArrayStride 32
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+ %mat4x3_f16 = OpTypeStruct %v3half %v3half %v3half %v3half
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat4x3_f16_uint_4 = OpTypeArray %mat4x3_f16 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_mat4x3_f16_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+       %void = OpTypeVoid
+ %mat4v3half = OpTypeMatrix %v3half 4
+%_arr_mat4v3half_uint_4 = OpTypeArray %mat4v3half %uint_4
+         %10 = OpTypeFunction %void %_arr_mat4v3half_uint_4
+         %17 = OpTypeFunction %void %mat4v3half
+         %21 = OpTypeFunction %void %v3half
+         %25 = OpTypeFunction %void %half
+         %29 = OpTypeFunction %mat4v3half %mat4x3_f16
+         %38 = OpTypeFunction %_arr_mat4v3half_uint_4 %_arr_mat4x3_f16_uint_4
+%_ptr_Function__arr_mat4v3half_uint_4 = OpTypePointer Function %_arr_mat4v3half_uint_4
+         %44 = OpConstantNull %_arr_mat4v3half_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %47 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_mat4x3_f16_uint_4 = OpTypePointer Function %_arr_mat4x3_f16_uint_4
+         %60 = OpConstantNull %_arr_mat4x3_f16_uint_4
+%_ptr_Function_mat4v3half = OpTypePointer Function %mat4v3half
+%_ptr_Function_mat4x3_f16 = OpTypePointer Function %mat4x3_f16
+     %uint_1 = OpConstant %uint 1
+         %73 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_mat4x3_f16_uint_4 = OpTypePointer Uniform %_arr_mat4x3_f16_uint_4
+%_ptr_Uniform_mat4x3_f16 = OpTypePointer Uniform %mat4x3_f16
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+          %a = OpFunction %void None %10
+        %a_1 = OpFunctionParameter %_arr_mat4v3half_uint_4
+         %16 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %b = OpFunction %void None %17
+          %m = OpFunctionParameter %mat4v3half
+         %20 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %c = OpFunction %void None %21
+          %v = OpFunctionParameter %v3half
+         %24 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %d = OpFunction %void None %25
+        %f_1 = OpFunctionParameter %half
+         %28 = OpLabel
+               OpReturn
+               OpFunctionEnd
+%conv_mat4x3_f16 = OpFunction %mat4v3half None %29
+        %val = OpFunctionParameter %mat4x3_f16
+         %32 = OpLabel
+         %33 = OpCompositeExtract %v3half %val 0
+         %34 = OpCompositeExtract %v3half %val 1
+         %35 = OpCompositeExtract %v3half %val 2
+         %36 = OpCompositeExtract %v3half %val 3
+         %37 = OpCompositeConstruct %mat4v3half %33 %34 %35 %36
+               OpReturnValue %37
+               OpFunctionEnd
+%conv_arr4_mat4x3_f16 = OpFunction %_arr_mat4v3half_uint_4 None %38
+      %val_0 = OpFunctionParameter %_arr_mat4x3_f16_uint_4
+         %41 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_mat4v3half_uint_4 Function %44
+          %i = OpVariable %_ptr_Function_uint Function %47
+%var_for_index = OpVariable %_ptr_Function__arr_mat4x3_f16_uint_4 Function %60
+               OpBranch %48
+         %48 = OpLabel
+               OpLoopMerge %49 %50 None
+               OpBranch %51
+         %51 = OpLabel
+         %53 = OpLoad %uint %i
+         %54 = OpULessThan %bool %53 %uint_4
+         %52 = OpLogicalNot %bool %54
+               OpSelectionMerge %56 None
+               OpBranchConditional %52 %57 %56
+         %57 = OpLabel
+               OpBranch %49
+         %56 = OpLabel
+               OpStore %var_for_index %val_0
+         %61 = OpLoad %uint %i
+         %63 = OpAccessChain %_ptr_Function_mat4v3half %arr %61
+         %65 = OpLoad %uint %i
+         %67 = OpAccessChain %_ptr_Function_mat4x3_f16 %var_for_index %65
+         %68 = OpLoad %mat4x3_f16 %67
+         %64 = OpFunctionCall %mat4v3half %conv_mat4x3_f16 %68
+               OpStore %63 %64
+               OpBranch %50
+         %50 = OpLabel
+         %69 = OpLoad %uint %i
+         %71 = OpIAdd %uint %69 %uint_1
+               OpStore %i %71
+               OpBranch %48
+         %49 = OpLabel
+         %72 = OpLoad %_arr_mat4v3half_uint_4 %arr
+               OpReturnValue %72
+               OpFunctionEnd
+          %f = OpFunction %void None %73
+         %75 = OpLabel
+         %80 = OpAccessChain %_ptr_Uniform__arr_mat4x3_f16_uint_4 %u %uint_0
+         %81 = OpLoad %_arr_mat4x3_f16_uint_4 %80
+         %77 = OpFunctionCall %_arr_mat4v3half_uint_4 %conv_arr4_mat4x3_f16 %81
+         %76 = OpFunctionCall %void %a %77
+         %85 = OpAccessChain %_ptr_Uniform_mat4x3_f16 %u %uint_0 %uint_1
+         %86 = OpLoad %mat4x3_f16 %85
+         %83 = OpFunctionCall %mat4v3half %conv_mat4x3_f16 %86
+         %82 = OpFunctionCall %void %b %83
+         %89 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0 %uint_1 %uint_0
+         %90 = OpLoad %v3half %89
+         %91 = OpVectorShuffle %v3half %90 %90 2 0 1
+         %87 = OpFunctionCall %void %c %91
+         %93 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0 %uint_1 %uint_0
+         %94 = OpLoad %v3half %93
+         %95 = OpVectorShuffle %v3half %94 %94 2 0 1
+         %96 = OpCompositeExtract %half %95 0
+         %92 = OpFunctionCall %void %d %96
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_fn.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_fn.wgsl.expected.wgsl
new file mode 100644
index 0000000..280e923
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_fn.wgsl.expected.wgsl

@@ -0,0 +1,23 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : array<mat4x3<f16>, 4>;
+
+fn a(a : array<mat4x3<f16>, 4>) {
+}
+
+fn b(m : mat4x3<f16>) {
+}
+
+fn c(v : vec3<f16>) {
+}
+
+fn d(f : f16) {
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  a(u);
+  b(u[1]);
+  c(u[1][0].zxy);
+  d(u[1][0].zxy.x);
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_private.wgsl b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_private.wgsl
new file mode 100644
index 0000000..20e3e86
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_private.wgsl

@@ -0,0 +1,12 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : array<mat4x3<f16>, 4>;
+var<private> p : array<mat4x3<f16>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    p = u;
+    p[1] = u[2];
+    p[1][0] = u[0][1].zxy;
+    p[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_private.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_private.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..4b50702
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_private.wgsl.expected.dxc.hlsl

@@ -0,0 +1,51 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[8];
+};
+static matrix<float16_t, 4, 3> p[4] = (matrix<float16_t, 4, 3>[4])0;
+
+matrix<float16_t, 4, 3> tint_symbol_1(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  return matrix<float16_t, 4, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]), vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));
+}
+
+typedef matrix<float16_t, 4, 3> tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[8], uint offset) {
+  matrix<float16_t, 4, 3> arr[4] = (matrix<float16_t, 4, 3>[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 32u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = tint_symbol_1(u, 64u);
+  uint2 ubo_load_8 = u[0].zw;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  float16_t ubo_load_8_y = f16tof32(ubo_load_8[0] >> 16);
+  p[1][0] = vector<float16_t, 3>(ubo_load_8_xz[0], ubo_load_8_y, ubo_load_8_xz[1]).zxy;
+  p[1][0].x = float16_t(f16tof32(((u[0].z) & 0xFFFF)));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_private.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_private.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..71ba00c
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_private.wgsl.expected.fxc.hlsl

@@ -0,0 +1,56 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[8];
+};
+static matrix<float16_t, 4, 3> p[4] = (matrix<float16_t, 4, 3>[4])0;
+
+matrix<float16_t, 4, 3> tint_symbol_1(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  return matrix<float16_t, 4, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]), vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));
+}
+
+typedef matrix<float16_t, 4, 3> tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[8], uint offset) {
+  matrix<float16_t, 4, 3> arr[4] = (matrix<float16_t, 4, 3>[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 32u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = tint_symbol_1(u, 64u);
+  uint2 ubo_load_8 = u[0].zw;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  float16_t ubo_load_8_y = f16tof32(ubo_load_8[0] >> 16);
+  p[1][0] = vector<float16_t, 3>(ubo_load_8_xz[0], ubo_load_8_y, ubo_load_8_xz[1]).zxy;
+  p[1][0].x = float16_t(f16tof32(((u[0].z) & 0xFFFF)));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x00000222F970C590(4,15-23): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_private.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_private.wgsl.expected.glsl
new file mode 100644
index 0000000..fc3fbc5
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_private.wgsl.expected.glsl

@@ -0,0 +1,41 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct mat4x3_f16 {
+  f16vec3 col0;
+  f16vec3 col1;
+  f16vec3 col2;
+  f16vec3 col3;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  mat4x3_f16 inner[4];
+} u;
+
+f16mat4x3 p[4] = f16mat4x3[4](f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf));
+f16mat4x3 conv_mat4x3_f16(mat4x3_f16 val) {
+  return f16mat4x3(val.col0, val.col1, val.col2, val.col3);
+}
+
+f16mat4x3[4] conv_arr4_mat4x3_f16(mat4x3_f16 val[4]) {
+  f16mat4x3 arr[4] = f16mat4x3[4](f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_mat4x3_f16(val[i]);
+    }
+  }
+  return arr;
+}
+
+void f() {
+  p = conv_arr4_mat4x3_f16(u.inner);
+  p[1] = conv_mat4x3_f16(u.inner[2u]);
+  p[1][0] = u.inner[0u].col1.zxy;
+  p[1][0].x = u.inner[0u].col1[0u];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_private.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_private.wgsl.expected.msl
new file mode 100644
index 0000000..89c4a04
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_private.wgsl.expected.msl

@@ -0,0 +1,25 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+kernel void f(const constant tint_array<half4x3, 4>* tint_symbol_1 [[buffer(0)]]) {
+  thread tint_array<half4x3, 4> tint_symbol = {};
+  tint_symbol = *(tint_symbol_1);
+  tint_symbol[1] = (*(tint_symbol_1))[2];
+  tint_symbol[1][0] = half3((*(tint_symbol_1))[0][1]).zxy;
+  tint_symbol[1][0][0] = (*(tint_symbol_1))[0][1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_private.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_private.wgsl.expected.spvasm
new file mode 100644
index 0000000..1223488
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_private.wgsl.expected.spvasm

@@ -0,0 +1,149 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 88
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %mat4x3_f16 "mat4x3_f16"
+               OpMemberName %mat4x3_f16 0 "col0"
+               OpMemberName %mat4x3_f16 1 "col1"
+               OpMemberName %mat4x3_f16 2 "col2"
+               OpMemberName %mat4x3_f16 3 "col3"
+               OpName %u "u"
+               OpName %p "p"
+               OpName %conv_mat4x3_f16 "conv_mat4x3_f16"
+               OpName %val "val"
+               OpName %conv_arr4_mat4x3_f16 "conv_arr4_mat4x3_f16"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %mat4x3_f16 0 Offset 0
+               OpMemberDecorate %mat4x3_f16 1 Offset 8
+               OpMemberDecorate %mat4x3_f16 2 Offset 16
+               OpMemberDecorate %mat4x3_f16 3 Offset 24
+               OpDecorate %_arr_mat4x3_f16_uint_4 ArrayStride 32
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpDecorate %_arr_mat4v3half_uint_4 ArrayStride 32
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+ %mat4x3_f16 = OpTypeStruct %v3half %v3half %v3half %v3half
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat4x3_f16_uint_4 = OpTypeArray %mat4x3_f16 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_mat4x3_f16_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat4v3half = OpTypeMatrix %v3half 4
+%_arr_mat4v3half_uint_4 = OpTypeArray %mat4v3half %uint_4
+%_ptr_Private__arr_mat4v3half_uint_4 = OpTypePointer Private %_arr_mat4v3half_uint_4
+         %14 = OpConstantNull %_arr_mat4v3half_uint_4
+          %p = OpVariable %_ptr_Private__arr_mat4v3half_uint_4 Private %14
+         %15 = OpTypeFunction %mat4v3half %mat4x3_f16
+         %24 = OpTypeFunction %_arr_mat4v3half_uint_4 %_arr_mat4x3_f16_uint_4
+%_ptr_Function__arr_mat4v3half_uint_4 = OpTypePointer Function %_arr_mat4v3half_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %32 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_mat4x3_f16_uint_4 = OpTypePointer Function %_arr_mat4x3_f16_uint_4
+         %45 = OpConstantNull %_arr_mat4x3_f16_uint_4
+%_ptr_Function_mat4v3half = OpTypePointer Function %mat4v3half
+%_ptr_Function_mat4x3_f16 = OpTypePointer Function %mat4x3_f16
+     %uint_1 = OpConstant %uint 1
+       %void = OpTypeVoid
+         %58 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_mat4x3_f16_uint_4 = OpTypePointer Uniform %_arr_mat4x3_f16_uint_4
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Private_mat4v3half = OpTypePointer Private %mat4v3half
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_mat4x3_f16 = OpTypePointer Uniform %mat4x3_f16
+         %76 = OpConstantNull %int
+%_ptr_Private_v3half = OpTypePointer Private %v3half
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+%_ptr_Private_half = OpTypePointer Private %half
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+%conv_mat4x3_f16 = OpFunction %mat4v3half None %15
+        %val = OpFunctionParameter %mat4x3_f16
+         %18 = OpLabel
+         %19 = OpCompositeExtract %v3half %val 0
+         %20 = OpCompositeExtract %v3half %val 1
+         %21 = OpCompositeExtract %v3half %val 2
+         %22 = OpCompositeExtract %v3half %val 3
+         %23 = OpCompositeConstruct %mat4v3half %19 %20 %21 %22
+               OpReturnValue %23
+               OpFunctionEnd
+%conv_arr4_mat4x3_f16 = OpFunction %_arr_mat4v3half_uint_4 None %24
+      %val_0 = OpFunctionParameter %_arr_mat4x3_f16_uint_4
+         %27 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_mat4v3half_uint_4 Function %14
+          %i = OpVariable %_ptr_Function_uint Function %32
+%var_for_index = OpVariable %_ptr_Function__arr_mat4x3_f16_uint_4 Function %45
+               OpBranch %33
+         %33 = OpLabel
+               OpLoopMerge %34 %35 None
+               OpBranch %36
+         %36 = OpLabel
+         %38 = OpLoad %uint %i
+         %39 = OpULessThan %bool %38 %uint_4
+         %37 = OpLogicalNot %bool %39
+               OpSelectionMerge %41 None
+               OpBranchConditional %37 %42 %41
+         %42 = OpLabel
+               OpBranch %34
+         %41 = OpLabel
+               OpStore %var_for_index %val_0
+         %46 = OpLoad %uint %i
+         %48 = OpAccessChain %_ptr_Function_mat4v3half %arr %46
+         %50 = OpLoad %uint %i
+         %52 = OpAccessChain %_ptr_Function_mat4x3_f16 %var_for_index %50
+         %53 = OpLoad %mat4x3_f16 %52
+         %49 = OpFunctionCall %mat4v3half %conv_mat4x3_f16 %53
+               OpStore %48 %49
+               OpBranch %35
+         %35 = OpLabel
+         %54 = OpLoad %uint %i
+         %56 = OpIAdd %uint %54 %uint_1
+               OpStore %i %56
+               OpBranch %33
+         %34 = OpLabel
+         %57 = OpLoad %_arr_mat4v3half_uint_4 %arr
+               OpReturnValue %57
+               OpFunctionEnd
+          %f = OpFunction %void None %58
+         %61 = OpLabel
+         %65 = OpAccessChain %_ptr_Uniform__arr_mat4x3_f16_uint_4 %u %uint_0
+         %66 = OpLoad %_arr_mat4x3_f16_uint_4 %65
+         %62 = OpFunctionCall %_arr_mat4v3half_uint_4 %conv_arr4_mat4x3_f16 %66
+               OpStore %p %62
+         %70 = OpAccessChain %_ptr_Private_mat4v3half %p %int_1
+         %74 = OpAccessChain %_ptr_Uniform_mat4x3_f16 %u %uint_0 %uint_2
+         %75 = OpLoad %mat4x3_f16 %74
+         %71 = OpFunctionCall %mat4v3half %conv_mat4x3_f16 %75
+               OpStore %70 %71
+         %78 = OpAccessChain %_ptr_Private_v3half %p %int_1 %76
+         %80 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0 %32 %uint_1
+         %81 = OpLoad %v3half %80
+         %82 = OpVectorShuffle %v3half %81 %81 2 0 1
+               OpStore %78 %82
+         %84 = OpAccessChain %_ptr_Private_half %p %int_1 %76 %uint_0
+         %86 = OpAccessChain %_ptr_Uniform_half %u %uint_0 %32 %uint_1 %32
+         %87 = OpLoad %half %86
+               OpStore %84 %87
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_private.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_private.wgsl.expected.wgsl
new file mode 100644
index 0000000..cfa4a46
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_private.wgsl.expected.wgsl

@@ -0,0 +1,13 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : array<mat4x3<f16>, 4>;
+
+var<private> p : array<mat4x3<f16>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  p = u;
+  p[1] = u[2];
+  p[1][0] = u[0][1].zxy;
+  p[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_storage.wgsl b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_storage.wgsl
new file mode 100644
index 0000000..1858b2a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_storage.wgsl

@@ -0,0 +1,12 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : array<mat4x3<f16>, 4>;
+@group(0) @binding(1) var<storage, read_write> s : array<mat4x3<f16>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    s = u;
+    s[1] = u[2];
+    s[1][0] = u[0][1].zxy;
+    s[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_storage.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_storage.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..a101585
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_storage.wgsl.expected.dxc.hlsl

@@ -0,0 +1,67 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[8];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 4, 3> value) {
+  buffer.Store<vector<float16_t, 3> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 8u), value[1u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 16u), value[2u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 24u), value[3u]);
+}
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 4, 3> value[4]) {
+  matrix<float16_t, 4, 3> array[4] = value;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      tint_symbol_1(buffer, (offset + (i * 32u)), array[i]);
+    }
+  }
+}
+
+matrix<float16_t, 4, 3> tint_symbol_4(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  return matrix<float16_t, 4, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]), vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));
+}
+
+typedef matrix<float16_t, 4, 3> tint_symbol_3_ret[4];
+tint_symbol_3_ret tint_symbol_3(uint4 buffer[8], uint offset) {
+  matrix<float16_t, 4, 3> arr[4] = (matrix<float16_t, 4, 3>[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_4(buffer, (offset + (i_1 * 32u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_3(u, 0u));
+  tint_symbol_1(s, 32u, tint_symbol_4(u, 64u));
+  uint2 ubo_load_8 = u[0].zw;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  float16_t ubo_load_8_y = f16tof32(ubo_load_8[0] >> 16);
+  s.Store<vector<float16_t, 3> >(32u, vector<float16_t, 3>(ubo_load_8_xz[0], ubo_load_8_y, ubo_load_8_xz[1]).zxy);
+  s.Store<float16_t>(32u, float16_t(f16tof32(((u[0].z) & 0xFFFF))));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_storage.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_storage.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..567a5eb
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_storage.wgsl.expected.fxc.hlsl

@@ -0,0 +1,73 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[8];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 4, 3> value) {
+  buffer.Store<vector<float16_t, 3> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 8u), value[1u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 16u), value[2u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 24u), value[3u]);
+}
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 4, 3> value[4]) {
+  matrix<float16_t, 4, 3> array[4] = value;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      tint_symbol_1(buffer, (offset + (i * 32u)), array[i]);
+    }
+  }
+}
+
+matrix<float16_t, 4, 3> tint_symbol_4(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  return matrix<float16_t, 4, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]), vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));
+}
+
+typedef matrix<float16_t, 4, 3> tint_symbol_3_ret[4];
+tint_symbol_3_ret tint_symbol_3(uint4 buffer[8], uint offset) {
+  matrix<float16_t, 4, 3> arr[4] = (matrix<float16_t, 4, 3>[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_4(buffer, (offset + (i_1 * 32u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_3(u, 0u));
+  tint_symbol_1(s, 32u, tint_symbol_4(u, 64u));
+  uint2 ubo_load_8 = u[0].zw;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  float16_t ubo_load_8_y = f16tof32(ubo_load_8[0] >> 16);
+  s.Store<vector<float16_t, 3> >(32u, vector<float16_t, 3>(ubo_load_8_xz[0], ubo_load_8_y, ubo_load_8_xz[1]).zxy);
+  s.Store<float16_t>(32u, float16_t(f16tof32(((u[0].z) & 0xFFFF))));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000021F13DB33D0(6,68-76): error X3000: syntax error: unexpected token 'float16_t'
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000021F13DB33D0(7,3-14): error X3018: invalid subscript 'Store'
+

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_storage.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_storage.wgsl.expected.glsl
new file mode 100644
index 0000000..2bf3e2f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_storage.wgsl.expected.glsl

@@ -0,0 +1,44 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct mat4x3_f16 {
+  f16vec3 col0;
+  f16vec3 col1;
+  f16vec3 col2;
+  f16vec3 col3;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  mat4x3_f16 inner[4];
+} u;
+
+layout(binding = 1, std430) buffer u_block_ssbo {
+  f16mat4x3 inner[4];
+} s;
+
+f16mat4x3 conv_mat4x3_f16(mat4x3_f16 val) {
+  return f16mat4x3(val.col0, val.col1, val.col2, val.col3);
+}
+
+f16mat4x3[4] conv_arr4_mat4x3_f16(mat4x3_f16 val[4]) {
+  f16mat4x3 arr[4] = f16mat4x3[4](f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_mat4x3_f16(val[i]);
+    }
+  }
+  return arr;
+}
+
+void f() {
+  s.inner = conv_arr4_mat4x3_f16(u.inner);
+  s.inner[1] = conv_mat4x3_f16(u.inner[2u]);
+  s.inner[1][0] = u.inner[0u].col1.zxy;
+  s.inner[1][0].x = u.inner[0u].col1[0u];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_storage.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_storage.wgsl.expected.msl
new file mode 100644
index 0000000..7405552
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_storage.wgsl.expected.msl

@@ -0,0 +1,24 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+kernel void f(device tint_array<half4x3, 4>* tint_symbol [[buffer(1)]], const constant tint_array<half4x3, 4>* tint_symbol_1 [[buffer(0)]]) {
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[2];
+  (*(tint_symbol))[1][0] = half3((*(tint_symbol_1))[0][1]).zxy;
+  (*(tint_symbol))[1][0][0] = (*(tint_symbol_1))[0][1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_storage.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_storage.wgsl.expected.spvasm
new file mode 100644
index 0000000..cb52845
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_storage.wgsl.expected.spvasm

@@ -0,0 +1,160 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 91
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %mat4x3_f16 "mat4x3_f16"
+               OpMemberName %mat4x3_f16 0 "col0"
+               OpMemberName %mat4x3_f16 1 "col1"
+               OpMemberName %mat4x3_f16 2 "col2"
+               OpMemberName %mat4x3_f16 3 "col3"
+               OpName %u "u"
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %s "s"
+               OpName %conv_mat4x3_f16 "conv_mat4x3_f16"
+               OpName %val "val"
+               OpName %conv_arr4_mat4x3_f16 "conv_arr4_mat4x3_f16"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %mat4x3_f16 0 Offset 0
+               OpMemberDecorate %mat4x3_f16 1 Offset 8
+               OpMemberDecorate %mat4x3_f16 2 Offset 16
+               OpMemberDecorate %mat4x3_f16 3 Offset 24
+               OpDecorate %_arr_mat4x3_f16_uint_4 ArrayStride 32
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 8
+               OpDecorate %_arr_mat4v3half_uint_4 ArrayStride 32
+               OpDecorate %s DescriptorSet 0
+               OpDecorate %s Binding 1
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+ %mat4x3_f16 = OpTypeStruct %v3half %v3half %v3half %v3half
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat4x3_f16_uint_4 = OpTypeArray %mat4x3_f16 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_mat4x3_f16_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat4v3half = OpTypeMatrix %v3half 4
+%_arr_mat4v3half_uint_4 = OpTypeArray %mat4v3half %uint_4
+    %u_block = OpTypeStruct %_arr_mat4v3half_uint_4
+%_ptr_StorageBuffer_u_block = OpTypePointer StorageBuffer %u_block
+          %s = OpVariable %_ptr_StorageBuffer_u_block StorageBuffer
+         %15 = OpTypeFunction %mat4v3half %mat4x3_f16
+         %24 = OpTypeFunction %_arr_mat4v3half_uint_4 %_arr_mat4x3_f16_uint_4
+%_ptr_Function__arr_mat4v3half_uint_4 = OpTypePointer Function %_arr_mat4v3half_uint_4
+         %30 = OpConstantNull %_arr_mat4v3half_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %33 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_mat4x3_f16_uint_4 = OpTypePointer Function %_arr_mat4x3_f16_uint_4
+         %46 = OpConstantNull %_arr_mat4x3_f16_uint_4
+%_ptr_Function_mat4v3half = OpTypePointer Function %mat4v3half
+%_ptr_Function_mat4x3_f16 = OpTypePointer Function %mat4x3_f16
+     %uint_1 = OpConstant %uint 1
+       %void = OpTypeVoid
+         %59 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_StorageBuffer__arr_mat4v3half_uint_4 = OpTypePointer StorageBuffer %_arr_mat4v3half_uint_4
+%_ptr_Uniform__arr_mat4x3_f16_uint_4 = OpTypePointer Uniform %_arr_mat4x3_f16_uint_4
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_StorageBuffer_mat4v3half = OpTypePointer StorageBuffer %mat4v3half
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_mat4x3_f16 = OpTypePointer Uniform %mat4x3_f16
+         %79 = OpConstantNull %int
+%_ptr_StorageBuffer_v3half = OpTypePointer StorageBuffer %v3half
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+%_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+%conv_mat4x3_f16 = OpFunction %mat4v3half None %15
+        %val = OpFunctionParameter %mat4x3_f16
+         %18 = OpLabel
+         %19 = OpCompositeExtract %v3half %val 0
+         %20 = OpCompositeExtract %v3half %val 1
+         %21 = OpCompositeExtract %v3half %val 2
+         %22 = OpCompositeExtract %v3half %val 3
+         %23 = OpCompositeConstruct %mat4v3half %19 %20 %21 %22
+               OpReturnValue %23
+               OpFunctionEnd
+%conv_arr4_mat4x3_f16 = OpFunction %_arr_mat4v3half_uint_4 None %24
+      %val_0 = OpFunctionParameter %_arr_mat4x3_f16_uint_4
+         %27 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_mat4v3half_uint_4 Function %30
+          %i = OpVariable %_ptr_Function_uint Function %33
+%var_for_index = OpVariable %_ptr_Function__arr_mat4x3_f16_uint_4 Function %46
+               OpBranch %34
+         %34 = OpLabel
+               OpLoopMerge %35 %36 None
+               OpBranch %37
+         %37 = OpLabel
+         %39 = OpLoad %uint %i
+         %40 = OpULessThan %bool %39 %uint_4
+         %38 = OpLogicalNot %bool %40
+               OpSelectionMerge %42 None
+               OpBranchConditional %38 %43 %42
+         %43 = OpLabel
+               OpBranch %35
+         %42 = OpLabel
+               OpStore %var_for_index %val_0
+         %47 = OpLoad %uint %i
+         %49 = OpAccessChain %_ptr_Function_mat4v3half %arr %47
+         %51 = OpLoad %uint %i
+         %53 = OpAccessChain %_ptr_Function_mat4x3_f16 %var_for_index %51
+         %54 = OpLoad %mat4x3_f16 %53
+         %50 = OpFunctionCall %mat4v3half %conv_mat4x3_f16 %54
+               OpStore %49 %50
+               OpBranch %36
+         %36 = OpLabel
+         %55 = OpLoad %uint %i
+         %57 = OpIAdd %uint %55 %uint_1
+               OpStore %i %57
+               OpBranch %34
+         %35 = OpLabel
+         %58 = OpLoad %_arr_mat4v3half_uint_4 %arr
+               OpReturnValue %58
+               OpFunctionEnd
+          %f = OpFunction %void None %59
+         %62 = OpLabel
+         %65 = OpAccessChain %_ptr_StorageBuffer__arr_mat4v3half_uint_4 %s %uint_0
+         %68 = OpAccessChain %_ptr_Uniform__arr_mat4x3_f16_uint_4 %u %uint_0
+         %69 = OpLoad %_arr_mat4x3_f16_uint_4 %68
+         %66 = OpFunctionCall %_arr_mat4v3half_uint_4 %conv_arr4_mat4x3_f16 %69
+               OpStore %65 %66
+         %73 = OpAccessChain %_ptr_StorageBuffer_mat4v3half %s %uint_0 %int_1
+         %77 = OpAccessChain %_ptr_Uniform_mat4x3_f16 %u %uint_0 %uint_2
+         %78 = OpLoad %mat4x3_f16 %77
+         %74 = OpFunctionCall %mat4v3half %conv_mat4x3_f16 %78
+               OpStore %73 %74
+         %81 = OpAccessChain %_ptr_StorageBuffer_v3half %s %uint_0 %int_1 %79
+         %83 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0 %33 %uint_1
+         %84 = OpLoad %v3half %83
+         %85 = OpVectorShuffle %v3half %84 %84 2 0 1
+               OpStore %81 %85
+         %87 = OpAccessChain %_ptr_StorageBuffer_half %s %uint_0 %int_1 %79 %uint_0
+         %89 = OpAccessChain %_ptr_Uniform_half %u %uint_0 %33 %uint_1 %33
+         %90 = OpLoad %half %89
+               OpStore %87 %90
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_storage.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_storage.wgsl.expected.wgsl
new file mode 100644
index 0000000..3ff7968
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_storage.wgsl.expected.wgsl

@@ -0,0 +1,13 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : array<mat4x3<f16>, 4>;
+
+@group(0) @binding(1) var<storage, read_write> s : array<mat4x3<f16>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  s = u;
+  s[1] = u[2];
+  s[1][0] = u[0][1].zxy;
+  s[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_workgroup.wgsl b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_workgroup.wgsl
new file mode 100644
index 0000000..faaeba5
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_workgroup.wgsl

@@ -0,0 +1,12 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : array<mat4x3<f16>, 4>;
+var<workgroup> w : array<mat4x3<f16>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    w = u;
+    w[1] = u[2];
+    w[1][0] = u[0][1].zxy;
+    w[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_workgroup.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_workgroup.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..6024dea
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_workgroup.wgsl.expected.dxc.hlsl

@@ -0,0 +1,66 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[8];
+};
+groupshared matrix<float16_t, 4, 3> w[4];
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+matrix<float16_t, 4, 3> tint_symbol_3(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  return matrix<float16_t, 4, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]), vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));
+}
+
+typedef matrix<float16_t, 4, 3> tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[8], uint offset) {
+  matrix<float16_t, 4, 3> arr[4] = (matrix<float16_t, 4, 3>[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 32u)));
+    }
+  }
+  return arr;
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      const uint i = idx;
+      w[i] = matrix<float16_t, 4, 3>((float16_t(0.0h)).xxx, (float16_t(0.0h)).xxx, (float16_t(0.0h)).xxx, (float16_t(0.0h)).xxx);
+    }
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = tint_symbol_3(u, 64u);
+  uint2 ubo_load_8 = u[0].zw;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  float16_t ubo_load_8_y = f16tof32(ubo_load_8[0] >> 16);
+  w[1][0] = vector<float16_t, 3>(ubo_load_8_xz[0], ubo_load_8_y, ubo_load_8_xz[1]).zxy;
+  w[1][0].x = float16_t(f16tof32(((u[0].z) & 0xFFFF)));
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_workgroup.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_workgroup.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..3f7cf0d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_workgroup.wgsl.expected.fxc.hlsl

@@ -0,0 +1,71 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[8];
+};
+groupshared matrix<float16_t, 4, 3> w[4];
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+matrix<float16_t, 4, 3> tint_symbol_3(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  return matrix<float16_t, 4, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]), vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));
+}
+
+typedef matrix<float16_t, 4, 3> tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[8], uint offset) {
+  matrix<float16_t, 4, 3> arr[4] = (matrix<float16_t, 4, 3>[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 32u)));
+    }
+  }
+  return arr;
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      const uint i = idx;
+      w[i] = matrix<float16_t, 4, 3>((float16_t(0.0h)).xxx, (float16_t(0.0h)).xxx, (float16_t(0.0h)).xxx, (float16_t(0.0h)).xxx);
+    }
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = tint_symbol_3(u, 64u);
+  uint2 ubo_load_8 = u[0].zw;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  float16_t ubo_load_8_y = f16tof32(ubo_load_8[0] >> 16);
+  w[1][0] = vector<float16_t, 3>(ubo_load_8_xz[0], ubo_load_8_y, ubo_load_8_xz[1]).zxy;
+  w[1][0].x = float16_t(f16tof32(((u[0].z) & 0xFFFF)));
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001D6F2EDF360(4,20-28): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_workgroup.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_workgroup.wgsl.expected.glsl
new file mode 100644
index 0000000..ebe86ff
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_workgroup.wgsl.expected.glsl

@@ -0,0 +1,48 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct mat4x3_f16 {
+  f16vec3 col0;
+  f16vec3 col1;
+  f16vec3 col2;
+  f16vec3 col3;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  mat4x3_f16 inner[4];
+} u;
+
+shared f16mat4x3 w[4];
+f16mat4x3 conv_mat4x3_f16(mat4x3_f16 val) {
+  return f16mat4x3(val.col0, val.col1, val.col2, val.col3);
+}
+
+f16mat4x3[4] conv_arr4_mat4x3_f16(mat4x3_f16 val[4]) {
+  f16mat4x3 arr[4] = f16mat4x3[4](f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_mat4x3_f16(val[i]);
+    }
+  }
+  return arr;
+}
+
+void f(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      uint i = idx;
+      w[i] = f16mat4x3(f16vec3(0.0hf), f16vec3(0.0hf), f16vec3(0.0hf), f16vec3(0.0hf));
+    }
+  }
+  barrier();
+  w = conv_arr4_mat4x3_f16(u.inner);
+  w[1] = conv_mat4x3_f16(u.inner[2u]);
+  w[1][0] = u.inner[0u].col1.zxy;
+  w[1][0].x = u.inner[0u].col1[0u];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f(gl_LocalInvocationIndex);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_workgroup.wgsl.expected.msl
new file mode 100644
index 0000000..5db2fda
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_workgroup.wgsl.expected.msl

@@ -0,0 +1,38 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct tint_symbol_5 {
+  tint_array<half4x3, 4> w;
+};
+
+void f_inner(uint local_invocation_index, threadgroup tint_array<half4x3, 4>* const tint_symbol, const constant tint_array<half4x3, 4>* const tint_symbol_1) {
+  for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+    uint const i = idx;
+    (*(tint_symbol))[i] = half4x3(half3(0.0h), half3(0.0h), half3(0.0h), half3(0.0h));
+  }
+  threadgroup_barrier(mem_flags::mem_threadgroup);
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[2];
+  (*(tint_symbol))[1][0] = half3((*(tint_symbol_1))[0][1]).zxy;
+  (*(tint_symbol))[1][0][0] = (*(tint_symbol_1))[0][1][0];
+}
+
+kernel void f(const constant tint_array<half4x3, 4>* tint_symbol_4 [[buffer(0)]], threadgroup tint_symbol_5* tint_symbol_3 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
+  threadgroup tint_array<half4x3, 4>* const tint_symbol_2 = &((*(tint_symbol_3)).w);
+  f_inner(local_invocation_index, tint_symbol_2, tint_symbol_4);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_workgroup.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_workgroup.wgsl.expected.spvasm
new file mode 100644
index 0000000..335346f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_workgroup.wgsl.expected.spvasm

@@ -0,0 +1,192 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 113
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f" %local_invocation_index_1
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %local_invocation_index_1 "local_invocation_index_1"
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %mat4x3_f16 "mat4x3_f16"
+               OpMemberName %mat4x3_f16 0 "col0"
+               OpMemberName %mat4x3_f16 1 "col1"
+               OpMemberName %mat4x3_f16 2 "col2"
+               OpMemberName %mat4x3_f16 3 "col3"
+               OpName %u "u"
+               OpName %w "w"
+               OpName %conv_mat4x3_f16 "conv_mat4x3_f16"
+               OpName %val "val"
+               OpName %conv_arr4_mat4x3_f16 "conv_arr4_mat4x3_f16"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %f_inner "f_inner"
+               OpName %local_invocation_index "local_invocation_index"
+               OpName %idx "idx"
+               OpName %f "f"
+               OpDecorate %local_invocation_index_1 BuiltIn LocalInvocationIndex
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %mat4x3_f16 0 Offset 0
+               OpMemberDecorate %mat4x3_f16 1 Offset 8
+               OpMemberDecorate %mat4x3_f16 2 Offset 16
+               OpMemberDecorate %mat4x3_f16 3 Offset 24
+               OpDecorate %_arr_mat4x3_f16_uint_4 ArrayStride 32
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpDecorate %_arr_mat4v3half_uint_4 ArrayStride 32
+       %uint = OpTypeInt 32 0
+%_ptr_Input_uint = OpTypePointer Input %uint
+%local_invocation_index_1 = OpVariable %_ptr_Input_uint Input
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+ %mat4x3_f16 = OpTypeStruct %v3half %v3half %v3half %v3half
+     %uint_4 = OpConstant %uint 4
+%_arr_mat4x3_f16_uint_4 = OpTypeArray %mat4x3_f16 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_mat4x3_f16_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat4v3half = OpTypeMatrix %v3half 4
+%_arr_mat4v3half_uint_4 = OpTypeArray %mat4v3half %uint_4
+%_ptr_Workgroup__arr_mat4v3half_uint_4 = OpTypePointer Workgroup %_arr_mat4v3half_uint_4
+          %w = OpVariable %_ptr_Workgroup__arr_mat4v3half_uint_4 Workgroup
+         %16 = OpTypeFunction %mat4v3half %mat4x3_f16
+         %25 = OpTypeFunction %_arr_mat4v3half_uint_4 %_arr_mat4x3_f16_uint_4
+%_ptr_Function__arr_mat4v3half_uint_4 = OpTypePointer Function %_arr_mat4v3half_uint_4
+         %31 = OpConstantNull %_arr_mat4v3half_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %34 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_mat4x3_f16_uint_4 = OpTypePointer Function %_arr_mat4x3_f16_uint_4
+         %47 = OpConstantNull %_arr_mat4x3_f16_uint_4
+%_ptr_Function_mat4v3half = OpTypePointer Function %mat4v3half
+%_ptr_Function_mat4x3_f16 = OpTypePointer Function %mat4x3_f16
+     %uint_1 = OpConstant %uint 1
+       %void = OpTypeVoid
+         %60 = OpTypeFunction %void %uint
+%_ptr_Workgroup_mat4v3half = OpTypePointer Workgroup %mat4v3half
+         %78 = OpConstantNull %mat4v3half
+     %uint_2 = OpConstant %uint 2
+   %uint_264 = OpConstant %uint 264
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_mat4x3_f16_uint_4 = OpTypePointer Uniform %_arr_mat4x3_f16_uint_4
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Uniform_mat4x3_f16 = OpTypePointer Uniform %mat4x3_f16
+         %96 = OpConstantNull %int
+%_ptr_Workgroup_v3half = OpTypePointer Workgroup %v3half
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+%_ptr_Workgroup_half = OpTypePointer Workgroup %half
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+        %108 = OpTypeFunction %void
+%conv_mat4x3_f16 = OpFunction %mat4v3half None %16
+        %val = OpFunctionParameter %mat4x3_f16
+         %19 = OpLabel
+         %20 = OpCompositeExtract %v3half %val 0
+         %21 = OpCompositeExtract %v3half %val 1
+         %22 = OpCompositeExtract %v3half %val 2
+         %23 = OpCompositeExtract %v3half %val 3
+         %24 = OpCompositeConstruct %mat4v3half %20 %21 %22 %23
+               OpReturnValue %24
+               OpFunctionEnd
+%conv_arr4_mat4x3_f16 = OpFunction %_arr_mat4v3half_uint_4 None %25
+      %val_0 = OpFunctionParameter %_arr_mat4x3_f16_uint_4
+         %28 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_mat4v3half_uint_4 Function %31
+          %i = OpVariable %_ptr_Function_uint Function %34
+%var_for_index = OpVariable %_ptr_Function__arr_mat4x3_f16_uint_4 Function %47
+               OpBranch %35
+         %35 = OpLabel
+               OpLoopMerge %36 %37 None
+               OpBranch %38
+         %38 = OpLabel
+         %40 = OpLoad %uint %i
+         %41 = OpULessThan %bool %40 %uint_4
+         %39 = OpLogicalNot %bool %41
+               OpSelectionMerge %43 None
+               OpBranchConditional %39 %44 %43
+         %44 = OpLabel
+               OpBranch %36
+         %43 = OpLabel
+               OpStore %var_for_index %val_0
+         %48 = OpLoad %uint %i
+         %50 = OpAccessChain %_ptr_Function_mat4v3half %arr %48
+         %52 = OpLoad %uint %i
+         %54 = OpAccessChain %_ptr_Function_mat4x3_f16 %var_for_index %52
+         %55 = OpLoad %mat4x3_f16 %54
+         %51 = OpFunctionCall %mat4v3half %conv_mat4x3_f16 %55
+               OpStore %50 %51
+               OpBranch %37
+         %37 = OpLabel
+         %56 = OpLoad %uint %i
+         %58 = OpIAdd %uint %56 %uint_1
+               OpStore %i %58
+               OpBranch %35
+         %36 = OpLabel
+         %59 = OpLoad %_arr_mat4v3half_uint_4 %arr
+               OpReturnValue %59
+               OpFunctionEnd
+    %f_inner = OpFunction %void None %60
+%local_invocation_index = OpFunctionParameter %uint
+         %64 = OpLabel
+        %idx = OpVariable %_ptr_Function_uint Function %34
+               OpStore %idx %local_invocation_index
+               OpBranch %66
+         %66 = OpLabel
+               OpLoopMerge %67 %68 None
+               OpBranch %69
+         %69 = OpLabel
+         %71 = OpLoad %uint %idx
+         %72 = OpULessThan %bool %71 %uint_4
+         %70 = OpLogicalNot %bool %72
+               OpSelectionMerge %73 None
+               OpBranchConditional %70 %74 %73
+         %74 = OpLabel
+               OpBranch %67
+         %73 = OpLabel
+         %75 = OpLoad %uint %idx
+         %77 = OpAccessChain %_ptr_Workgroup_mat4v3half %w %75
+               OpStore %77 %78
+               OpBranch %68
+         %68 = OpLabel
+         %79 = OpLoad %uint %idx
+         %80 = OpIAdd %uint %79 %uint_1
+               OpStore %idx %80
+               OpBranch %66
+         %67 = OpLabel
+               OpControlBarrier %uint_2 %uint_2 %uint_264
+         %87 = OpAccessChain %_ptr_Uniform__arr_mat4x3_f16_uint_4 %u %uint_0
+         %88 = OpLoad %_arr_mat4x3_f16_uint_4 %87
+         %84 = OpFunctionCall %_arr_mat4v3half_uint_4 %conv_arr4_mat4x3_f16 %88
+               OpStore %w %84
+         %91 = OpAccessChain %_ptr_Workgroup_mat4v3half %w %int_1
+         %94 = OpAccessChain %_ptr_Uniform_mat4x3_f16 %u %uint_0 %uint_2
+         %95 = OpLoad %mat4x3_f16 %94
+         %92 = OpFunctionCall %mat4v3half %conv_mat4x3_f16 %95
+               OpStore %91 %92
+         %98 = OpAccessChain %_ptr_Workgroup_v3half %w %int_1 %96
+        %100 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0 %34 %uint_1
+        %101 = OpLoad %v3half %100
+        %102 = OpVectorShuffle %v3half %101 %101 2 0 1
+               OpStore %98 %102
+        %104 = OpAccessChain %_ptr_Workgroup_half %w %int_1 %96 %uint_0
+        %106 = OpAccessChain %_ptr_Uniform_half %u %uint_0 %34 %uint_1 %34
+        %107 = OpLoad %half %106
+               OpStore %104 %107
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %108
+        %110 = OpLabel
+        %112 = OpLoad %uint %local_invocation_index_1
+        %111 = OpFunctionCall %void %f_inner %112
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_workgroup.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_workgroup.wgsl.expected.wgsl
new file mode 100644
index 0000000..39cd472
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_workgroup.wgsl.expected.wgsl

@@ -0,0 +1,13 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : array<mat4x3<f16>, 4>;
+
+var<workgroup> w : array<mat4x3<f16>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  w = u;
+  w[1] = u[2];
+  w[1][0] = u[0][1].zxy;
+  w[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f32/dynamic_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/array/mat4x3_f32/dynamic_index_via_ptr.wgsl
new file mode 100644
index 0000000..f1ae5b2
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/dynamic_index_via_ptr.wgsl

@@ -0,0 +1,15 @@
+@group(0) @binding(0) var<uniform> a : array<mat4x3<f32>, 4>;
+
+var<private> counter = 0;
+fn i() -> i32 { counter++; return counter; }
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a       = &a;
+  let p_a_i     = &((*p_a)[i()]);
+  let p_a_i_i   = &((*p_a_i)[i()]);
+
+  let l_a       : array<mat4x3<f32>, 4> = *p_a;
+  let l_a_i     : mat4x3<f32>           = *p_a_i;
+  let l_a_i_i   : vec3<f32>             = *p_a_i_i;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x3_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..0d9daa8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,39 @@
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[16];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+float4x3 tint_symbol_1(uint4 buffer[16], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
+}
+
+typedef float4x3 tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[16], uint offset) {
+  float4x3 arr[4] = (float4x3[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 64u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_a_i_save = i();
+  const int p_a_i_i_save = i();
+  const float4x3 l_a[4] = tint_symbol(a, 0u);
+  const float4x3 l_a_i = tint_symbol_1(a, (64u * uint(p_a_i_save)));
+  const uint scalar_offset_4 = (((64u * uint(p_a_i_save)) + (16u * uint(p_a_i_i_save)))) / 4;
+  const float3 l_a_i_i = asfloat(a[scalar_offset_4 / 4].xyz);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x3_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..0d9daa8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,39 @@
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[16];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+float4x3 tint_symbol_1(uint4 buffer[16], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
+}
+
+typedef float4x3 tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[16], uint offset) {
+  float4x3 arr[4] = (float4x3[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 64u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_a_i_save = i();
+  const int p_a_i_i_save = i();
+  const float4x3 l_a[4] = tint_symbol(a, 0u);
+  const float4x3 l_a_i = tint_symbol_1(a, (64u * uint(p_a_i_save)));
+  const uint scalar_offset_4 = (((64u * uint(p_a_i_save)) + (16u * uint(p_a_i_i_save)))) / 4;
+  const float3 l_a_i_i = asfloat(a[scalar_offset_4 / 4].xyz);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f32/dynamic_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat4x3_f32/dynamic_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..5a78b8c
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/dynamic_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,27 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform a_block_ubo {
+  mat4x3 inner[4];
+} a;
+
+int counter = 0;
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+void f() {
+  int tint_symbol = i();
+  int p_a_i_save = tint_symbol;
+  int tint_symbol_1 = i();
+  int p_a_i_i_save = tint_symbol_1;
+  mat4x3 l_a[4] = a.inner;
+  mat4x3 l_a_i = a.inner[p_a_i_save];
+  vec3 l_a_i_i = a.inner[p_a_i_save][p_a_i_i_save];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f32/dynamic_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat4x3_f32/dynamic_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..0c60210
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/dynamic_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,33 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+int i() {
+  thread int tint_symbol_2 = 0;
+  tint_symbol_2 = as_type<int>((as_type<uint>(tint_symbol_2) + as_type<uint>(1)));
+  return tint_symbol_2;
+}
+
+kernel void f(const constant tint_array<float4x3, 4>* tint_symbol_3 [[buffer(0)]]) {
+  int const tint_symbol = i();
+  int const p_a_i_save = tint_symbol;
+  int const tint_symbol_1 = i();
+  int const p_a_i_i_save = tint_symbol_1;
+  tint_array<float4x3, 4> const l_a = *(tint_symbol_3);
+  float4x3 const l_a_i = (*(tint_symbol_3))[p_a_i_save];
+  float3 const l_a_i_i = (*(tint_symbol_3))[p_a_i_save][p_a_i_i_save];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f32/dynamic_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat4x3_f32/dynamic_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..e1fda42
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/dynamic_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,64 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 37
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %a_block "a_block"
+               OpMemberName %a_block 0 "inner"
+               OpName %a "a"
+               OpName %counter "counter"
+               OpName %i "i"
+               OpName %f "f"
+               OpDecorate %a_block Block
+               OpMemberDecorate %a_block 0 Offset 0
+               OpMemberDecorate %a_block 0 ColMajor
+               OpMemberDecorate %a_block 0 MatrixStride 16
+               OpDecorate %_arr_mat4v3float_uint_4 ArrayStride 64
+               OpDecorate %a NonWritable
+               OpDecorate %a DescriptorSet 0
+               OpDecorate %a Binding 0
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat4v3float = OpTypeMatrix %v3float 4
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat4v3float_uint_4 = OpTypeArray %mat4v3float %uint_4
+    %a_block = OpTypeStruct %_arr_mat4v3float_uint_4
+%_ptr_Uniform_a_block = OpTypePointer Uniform %a_block
+          %a = OpVariable %_ptr_Uniform_a_block Uniform
+        %int = OpTypeInt 32 1
+         %11 = OpConstantNull %int
+%_ptr_Private_int = OpTypePointer Private %int
+    %counter = OpVariable %_ptr_Private_int Private %11
+         %14 = OpTypeFunction %int
+      %int_1 = OpConstant %int 1
+       %void = OpTypeVoid
+         %21 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_mat4v3float_uint_4 = OpTypePointer Uniform %_arr_mat4v3float_uint_4
+%_ptr_Uniform_mat4v3float = OpTypePointer Uniform %mat4v3float
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+          %i = OpFunction %int None %14
+         %16 = OpLabel
+         %17 = OpLoad %int %counter
+         %19 = OpIAdd %int %17 %int_1
+               OpStore %counter %19
+         %20 = OpLoad %int %counter
+               OpReturnValue %20
+               OpFunctionEnd
+          %f = OpFunction %void None %21
+         %24 = OpLabel
+         %25 = OpFunctionCall %int %i
+         %26 = OpFunctionCall %int %i
+         %29 = OpAccessChain %_ptr_Uniform__arr_mat4v3float_uint_4 %a %uint_0
+         %30 = OpLoad %_arr_mat4v3float_uint_4 %29
+         %32 = OpAccessChain %_ptr_Uniform_mat4v3float %a %uint_0 %25
+         %33 = OpLoad %mat4v3float %32
+         %35 = OpAccessChain %_ptr_Uniform_v3float %a %uint_0 %25 %26
+         %36 = OpLoad %v3float %35
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f32/dynamic_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat4x3_f32/dynamic_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..aa2b7eb
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/dynamic_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,18 @@
+@group(0) @binding(0) var<uniform> a : array<mat4x3<f32>, 4>;
+
+var<private> counter = 0;
+
+fn i() -> i32 {
+  counter++;
+  return counter;
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &(a);
+  let p_a_i = &((*(p_a))[i()]);
+  let p_a_i_i = &((*(p_a_i))[i()]);
+  let l_a : array<mat4x3<f32>, 4> = *(p_a);
+  let l_a_i : mat4x3<f32> = *(p_a_i);
+  let l_a_i_i : vec3<f32> = *(p_a_i_i);
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f32/static_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/array/mat4x3_f32/static_index_via_ptr.wgsl
new file mode 100644
index 0000000..47ec86d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/static_index_via_ptr.wgsl

@@ -0,0 +1,12 @@
+@group(0) @binding(0) var<uniform> a : array<mat4x3<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a       = &a;
+  let p_a_2     = &((*p_a)[2]);
+  let p_a_2_1   = &((*p_a_2)[1]);
+
+  let l_a       : array<mat4x3<f32>, 4> = *p_a;
+  let l_a_i     : mat4x3<f32>           = *p_a_2;
+  let l_a_i_i   : vec3<f32>             = *p_a_2_1;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x3_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..7b865c6
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,30 @@
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[16];
+};
+
+float4x3 tint_symbol_1(uint4 buffer[16], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
+}
+
+typedef float4x3 tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[16], uint offset) {
+  float4x3 arr[4] = (float4x3[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 64u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float4x3 l_a[4] = tint_symbol(a, 0u);
+  const float4x3 l_a_i = tint_symbol_1(a, 128u);
+  const float3 l_a_i_i = asfloat(a[9].xyz);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x3_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..7b865c6
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,30 @@
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[16];
+};
+
+float4x3 tint_symbol_1(uint4 buffer[16], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
+}
+
+typedef float4x3 tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[16], uint offset) {
+  float4x3 arr[4] = (float4x3[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 64u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float4x3 l_a[4] = tint_symbol(a, 0u);
+  const float4x3 l_a_i = tint_symbol_1(a, 128u);
+  const float3 l_a_i_i = asfloat(a[9].xyz);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f32/static_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat4x3_f32/static_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..f4c0c5c
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/static_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,17 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform a_block_ubo {
+  mat4x3 inner[4];
+} a;
+
+void f() {
+  mat4x3 l_a[4] = a.inner;
+  mat4x3 l_a_i = a.inner[2];
+  vec3 l_a_i_i = a.inner[2][1];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f32/static_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat4x3_f32/static_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..0b5148e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/static_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,23 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+kernel void f(const constant tint_array<float4x3, 4>* tint_symbol [[buffer(0)]]) {
+  tint_array<float4x3, 4> const l_a = *(tint_symbol);
+  float4x3 const l_a_i = (*(tint_symbol))[2];
+  float3 const l_a_i_i = (*(tint_symbol))[2][1];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f32/static_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat4x3_f32/static_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..d03dc81
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/static_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,49 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 27
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %a_block "a_block"
+               OpMemberName %a_block 0 "inner"
+               OpName %a "a"
+               OpName %f "f"
+               OpDecorate %a_block Block
+               OpMemberDecorate %a_block 0 Offset 0
+               OpMemberDecorate %a_block 0 ColMajor
+               OpMemberDecorate %a_block 0 MatrixStride 16
+               OpDecorate %_arr_mat4v3float_uint_4 ArrayStride 64
+               OpDecorate %a NonWritable
+               OpDecorate %a DescriptorSet 0
+               OpDecorate %a Binding 0
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat4v3float = OpTypeMatrix %v3float 4
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat4v3float_uint_4 = OpTypeArray %mat4v3float %uint_4
+    %a_block = OpTypeStruct %_arr_mat4v3float_uint_4
+%_ptr_Uniform_a_block = OpTypePointer Uniform %a_block
+          %a = OpVariable %_ptr_Uniform_a_block Uniform
+       %void = OpTypeVoid
+         %10 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_mat4v3float_uint_4 = OpTypePointer Uniform %_arr_mat4v3float_uint_4
+        %int = OpTypeInt 32 1
+      %int_2 = OpConstant %int 2
+%_ptr_Uniform_mat4v3float = OpTypePointer Uniform %mat4v3float
+      %int_1 = OpConstant %int 1
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+          %f = OpFunction %void None %10
+         %13 = OpLabel
+         %16 = OpAccessChain %_ptr_Uniform__arr_mat4v3float_uint_4 %a %uint_0
+         %17 = OpLoad %_arr_mat4v3float_uint_4 %16
+         %21 = OpAccessChain %_ptr_Uniform_mat4v3float %a %uint_0 %int_2
+         %22 = OpLoad %mat4v3float %21
+         %25 = OpAccessChain %_ptr_Uniform_v3float %a %uint_0 %int_2 %int_1
+         %26 = OpLoad %v3float %25
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f32/static_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat4x3_f32/static_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..bfe8256
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/static_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,11 @@
+@group(0) @binding(0) var<uniform> a : array<mat4x3<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &(a);
+  let p_a_2 = &((*(p_a))[2]);
+  let p_a_2_1 = &((*(p_a_2))[1]);
+  let l_a : array<mat4x3<f32>, 4> = *(p_a);
+  let l_a_i : mat4x3<f32> = *(p_a_2);
+  let l_a_i_i : vec3<f32> = *(p_a_2_1);
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_builtin.wgsl b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_builtin.wgsl
new file mode 100644
index 0000000..d354ee8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_builtin.wgsl

@@ -0,0 +1,8 @@
+@group(0) @binding(0) var<uniform> u : array<mat4x3<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    let t = transpose(u[2]);
+    let l = length(u[0][1].zxy);
+    let a = abs(u[0][1].zxy.x);
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_builtin.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_builtin.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..f398365
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_builtin.wgsl.expected.dxc.hlsl

@@ -0,0 +1,19 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[16];
+};
+
+float4x3 tint_symbol(uint4 buffer[16], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float3x4 t = transpose(tint_symbol(u, 128u));
+  const float l = length(asfloat(u[1].xyz).zxy);
+  const float a = abs(asfloat(u[1].xyz).zxy.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_builtin.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_builtin.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..f398365
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_builtin.wgsl.expected.fxc.hlsl

@@ -0,0 +1,19 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[16];
+};
+
+float4x3 tint_symbol(uint4 buffer[16], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float3x4 t = transpose(tint_symbol(u, 128u));
+  const float l = length(asfloat(u[1].xyz).zxy);
+  const float a = abs(asfloat(u[1].xyz).zxy.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_builtin.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_builtin.wgsl.expected.glsl
new file mode 100644
index 0000000..fb08583
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_builtin.wgsl.expected.glsl

@@ -0,0 +1,17 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat4x3 inner[4];
+} u;
+
+void f() {
+  mat3x4 t = transpose(u.inner[2]);
+  float l = length(u.inner[0][1].zxy);
+  float a = abs(u.inner[0][1].zxy.x);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_builtin.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_builtin.wgsl.expected.msl
new file mode 100644
index 0000000..3edd8e3
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_builtin.wgsl.expected.msl

@@ -0,0 +1,23 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+kernel void f(const constant tint_array<float4x3, 4>* tint_symbol [[buffer(0)]]) {
+  float3x4 const t = transpose((*(tint_symbol))[2]);
+  float const l = length(float3((*(tint_symbol))[0][1]).zxy);
+  float const a = fabs(float3((*(tint_symbol))[0][1]).zxy[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_builtin.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_builtin.wgsl.expected.spvasm
new file mode 100644
index 0000000..018984f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_builtin.wgsl.expected.spvasm

@@ -0,0 +1,58 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 36
+; Schema: 0
+               OpCapability Shader
+         %24 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %_arr_mat4v3float_uint_4 ArrayStride 64
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat4v3float = OpTypeMatrix %v3float 4
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat4v3float_uint_4 = OpTypeArray %mat4v3float %uint_4
+    %u_block = OpTypeStruct %_arr_mat4v3float_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+       %void = OpTypeVoid
+         %10 = OpTypeFunction %void
+    %v4float = OpTypeVector %float 4
+%mat3v4float = OpTypeMatrix %v4float 3
+     %uint_0 = OpConstant %uint 0
+        %int = OpTypeInt 32 1
+      %int_2 = OpConstant %int 2
+%_ptr_Uniform_mat4v3float = OpTypePointer Uniform %mat4v3float
+         %25 = OpConstantNull %int
+      %int_1 = OpConstant %int 1
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+          %f = OpFunction %void None %10
+         %13 = OpLabel
+         %21 = OpAccessChain %_ptr_Uniform_mat4v3float %u %uint_0 %int_2
+         %22 = OpLoad %mat4v3float %21
+         %14 = OpTranspose %mat3v4float %22
+         %28 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %25 %int_1
+         %29 = OpLoad %v3float %28
+         %30 = OpVectorShuffle %v3float %29 %29 2 0 1
+         %23 = OpExtInst %float %24 Length %30
+         %32 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %25 %int_1
+         %33 = OpLoad %v3float %32
+         %34 = OpVectorShuffle %v3float %33 %33 2 0 1
+         %35 = OpCompositeExtract %float %34 0
+         %31 = OpExtInst %float %24 FAbs %35
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_builtin.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_builtin.wgsl.expected.wgsl
new file mode 100644
index 0000000..87104e4
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_builtin.wgsl.expected.wgsl

@@ -0,0 +1,8 @@
+@group(0) @binding(0) var<uniform> u : array<mat4x3<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let t = transpose(u[2]);
+  let l = length(u[0][1].zxy);
+  let a = abs(u[0][1].zxy.x);
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_fn.wgsl b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_fn.wgsl
new file mode 100644
index 0000000..59de93c
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_fn.wgsl

@@ -0,0 +1,14 @@
+@group(0) @binding(0) var<uniform> u : array<mat4x3<f32>, 4>;
+
+fn a(a : array<mat4x3<f32>, 4>) {}
+fn b(m : mat4x3<f32>) {}
+fn c(v : vec3<f32>) {}
+fn d(f : f32) {}
+
+@compute @workgroup_size(1)
+fn f() {
+    a(u);
+    b(u[1]);
+    c(u[1][0].zxy);
+    d(u[1][0].zxy.x);
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_fn.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_fn.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..c1ac4a6
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_fn.wgsl.expected.dxc.hlsl

@@ -0,0 +1,43 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[16];
+};
+
+void a(float4x3 a_1[4]) {
+}
+
+void b(float4x3 m) {
+}
+
+void c(float3 v) {
+}
+
+void d(float f_1) {
+}
+
+float4x3 tint_symbol_1(uint4 buffer[16], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
+}
+
+typedef float4x3 tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[16], uint offset) {
+  float4x3 arr[4] = (float4x3[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 64u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(tint_symbol_1(u, 64u));
+  c(asfloat(u[4].xyz).zxy);
+  d(asfloat(u[4].xyz).zxy.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_fn.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_fn.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..c1ac4a6
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_fn.wgsl.expected.fxc.hlsl

@@ -0,0 +1,43 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[16];
+};
+
+void a(float4x3 a_1[4]) {
+}
+
+void b(float4x3 m) {
+}
+
+void c(float3 v) {
+}
+
+void d(float f_1) {
+}
+
+float4x3 tint_symbol_1(uint4 buffer[16], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
+}
+
+typedef float4x3 tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[16], uint offset) {
+  float4x3 arr[4] = (float4x3[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 64u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(tint_symbol_1(u, 64u));
+  c(asfloat(u[4].xyz).zxy);
+  d(asfloat(u[4].xyz).zxy.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_fn.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_fn.wgsl.expected.glsl
new file mode 100644
index 0000000..6458ea6
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_fn.wgsl.expected.glsl

@@ -0,0 +1,30 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat4x3 inner[4];
+} u;
+
+void a(mat4x3 a_1[4]) {
+}
+
+void b(mat4x3 m) {
+}
+
+void c(vec3 v) {
+}
+
+void d(float f_1) {
+}
+
+void f() {
+  a(u.inner);
+  b(u.inner[1]);
+  c(u.inner[1][0].zxy);
+  d(u.inner[1][0].zxy.x);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_fn.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_fn.wgsl.expected.msl
new file mode 100644
index 0000000..f2f446e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_fn.wgsl.expected.msl

@@ -0,0 +1,36 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+void a(tint_array<float4x3, 4> a_1) {
+}
+
+void b(float4x3 m) {
+}
+
+void c(float3 v) {
+}
+
+void d(float f_1) {
+}
+
+kernel void f(const constant tint_array<float4x3, 4>* tint_symbol [[buffer(0)]]) {
+  a(*(tint_symbol));
+  b((*(tint_symbol))[1]);
+  c(float3((*(tint_symbol))[1][0]).zxy);
+  d(float3((*(tint_symbol))[1][0]).zxy[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_fn.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_fn.wgsl.expected.spvasm
new file mode 100644
index 0000000..15aa728
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_fn.wgsl.expected.spvasm

@@ -0,0 +1,90 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 52
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %a "a"
+               OpName %a_1 "a_1"
+               OpName %b "b"
+               OpName %m "m"
+               OpName %c "c"
+               OpName %v "v"
+               OpName %d "d"
+               OpName %f_1 "f_1"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %_arr_mat4v3float_uint_4 ArrayStride 64
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat4v3float = OpTypeMatrix %v3float 4
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat4v3float_uint_4 = OpTypeArray %mat4v3float %uint_4
+    %u_block = OpTypeStruct %_arr_mat4v3float_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+       %void = OpTypeVoid
+         %10 = OpTypeFunction %void %_arr_mat4v3float_uint_4
+         %15 = OpTypeFunction %void %mat4v3float
+         %19 = OpTypeFunction %void %v3float
+         %23 = OpTypeFunction %void %float
+         %27 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_mat4v3float_uint_4 = OpTypePointer Uniform %_arr_mat4v3float_uint_4
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Uniform_mat4v3float = OpTypePointer Uniform %mat4v3float
+         %42 = OpConstantNull %int
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+          %a = OpFunction %void None %10
+        %a_1 = OpFunctionParameter %_arr_mat4v3float_uint_4
+         %14 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %b = OpFunction %void None %15
+          %m = OpFunctionParameter %mat4v3float
+         %18 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %c = OpFunction %void None %19
+          %v = OpFunctionParameter %v3float
+         %22 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %d = OpFunction %void None %23
+        %f_1 = OpFunctionParameter %float
+         %26 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %27
+         %29 = OpLabel
+         %33 = OpAccessChain %_ptr_Uniform__arr_mat4v3float_uint_4 %u %uint_0
+         %34 = OpLoad %_arr_mat4v3float_uint_4 %33
+         %30 = OpFunctionCall %void %a %34
+         %39 = OpAccessChain %_ptr_Uniform_mat4v3float %u %uint_0 %int_1
+         %40 = OpLoad %mat4v3float %39
+         %35 = OpFunctionCall %void %b %40
+         %44 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %int_1 %42
+         %45 = OpLoad %v3float %44
+         %46 = OpVectorShuffle %v3float %45 %45 2 0 1
+         %41 = OpFunctionCall %void %c %46
+         %48 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %int_1 %42
+         %49 = OpLoad %v3float %48
+         %50 = OpVectorShuffle %v3float %49 %49 2 0 1
+         %51 = OpCompositeExtract %float %50 0
+         %47 = OpFunctionCall %void %d %51
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_fn.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_fn.wgsl.expected.wgsl
new file mode 100644
index 0000000..db050c9
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_fn.wgsl.expected.wgsl

@@ -0,0 +1,21 @@
+@group(0) @binding(0) var<uniform> u : array<mat4x3<f32>, 4>;
+
+fn a(a : array<mat4x3<f32>, 4>) {
+}
+
+fn b(m : mat4x3<f32>) {
+}
+
+fn c(v : vec3<f32>) {
+}
+
+fn d(f : f32) {
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  a(u);
+  b(u[1]);
+  c(u[1][0].zxy);
+  d(u[1][0].zxy.x);
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_private.wgsl b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_private.wgsl
new file mode 100644
index 0000000..eab66b0
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_private.wgsl

@@ -0,0 +1,10 @@
+@group(0) @binding(0) var<uniform> u : array<mat4x3<f32>, 4>;
+var<private> p : array<mat4x3<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    p = u;
+    p[1] = u[2];
+    p[1][0] = u[0][1].zxy;
+    p[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_private.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_private.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..5236d8f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_private.wgsl.expected.dxc.hlsl

@@ -0,0 +1,32 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[16];
+};
+static float4x3 p[4] = (float4x3[4])0;
+
+float4x3 tint_symbol_1(uint4 buffer[16], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
+}
+
+typedef float4x3 tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[16], uint offset) {
+  float4x3 arr[4] = (float4x3[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 64u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = tint_symbol_1(u, 128u);
+  p[1][0] = asfloat(u[1].xyz).zxy;
+  p[1][0].x = asfloat(u[1].x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_private.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_private.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..5236d8f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_private.wgsl.expected.fxc.hlsl

@@ -0,0 +1,32 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[16];
+};
+static float4x3 p[4] = (float4x3[4])0;
+
+float4x3 tint_symbol_1(uint4 buffer[16], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
+}
+
+typedef float4x3 tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[16], uint offset) {
+  float4x3 arr[4] = (float4x3[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 64u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = tint_symbol_1(u, 128u);
+  p[1][0] = asfloat(u[1].xyz).zxy;
+  p[1][0].x = asfloat(u[1].x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_private.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_private.wgsl.expected.glsl
new file mode 100644
index 0000000..6c91e16
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_private.wgsl.expected.glsl

@@ -0,0 +1,19 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat4x3 inner[4];
+} u;
+
+mat4x3 p[4] = mat4x3[4](mat4x3(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), mat4x3(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), mat4x3(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), mat4x3(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f));
+void f() {
+  p = u.inner;
+  p[1] = u.inner[2];
+  p[1][0] = u.inner[0][1].zxy;
+  p[1][0].x = u.inner[0][1].x;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_private.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_private.wgsl.expected.msl
new file mode 100644
index 0000000..cfb8e04
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_private.wgsl.expected.msl

@@ -0,0 +1,25 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+kernel void f(const constant tint_array<float4x3, 4>* tint_symbol_1 [[buffer(0)]]) {
+  thread tint_array<float4x3, 4> tint_symbol = {};
+  tint_symbol = *(tint_symbol_1);
+  tint_symbol[1] = (*(tint_symbol_1))[2];
+  tint_symbol[1][0] = float3((*(tint_symbol_1))[0][1]).zxy;
+  tint_symbol[1][0][0] = (*(tint_symbol_1))[0][1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_private.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_private.wgsl.expected.spvasm
new file mode 100644
index 0000000..aed5708
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_private.wgsl.expected.spvasm

@@ -0,0 +1,68 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 41
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %p "p"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %_arr_mat4v3float_uint_4 ArrayStride 64
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat4v3float = OpTypeMatrix %v3float 4
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat4v3float_uint_4 = OpTypeArray %mat4v3float %uint_4
+    %u_block = OpTypeStruct %_arr_mat4v3float_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%_ptr_Private__arr_mat4v3float_uint_4 = OpTypePointer Private %_arr_mat4v3float_uint_4
+         %12 = OpConstantNull %_arr_mat4v3float_uint_4
+          %p = OpVariable %_ptr_Private__arr_mat4v3float_uint_4 Private %12
+       %void = OpTypeVoid
+         %13 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_mat4v3float_uint_4 = OpTypePointer Uniform %_arr_mat4v3float_uint_4
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Private_mat4v3float = OpTypePointer Private %mat4v3float
+      %int_2 = OpConstant %int 2
+%_ptr_Uniform_mat4v3float = OpTypePointer Uniform %mat4v3float
+         %29 = OpConstantNull %int
+%_ptr_Private_v3float = OpTypePointer Private %v3float
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+%_ptr_Private_float = OpTypePointer Private %float
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+          %f = OpFunction %void None %13
+         %16 = OpLabel
+         %19 = OpAccessChain %_ptr_Uniform__arr_mat4v3float_uint_4 %u %uint_0
+         %20 = OpLoad %_arr_mat4v3float_uint_4 %19
+               OpStore %p %20
+         %24 = OpAccessChain %_ptr_Private_mat4v3float %p %int_1
+         %27 = OpAccessChain %_ptr_Uniform_mat4v3float %u %uint_0 %int_2
+         %28 = OpLoad %mat4v3float %27
+               OpStore %24 %28
+         %31 = OpAccessChain %_ptr_Private_v3float %p %int_1 %29
+         %33 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %29 %int_1
+         %34 = OpLoad %v3float %33
+         %35 = OpVectorShuffle %v3float %34 %34 2 0 1
+               OpStore %31 %35
+         %37 = OpAccessChain %_ptr_Private_float %p %int_1 %29 %uint_0
+         %39 = OpAccessChain %_ptr_Uniform_float %u %uint_0 %29 %int_1 %uint_0
+         %40 = OpLoad %float %39
+               OpStore %37 %40
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_private.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_private.wgsl.expected.wgsl
new file mode 100644
index 0000000..2c89745
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_private.wgsl.expected.wgsl

@@ -0,0 +1,11 @@
+@group(0) @binding(0) var<uniform> u : array<mat4x3<f32>, 4>;
+
+var<private> p : array<mat4x3<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  p = u;
+  p[1] = u[2];
+  p[1][0] = u[0][1].zxy;
+  p[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_storage.wgsl b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_storage.wgsl
new file mode 100644
index 0000000..2a1f17f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_storage.wgsl

@@ -0,0 +1,10 @@
+@group(0) @binding(0) var<uniform> u : array<mat4x3<f32>, 4>;
+@group(0) @binding(1) var<storage, read_write> s : array<mat4x3<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    s = u;
+    s[1] = u[2];
+    s[1][0] = u[0][1].zxy;
+    s[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_storage.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_storage.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..befc57d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_storage.wgsl.expected.dxc.hlsl

@@ -0,0 +1,48 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[16];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, float4x3 value) {
+  buffer.Store3((offset + 0u), asuint(value[0u]));
+  buffer.Store3((offset + 16u), asuint(value[1u]));
+  buffer.Store3((offset + 32u), asuint(value[2u]));
+  buffer.Store3((offset + 48u), asuint(value[3u]));
+}
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, float4x3 value[4]) {
+  float4x3 array[4] = value;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      tint_symbol_1(buffer, (offset + (i * 64u)), array[i]);
+    }
+  }
+}
+
+float4x3 tint_symbol_4(uint4 buffer[16], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
+}
+
+typedef float4x3 tint_symbol_3_ret[4];
+tint_symbol_3_ret tint_symbol_3(uint4 buffer[16], uint offset) {
+  float4x3 arr[4] = (float4x3[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_4(buffer, (offset + (i_1 * 64u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_3(u, 0u));
+  tint_symbol_1(s, 64u, tint_symbol_4(u, 128u));
+  s.Store3(64u, asuint(asfloat(u[1].xyz).zxy));
+  s.Store(64u, asuint(asfloat(u[1].x)));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_storage.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_storage.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..befc57d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_storage.wgsl.expected.fxc.hlsl

@@ -0,0 +1,48 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[16];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, float4x3 value) {
+  buffer.Store3((offset + 0u), asuint(value[0u]));
+  buffer.Store3((offset + 16u), asuint(value[1u]));
+  buffer.Store3((offset + 32u), asuint(value[2u]));
+  buffer.Store3((offset + 48u), asuint(value[3u]));
+}
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, float4x3 value[4]) {
+  float4x3 array[4] = value;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      tint_symbol_1(buffer, (offset + (i * 64u)), array[i]);
+    }
+  }
+}
+
+float4x3 tint_symbol_4(uint4 buffer[16], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
+}
+
+typedef float4x3 tint_symbol_3_ret[4];
+tint_symbol_3_ret tint_symbol_3(uint4 buffer[16], uint offset) {
+  float4x3 arr[4] = (float4x3[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_4(buffer, (offset + (i_1 * 64u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_3(u, 0u));
+  tint_symbol_1(s, 64u, tint_symbol_4(u, 128u));
+  s.Store3(64u, asuint(asfloat(u[1].xyz).zxy));
+  s.Store(64u, asuint(asfloat(u[1].x)));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_storage.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_storage.wgsl.expected.glsl
new file mode 100644
index 0000000..eb0649d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_storage.wgsl.expected.glsl

@@ -0,0 +1,22 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat4x3 inner[4];
+} u;
+
+layout(binding = 1, std430) buffer u_block_ssbo {
+  mat4x3 inner[4];
+} s;
+
+void f() {
+  s.inner = u.inner;
+  s.inner[1] = u.inner[2];
+  s.inner[1][0] = u.inner[0][1].zxy;
+  s.inner[1][0].x = u.inner[0][1].x;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_storage.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_storage.wgsl.expected.msl
new file mode 100644
index 0000000..05cfa2f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_storage.wgsl.expected.msl

@@ -0,0 +1,24 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+kernel void f(device tint_array<float4x3, 4>* tint_symbol [[buffer(1)]], const constant tint_array<float4x3, 4>* tint_symbol_1 [[buffer(0)]]) {
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[2];
+  (*(tint_symbol))[1][0] = float3((*(tint_symbol_1))[0][1]).zxy;
+  (*(tint_symbol))[1][0][0] = (*(tint_symbol_1))[0][1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_storage.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_storage.wgsl.expected.spvasm
new file mode 100644
index 0000000..061e9a9
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_storage.wgsl.expected.spvasm

@@ -0,0 +1,71 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 42
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %s "s"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %_arr_mat4v3float_uint_4 ArrayStride 64
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpDecorate %s DescriptorSet 0
+               OpDecorate %s Binding 1
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat4v3float = OpTypeMatrix %v3float 4
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat4v3float_uint_4 = OpTypeArray %mat4v3float %uint_4
+    %u_block = OpTypeStruct %_arr_mat4v3float_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%_ptr_StorageBuffer_u_block = OpTypePointer StorageBuffer %u_block
+          %s = OpVariable %_ptr_StorageBuffer_u_block StorageBuffer
+       %void = OpTypeVoid
+         %12 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_StorageBuffer__arr_mat4v3float_uint_4 = OpTypePointer StorageBuffer %_arr_mat4v3float_uint_4
+%_ptr_Uniform__arr_mat4v3float_uint_4 = OpTypePointer Uniform %_arr_mat4v3float_uint_4
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_StorageBuffer_mat4v3float = OpTypePointer StorageBuffer %mat4v3float
+      %int_2 = OpConstant %int 2
+%_ptr_Uniform_mat4v3float = OpTypePointer Uniform %mat4v3float
+         %30 = OpConstantNull %int
+%_ptr_StorageBuffer_v3float = OpTypePointer StorageBuffer %v3float
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+          %f = OpFunction %void None %12
+         %15 = OpLabel
+         %18 = OpAccessChain %_ptr_StorageBuffer__arr_mat4v3float_uint_4 %s %uint_0
+         %20 = OpAccessChain %_ptr_Uniform__arr_mat4v3float_uint_4 %u %uint_0
+         %21 = OpLoad %_arr_mat4v3float_uint_4 %20
+               OpStore %18 %21
+         %25 = OpAccessChain %_ptr_StorageBuffer_mat4v3float %s %uint_0 %int_1
+         %28 = OpAccessChain %_ptr_Uniform_mat4v3float %u %uint_0 %int_2
+         %29 = OpLoad %mat4v3float %28
+               OpStore %25 %29
+         %32 = OpAccessChain %_ptr_StorageBuffer_v3float %s %uint_0 %int_1 %30
+         %34 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %30 %int_1
+         %35 = OpLoad %v3float %34
+         %36 = OpVectorShuffle %v3float %35 %35 2 0 1
+               OpStore %32 %36
+         %38 = OpAccessChain %_ptr_StorageBuffer_float %s %uint_0 %int_1 %30 %uint_0
+         %40 = OpAccessChain %_ptr_Uniform_float %u %uint_0 %30 %int_1 %uint_0
+         %41 = OpLoad %float %40
+               OpStore %38 %41
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_storage.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_storage.wgsl.expected.wgsl
new file mode 100644
index 0000000..a3b16ba
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_storage.wgsl.expected.wgsl

@@ -0,0 +1,11 @@
+@group(0) @binding(0) var<uniform> u : array<mat4x3<f32>, 4>;
+
+@group(0) @binding(1) var<storage, read_write> s : array<mat4x3<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  s = u;
+  s[1] = u[2];
+  s[1][0] = u[0][1].zxy;
+  s[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_workgroup.wgsl b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_workgroup.wgsl
new file mode 100644
index 0000000..6f078c4
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_workgroup.wgsl

@@ -0,0 +1,10 @@
+@group(0) @binding(0) var<uniform> u : array<mat4x3<f32>, 4>;
+var<workgroup> w : array<mat4x3<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    w = u;
+    w[1] = u[2];
+    w[1][0] = u[0][1].zxy;
+    w[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_workgroup.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_workgroup.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..1c1745a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_workgroup.wgsl.expected.dxc.hlsl

@@ -0,0 +1,47 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[16];
+};
+groupshared float4x3 w[4];
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+float4x3 tint_symbol_3(uint4 buffer[16], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
+}
+
+typedef float4x3 tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[16], uint offset) {
+  float4x3 arr[4] = (float4x3[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 64u)));
+    }
+  }
+  return arr;
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      const uint i = idx;
+      w[i] = float4x3((0.0f).xxx, (0.0f).xxx, (0.0f).xxx, (0.0f).xxx);
+    }
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = tint_symbol_3(u, 128u);
+  w[1][0] = asfloat(u[1].xyz).zxy;
+  w[1][0].x = asfloat(u[1].x);
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_workgroup.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_workgroup.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..1c1745a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_workgroup.wgsl.expected.fxc.hlsl

@@ -0,0 +1,47 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[16];
+};
+groupshared float4x3 w[4];
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+float4x3 tint_symbol_3(uint4 buffer[16], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
+}
+
+typedef float4x3 tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[16], uint offset) {
+  float4x3 arr[4] = (float4x3[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 64u)));
+    }
+  }
+  return arr;
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      const uint i = idx;
+      w[i] = float4x3((0.0f).xxx, (0.0f).xxx, (0.0f).xxx, (0.0f).xxx);
+    }
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = tint_symbol_3(u, 128u);
+  w[1][0] = asfloat(u[1].xyz).zxy;
+  w[1][0].x = asfloat(u[1].x);
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_workgroup.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_workgroup.wgsl.expected.glsl
new file mode 100644
index 0000000..f06a3a3
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_workgroup.wgsl.expected.glsl

@@ -0,0 +1,26 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat4x3 inner[4];
+} u;
+
+shared mat4x3 w[4];
+void f(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      uint i = idx;
+      w[i] = mat4x3(vec3(0.0f), vec3(0.0f), vec3(0.0f), vec3(0.0f));
+    }
+  }
+  barrier();
+  w = u.inner;
+  w[1] = u.inner[2];
+  w[1][0] = u.inner[0][1].zxy;
+  w[1][0].x = u.inner[0][1].x;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f(gl_LocalInvocationIndex);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_workgroup.wgsl.expected.msl
new file mode 100644
index 0000000..b6e7f8c
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_workgroup.wgsl.expected.msl

@@ -0,0 +1,38 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct tint_symbol_5 {
+  tint_array<float4x3, 4> w;
+};
+
+void f_inner(uint local_invocation_index, threadgroup tint_array<float4x3, 4>* const tint_symbol, const constant tint_array<float4x3, 4>* const tint_symbol_1) {
+  for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+    uint const i = idx;
+    (*(tint_symbol))[i] = float4x3(float3(0.0f), float3(0.0f), float3(0.0f), float3(0.0f));
+  }
+  threadgroup_barrier(mem_flags::mem_threadgroup);
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[2];
+  (*(tint_symbol))[1][0] = float3((*(tint_symbol_1))[0][1]).zxy;
+  (*(tint_symbol))[1][0][0] = (*(tint_symbol_1))[0][1][0];
+}
+
+kernel void f(const constant tint_array<float4x3, 4>* tint_symbol_4 [[buffer(0)]], threadgroup tint_symbol_5* tint_symbol_3 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
+  threadgroup tint_array<float4x3, 4>* const tint_symbol_2 = &((*(tint_symbol_3)).w);
+  f_inner(local_invocation_index, tint_symbol_2, tint_symbol_4);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_workgroup.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_workgroup.wgsl.expected.spvasm
new file mode 100644
index 0000000..b2a40f3
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_workgroup.wgsl.expected.spvasm

@@ -0,0 +1,115 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 70
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f" %local_invocation_index_1
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %local_invocation_index_1 "local_invocation_index_1"
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %w "w"
+               OpName %f_inner "f_inner"
+               OpName %local_invocation_index "local_invocation_index"
+               OpName %idx "idx"
+               OpName %f "f"
+               OpDecorate %local_invocation_index_1 BuiltIn LocalInvocationIndex
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %_arr_mat4v3float_uint_4 ArrayStride 64
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %uint = OpTypeInt 32 0
+%_ptr_Input_uint = OpTypePointer Input %uint
+%local_invocation_index_1 = OpVariable %_ptr_Input_uint Input
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat4v3float = OpTypeMatrix %v3float 4
+     %uint_4 = OpConstant %uint 4
+%_arr_mat4v3float_uint_4 = OpTypeArray %mat4v3float %uint_4
+    %u_block = OpTypeStruct %_arr_mat4v3float_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%_ptr_Workgroup__arr_mat4v3float_uint_4 = OpTypePointer Workgroup %_arr_mat4v3float_uint_4
+          %w = OpVariable %_ptr_Workgroup__arr_mat4v3float_uint_4 Workgroup
+       %void = OpTypeVoid
+         %14 = OpTypeFunction %void %uint
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %21 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Workgroup_mat4v3float = OpTypePointer Workgroup %mat4v3float
+         %35 = OpConstantNull %mat4v3float
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+   %uint_264 = OpConstant %uint 264
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_mat4v3float_uint_4 = OpTypePointer Uniform %_arr_mat4v3float_uint_4
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+      %int_2 = OpConstant %int 2
+%_ptr_Uniform_mat4v3float = OpTypePointer Uniform %mat4v3float
+         %53 = OpConstantNull %int
+%_ptr_Workgroup_v3float = OpTypePointer Workgroup %v3float
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+%_ptr_Workgroup_float = OpTypePointer Workgroup %float
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+         %65 = OpTypeFunction %void
+    %f_inner = OpFunction %void None %14
+%local_invocation_index = OpFunctionParameter %uint
+         %18 = OpLabel
+        %idx = OpVariable %_ptr_Function_uint Function %21
+               OpStore %idx %local_invocation_index
+               OpBranch %22
+         %22 = OpLabel
+               OpLoopMerge %23 %24 None
+               OpBranch %25
+         %25 = OpLabel
+         %27 = OpLoad %uint %idx
+         %28 = OpULessThan %bool %27 %uint_4
+         %26 = OpLogicalNot %bool %28
+               OpSelectionMerge %30 None
+               OpBranchConditional %26 %31 %30
+         %31 = OpLabel
+               OpBranch %23
+         %30 = OpLabel
+         %32 = OpLoad %uint %idx
+         %34 = OpAccessChain %_ptr_Workgroup_mat4v3float %w %32
+               OpStore %34 %35
+               OpBranch %24
+         %24 = OpLabel
+         %36 = OpLoad %uint %idx
+         %38 = OpIAdd %uint %36 %uint_1
+               OpStore %idx %38
+               OpBranch %22
+         %23 = OpLabel
+               OpControlBarrier %uint_2 %uint_2 %uint_264
+         %44 = OpAccessChain %_ptr_Uniform__arr_mat4v3float_uint_4 %u %uint_0
+         %45 = OpLoad %_arr_mat4v3float_uint_4 %44
+               OpStore %w %45
+         %48 = OpAccessChain %_ptr_Workgroup_mat4v3float %w %int_1
+         %51 = OpAccessChain %_ptr_Uniform_mat4v3float %u %uint_0 %int_2
+         %52 = OpLoad %mat4v3float %51
+               OpStore %48 %52
+         %55 = OpAccessChain %_ptr_Workgroup_v3float %w %int_1 %53
+         %57 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %53 %int_1
+         %58 = OpLoad %v3float %57
+         %59 = OpVectorShuffle %v3float %58 %58 2 0 1
+               OpStore %55 %59
+         %61 = OpAccessChain %_ptr_Workgroup_float %w %int_1 %53 %uint_0
+         %63 = OpAccessChain %_ptr_Uniform_float %u %uint_0 %53 %int_1 %uint_0
+         %64 = OpLoad %float %63
+               OpStore %61 %64
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %65
+         %67 = OpLabel
+         %69 = OpLoad %uint %local_invocation_index_1
+         %68 = OpFunctionCall %void %f_inner %69
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_workgroup.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_workgroup.wgsl.expected.wgsl
new file mode 100644
index 0000000..3a25aa3
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_workgroup.wgsl.expected.wgsl

@@ -0,0 +1,11 @@
+@group(0) @binding(0) var<uniform> u : array<mat4x3<f32>, 4>;
+
+var<workgroup> w : array<mat4x3<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  w = u;
+  w[1] = u[2];
+  w[1][0] = u[0][1].zxy;
+  w[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f16/dynamic_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/array/mat4x4_f16/dynamic_index_via_ptr.wgsl
new file mode 100644
index 0000000..44fece1
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f16/dynamic_index_via_ptr.wgsl

@@ -0,0 +1,17 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : array<mat4x4<f16>, 4>;
+
+var<private> counter = 0;
+fn i() -> i32 { counter++; return counter; }
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a       = &a;
+  let p_a_i     = &((*p_a)[i()]);
+  let p_a_i_i   = &((*p_a_i)[i()]);
+
+  let l_a       : array<mat4x4<f16>, 4> = *p_a;
+  let l_a_i     : mat4x4<f16>           = *p_a_i;
+  let l_a_i_i   : vec4<f16>             = *p_a_i_i;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x4_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..59fae52
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,59 @@
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[8];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+matrix<float16_t, 4, 4> tint_symbol_1(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  return matrix<float16_t, 4, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]), vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]));
+}
+
+typedef matrix<float16_t, 4, 4> tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[8], uint offset) {
+  matrix<float16_t, 4, 4> arr[4] = (matrix<float16_t, 4, 4>[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 32u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_a_i_save = i();
+  const int p_a_i_i_save = i();
+  const matrix<float16_t, 4, 4> l_a[4] = tint_symbol(a, 0u);
+  const matrix<float16_t, 4, 4> l_a_i = tint_symbol_1(a, (32u * uint(p_a_i_save)));
+  const uint scalar_offset_4 = (((32u * uint(p_a_i_save)) + (8u * uint(p_a_i_i_save)))) / 4;
+  uint4 ubo_load_9 = a[scalar_offset_4 / 4];
+  uint2 ubo_load_8 = ((scalar_offset_4 & 2) ? ubo_load_9.zw : ubo_load_9.xy);
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_8_yw = vector<float16_t, 2>(f16tof32(ubo_load_8 >> 16));
+  const vector<float16_t, 4> l_a_i_i = vector<float16_t, 4>(ubo_load_8_xz[0], ubo_load_8_yw[0], ubo_load_8_xz[1], ubo_load_8_yw[1]);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x4_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..c58ef9c
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,64 @@
+SKIP: FAILED
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[8];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+matrix<float16_t, 4, 4> tint_symbol_1(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  return matrix<float16_t, 4, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]), vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]));
+}
+
+typedef matrix<float16_t, 4, 4> tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[8], uint offset) {
+  matrix<float16_t, 4, 4> arr[4] = (matrix<float16_t, 4, 4>[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 32u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_a_i_save = i();
+  const int p_a_i_i_save = i();
+  const matrix<float16_t, 4, 4> l_a[4] = tint_symbol(a, 0u);
+  const matrix<float16_t, 4, 4> l_a_i = tint_symbol_1(a, (32u * uint(p_a_i_save)));
+  const uint scalar_offset_4 = (((32u * uint(p_a_i_save)) + (8u * uint(p_a_i_i_save)))) / 4;
+  uint4 ubo_load_9 = a[scalar_offset_4 / 4];
+  uint2 ubo_load_8 = ((scalar_offset_4 & 2) ? ubo_load_9.zw : ubo_load_9.xy);
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_8_yw = vector<float16_t, 2>(f16tof32(ubo_load_8 >> 16));
+  const vector<float16_t, 4> l_a_i_i = vector<float16_t, 4>(ubo_load_8_xz[0], ubo_load_8_yw[0], ubo_load_8_xz[1], ubo_load_8_yw[1]);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001F4B0E9BB50(11,8-16): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f16/dynamic_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat4x4_f16/dynamic_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..1ba2908
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f16/dynamic_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,75 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct mat4x4_f16 {
+  f16vec4 col0;
+  f16vec4 col1;
+  f16vec4 col2;
+  f16vec4 col3;
+};
+
+layout(binding = 0, std140) uniform a_block_std140_ubo {
+  mat4x4_f16 inner[4];
+} a;
+
+int counter = 0;
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+f16mat4 conv_mat4x4_f16(mat4x4_f16 val) {
+  return f16mat4(val.col0, val.col1, val.col2, val.col3);
+}
+
+f16mat4[4] conv_arr4_mat4x4_f16(mat4x4_f16 val[4]) {
+  f16mat4 arr[4] = f16mat4[4](f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_mat4x4_f16(val[i]);
+    }
+  }
+  return arr;
+}
+
+f16vec4 load_a_inner_p0_p1(uint p0, uint p1) {
+  switch(p1) {
+    case 0u: {
+      return a.inner[p0].col0;
+      break;
+    }
+    case 1u: {
+      return a.inner[p0].col1;
+      break;
+    }
+    case 2u: {
+      return a.inner[p0].col2;
+      break;
+    }
+    case 3u: {
+      return a.inner[p0].col3;
+      break;
+    }
+    default: {
+      return f16vec4(0.0hf);
+      break;
+    }
+  }
+}
+
+void f() {
+  f16mat4 p_a[4] = conv_arr4_mat4x4_f16(a.inner);
+  int tint_symbol = i();
+  f16mat4 p_a_i = conv_mat4x4_f16(a.inner[tint_symbol]);
+  int tint_symbol_1 = i();
+  f16vec4 p_a_i_i = load_a_inner_p0_p1(uint(tint_symbol), uint(tint_symbol_1));
+  f16mat4 l_a[4] = conv_arr4_mat4x4_f16(a.inner);
+  f16mat4 l_a_i = conv_mat4x4_f16(a.inner[tint_symbol]);
+  f16vec4 l_a_i_i = load_a_inner_p0_p1(uint(tint_symbol), uint(tint_symbol_1));
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f16/dynamic_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat4x4_f16/dynamic_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..c1b59e6
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f16/dynamic_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,33 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+int i() {
+  thread int tint_symbol_2 = 0;
+  tint_symbol_2 = as_type<int>((as_type<uint>(tint_symbol_2) + as_type<uint>(1)));
+  return tint_symbol_2;
+}
+
+kernel void f(const constant tint_array<half4x4, 4>* tint_symbol_3 [[buffer(0)]]) {
+  int const tint_symbol = i();
+  int const p_a_i_save = tint_symbol;
+  int const tint_symbol_1 = i();
+  int const p_a_i_i_save = tint_symbol_1;
+  tint_array<half4x4, 4> const l_a = *(tint_symbol_3);
+  half4x4 const l_a_i = (*(tint_symbol_3))[p_a_i_save];
+  half4 const l_a_i_i = (*(tint_symbol_3))[p_a_i_save][p_a_i_i_save];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f16/dynamic_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat4x4_f16/dynamic_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..575ae9b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f16/dynamic_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,181 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 108
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %a_block_std140 "a_block_std140"
+               OpMemberName %a_block_std140 0 "inner"
+               OpName %mat4x4_f16 "mat4x4_f16"
+               OpMemberName %mat4x4_f16 0 "col0"
+               OpMemberName %mat4x4_f16 1 "col1"
+               OpMemberName %mat4x4_f16 2 "col2"
+               OpMemberName %mat4x4_f16 3 "col3"
+               OpName %a "a"
+               OpName %counter "counter"
+               OpName %i "i"
+               OpName %conv_mat4x4_f16 "conv_mat4x4_f16"
+               OpName %val "val"
+               OpName %conv_arr4_mat4x4_f16 "conv_arr4_mat4x4_f16"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i_0 "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_a_inner_p0_p1 "load_a_inner_p0_p1"
+               OpName %p0 "p0"
+               OpName %p1 "p1"
+               OpName %f "f"
+               OpDecorate %a_block_std140 Block
+               OpMemberDecorate %a_block_std140 0 Offset 0
+               OpMemberDecorate %mat4x4_f16 0 Offset 0
+               OpMemberDecorate %mat4x4_f16 1 Offset 8
+               OpMemberDecorate %mat4x4_f16 2 Offset 16
+               OpMemberDecorate %mat4x4_f16 3 Offset 24
+               OpDecorate %_arr_mat4x4_f16_uint_4 ArrayStride 32
+               OpDecorate %a NonWritable
+               OpDecorate %a DescriptorSet 0
+               OpDecorate %a Binding 0
+               OpDecorate %_arr_mat4v4half_uint_4 ArrayStride 32
+       %half = OpTypeFloat 16
+     %v4half = OpTypeVector %half 4
+ %mat4x4_f16 = OpTypeStruct %v4half %v4half %v4half %v4half
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat4x4_f16_uint_4 = OpTypeArray %mat4x4_f16 %uint_4
+%a_block_std140 = OpTypeStruct %_arr_mat4x4_f16_uint_4
+%_ptr_Uniform_a_block_std140 = OpTypePointer Uniform %a_block_std140
+          %a = OpVariable %_ptr_Uniform_a_block_std140 Uniform
+        %int = OpTypeInt 32 1
+         %11 = OpConstantNull %int
+%_ptr_Private_int = OpTypePointer Private %int
+    %counter = OpVariable %_ptr_Private_int Private %11
+         %14 = OpTypeFunction %int
+      %int_1 = OpConstant %int 1
+ %mat4v4half = OpTypeMatrix %v4half 4
+         %21 = OpTypeFunction %mat4v4half %mat4x4_f16
+%_arr_mat4v4half_uint_4 = OpTypeArray %mat4v4half %uint_4
+         %31 = OpTypeFunction %_arr_mat4v4half_uint_4 %_arr_mat4x4_f16_uint_4
+%_ptr_Function__arr_mat4v4half_uint_4 = OpTypePointer Function %_arr_mat4v4half_uint_4
+         %38 = OpConstantNull %_arr_mat4v4half_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %41 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_mat4x4_f16_uint_4 = OpTypePointer Function %_arr_mat4x4_f16_uint_4
+         %54 = OpConstantNull %_arr_mat4x4_f16_uint_4
+%_ptr_Function_mat4v4half = OpTypePointer Function %mat4v4half
+%_ptr_Function_mat4x4_f16 = OpTypePointer Function %mat4x4_f16
+     %uint_1 = OpConstant %uint 1
+         %67 = OpTypeFunction %v4half %uint %uint
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half
+     %uint_2 = OpConstant %uint 2
+     %uint_3 = OpConstant %uint 3
+         %90 = OpConstantNull %v4half
+       %void = OpTypeVoid
+         %91 = OpTypeFunction %void
+%_ptr_Uniform__arr_mat4x4_f16_uint_4 = OpTypePointer Uniform %_arr_mat4x4_f16_uint_4
+%_ptr_Uniform_mat4x4_f16 = OpTypePointer Uniform %mat4x4_f16
+          %i = OpFunction %int None %14
+         %16 = OpLabel
+         %17 = OpLoad %int %counter
+         %19 = OpIAdd %int %17 %int_1
+               OpStore %counter %19
+         %20 = OpLoad %int %counter
+               OpReturnValue %20
+               OpFunctionEnd
+%conv_mat4x4_f16 = OpFunction %mat4v4half None %21
+        %val = OpFunctionParameter %mat4x4_f16
+         %25 = OpLabel
+         %26 = OpCompositeExtract %v4half %val 0
+         %27 = OpCompositeExtract %v4half %val 1
+         %28 = OpCompositeExtract %v4half %val 2
+         %29 = OpCompositeExtract %v4half %val 3
+         %30 = OpCompositeConstruct %mat4v4half %26 %27 %28 %29
+               OpReturnValue %30
+               OpFunctionEnd
+%conv_arr4_mat4x4_f16 = OpFunction %_arr_mat4v4half_uint_4 None %31
+      %val_0 = OpFunctionParameter %_arr_mat4x4_f16_uint_4
+         %35 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_mat4v4half_uint_4 Function %38
+        %i_0 = OpVariable %_ptr_Function_uint Function %41
+%var_for_index = OpVariable %_ptr_Function__arr_mat4x4_f16_uint_4 Function %54
+               OpBranch %42
+         %42 = OpLabel
+               OpLoopMerge %43 %44 None
+               OpBranch %45
+         %45 = OpLabel
+         %47 = OpLoad %uint %i_0
+         %48 = OpULessThan %bool %47 %uint_4
+         %46 = OpLogicalNot %bool %48
+               OpSelectionMerge %50 None
+               OpBranchConditional %46 %51 %50
+         %51 = OpLabel
+               OpBranch %43
+         %50 = OpLabel
+               OpStore %var_for_index %val_0
+         %55 = OpLoad %uint %i_0
+         %57 = OpAccessChain %_ptr_Function_mat4v4half %arr %55
+         %59 = OpLoad %uint %i_0
+         %61 = OpAccessChain %_ptr_Function_mat4x4_f16 %var_for_index %59
+         %62 = OpLoad %mat4x4_f16 %61
+         %58 = OpFunctionCall %mat4v4half %conv_mat4x4_f16 %62
+               OpStore %57 %58
+               OpBranch %44
+         %44 = OpLabel
+         %63 = OpLoad %uint %i_0
+         %65 = OpIAdd %uint %63 %uint_1
+               OpStore %i_0 %65
+               OpBranch %42
+         %43 = OpLabel
+         %66 = OpLoad %_arr_mat4v4half_uint_4 %arr
+               OpReturnValue %66
+               OpFunctionEnd
+%load_a_inner_p0_p1 = OpFunction %v4half None %67
+         %p0 = OpFunctionParameter %uint
+         %p1 = OpFunctionParameter %uint
+         %71 = OpLabel
+               OpSelectionMerge %72 None
+               OpSwitch %p1 %73 0 %74 1 %75 2 %76 3 %77
+         %74 = OpLabel
+         %80 = OpAccessChain %_ptr_Uniform_v4half %a %uint_0 %p0 %uint_0
+         %81 = OpLoad %v4half %80
+               OpReturnValue %81
+         %75 = OpLabel
+         %82 = OpAccessChain %_ptr_Uniform_v4half %a %uint_0 %p0 %uint_1
+         %83 = OpLoad %v4half %82
+               OpReturnValue %83
+         %76 = OpLabel
+         %85 = OpAccessChain %_ptr_Uniform_v4half %a %uint_0 %p0 %uint_2
+         %86 = OpLoad %v4half %85
+               OpReturnValue %86
+         %77 = OpLabel
+         %88 = OpAccessChain %_ptr_Uniform_v4half %a %uint_0 %p0 %uint_3
+         %89 = OpLoad %v4half %88
+               OpReturnValue %89
+         %73 = OpLabel
+               OpReturnValue %90
+         %72 = OpLabel
+               OpReturnValue %90
+               OpFunctionEnd
+          %f = OpFunction %void None %91
+         %94 = OpLabel
+         %95 = OpFunctionCall %int %i
+         %96 = OpFunctionCall %int %i
+         %99 = OpAccessChain %_ptr_Uniform__arr_mat4x4_f16_uint_4 %a %uint_0
+        %100 = OpLoad %_arr_mat4x4_f16_uint_4 %99
+         %97 = OpFunctionCall %_arr_mat4v4half_uint_4 %conv_arr4_mat4x4_f16 %100
+        %103 = OpAccessChain %_ptr_Uniform_mat4x4_f16 %a %uint_0 %95
+        %104 = OpLoad %mat4x4_f16 %103
+        %101 = OpFunctionCall %mat4v4half %conv_mat4x4_f16 %104
+        %106 = OpBitcast %uint %95
+        %107 = OpBitcast %uint %96
+        %105 = OpFunctionCall %v4half %load_a_inner_p0_p1 %106 %107
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f16/dynamic_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat4x4_f16/dynamic_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..0d1e24a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f16/dynamic_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,20 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : array<mat4x4<f16>, 4>;
+
+var<private> counter = 0;
+
+fn i() -> i32 {
+  counter++;
+  return counter;
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &(a);
+  let p_a_i = &((*(p_a))[i()]);
+  let p_a_i_i = &((*(p_a_i))[i()]);
+  let l_a : array<mat4x4<f16>, 4> = *(p_a);
+  let l_a_i : mat4x4<f16> = *(p_a_i);
+  let l_a_i_i : vec4<f16> = *(p_a_i_i);
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f16/static_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/array/mat4x4_f16/static_index_via_ptr.wgsl
new file mode 100644
index 0000000..580856f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f16/static_index_via_ptr.wgsl

@@ -0,0 +1,14 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : array<mat4x4<f16>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a       = &a;
+  let p_a_2     = &((*p_a)[2]);
+  let p_a_2_1   = &((*p_a_2)[1]);
+
+  let l_a       : array<mat4x4<f16>, 4> = *p_a;
+  let l_a_i     : mat4x4<f16>           = *p_a_2;
+  let l_a_i_i   : vec4<f16>             = *p_a_2_1;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x4_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..0413807
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,49 @@
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[8];
+};
+
+matrix<float16_t, 4, 4> tint_symbol_1(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  return matrix<float16_t, 4, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]), vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]));
+}
+
+typedef matrix<float16_t, 4, 4> tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[8], uint offset) {
+  matrix<float16_t, 4, 4> arr[4] = (matrix<float16_t, 4, 4>[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 32u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 4, 4> l_a[4] = tint_symbol(a, 0u);
+  const matrix<float16_t, 4, 4> l_a_i = tint_symbol_1(a, 64u);
+  uint2 ubo_load_8 = a[4].zw;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_8_yw = vector<float16_t, 2>(f16tof32(ubo_load_8 >> 16));
+  const vector<float16_t, 4> l_a_i_i = vector<float16_t, 4>(ubo_load_8_xz[0], ubo_load_8_yw[0], ubo_load_8_xz[1], ubo_load_8_yw[1]);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x4_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..d0b7cb8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,54 @@
+SKIP: FAILED
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[8];
+};
+
+matrix<float16_t, 4, 4> tint_symbol_1(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  return matrix<float16_t, 4, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]), vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]));
+}
+
+typedef matrix<float16_t, 4, 4> tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[8], uint offset) {
+  matrix<float16_t, 4, 4> arr[4] = (matrix<float16_t, 4, 4>[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 32u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 4, 4> l_a[4] = tint_symbol(a, 0u);
+  const matrix<float16_t, 4, 4> l_a_i = tint_symbol_1(a, 64u);
+  uint2 ubo_load_8 = a[4].zw;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_8_yw = vector<float16_t, 2>(f16tof32(ubo_load_8 >> 16));
+  const vector<float16_t, 4> l_a_i_i = vector<float16_t, 4>(ubo_load_8_xz[0], ubo_load_8_yw[0], ubo_load_8_xz[1], ubo_load_8_yw[1]);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001586950D8A0(5,8-16): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f16/static_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat4x4_f16/static_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..2e2ba11
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f16/static_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,42 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct mat4x4_f16 {
+  f16vec4 col0;
+  f16vec4 col1;
+  f16vec4 col2;
+  f16vec4 col3;
+};
+
+layout(binding = 0, std140) uniform a_block_std140_ubo {
+  mat4x4_f16 inner[4];
+} a;
+
+f16mat4 conv_mat4x4_f16(mat4x4_f16 val) {
+  return f16mat4(val.col0, val.col1, val.col2, val.col3);
+}
+
+f16mat4[4] conv_arr4_mat4x4_f16(mat4x4_f16 val[4]) {
+  f16mat4 arr[4] = f16mat4[4](f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_mat4x4_f16(val[i]);
+    }
+  }
+  return arr;
+}
+
+void f() {
+  f16mat4 p_a[4] = conv_arr4_mat4x4_f16(a.inner);
+  f16mat4 p_a_2 = conv_mat4x4_f16(a.inner[2u]);
+  f16vec4 p_a_2_1 = a.inner[2u].col1;
+  f16mat4 l_a[4] = conv_arr4_mat4x4_f16(a.inner);
+  f16mat4 l_a_i = conv_mat4x4_f16(a.inner[2u]);
+  f16vec4 l_a_i_i = a.inner[2u].col1;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f16/static_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat4x4_f16/static_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..ccd4ec1
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f16/static_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,23 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+kernel void f(const constant tint_array<half4x4, 4>* tint_symbol [[buffer(0)]]) {
+  tint_array<half4x4, 4> const l_a = *(tint_symbol);
+  half4x4 const l_a_i = (*(tint_symbol))[2];
+  half4 const l_a_i_i = (*(tint_symbol))[2][1];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f16/static_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat4x4_f16/static_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..a982f07
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f16/static_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,129 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 73
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %a_block_std140 "a_block_std140"
+               OpMemberName %a_block_std140 0 "inner"
+               OpName %mat4x4_f16 "mat4x4_f16"
+               OpMemberName %mat4x4_f16 0 "col0"
+               OpMemberName %mat4x4_f16 1 "col1"
+               OpMemberName %mat4x4_f16 2 "col2"
+               OpMemberName %mat4x4_f16 3 "col3"
+               OpName %a "a"
+               OpName %conv_mat4x4_f16 "conv_mat4x4_f16"
+               OpName %val "val"
+               OpName %conv_arr4_mat4x4_f16 "conv_arr4_mat4x4_f16"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %f "f"
+               OpDecorate %a_block_std140 Block
+               OpMemberDecorate %a_block_std140 0 Offset 0
+               OpMemberDecorate %mat4x4_f16 0 Offset 0
+               OpMemberDecorate %mat4x4_f16 1 Offset 8
+               OpMemberDecorate %mat4x4_f16 2 Offset 16
+               OpMemberDecorate %mat4x4_f16 3 Offset 24
+               OpDecorate %_arr_mat4x4_f16_uint_4 ArrayStride 32
+               OpDecorate %a NonWritable
+               OpDecorate %a DescriptorSet 0
+               OpDecorate %a Binding 0
+               OpDecorate %_arr_mat4v4half_uint_4 ArrayStride 32
+       %half = OpTypeFloat 16
+     %v4half = OpTypeVector %half 4
+ %mat4x4_f16 = OpTypeStruct %v4half %v4half %v4half %v4half
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat4x4_f16_uint_4 = OpTypeArray %mat4x4_f16 %uint_4
+%a_block_std140 = OpTypeStruct %_arr_mat4x4_f16_uint_4
+%_ptr_Uniform_a_block_std140 = OpTypePointer Uniform %a_block_std140
+          %a = OpVariable %_ptr_Uniform_a_block_std140 Uniform
+ %mat4v4half = OpTypeMatrix %v4half 4
+         %10 = OpTypeFunction %mat4v4half %mat4x4_f16
+%_arr_mat4v4half_uint_4 = OpTypeArray %mat4v4half %uint_4
+         %20 = OpTypeFunction %_arr_mat4v4half_uint_4 %_arr_mat4x4_f16_uint_4
+%_ptr_Function__arr_mat4v4half_uint_4 = OpTypePointer Function %_arr_mat4v4half_uint_4
+         %27 = OpConstantNull %_arr_mat4v4half_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %30 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_mat4x4_f16_uint_4 = OpTypePointer Function %_arr_mat4x4_f16_uint_4
+         %43 = OpConstantNull %_arr_mat4x4_f16_uint_4
+%_ptr_Function_mat4v4half = OpTypePointer Function %mat4v4half
+%_ptr_Function_mat4x4_f16 = OpTypePointer Function %mat4x4_f16
+     %uint_1 = OpConstant %uint 1
+       %void = OpTypeVoid
+         %56 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_mat4x4_f16_uint_4 = OpTypePointer Uniform %_arr_mat4x4_f16_uint_4
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_mat4x4_f16 = OpTypePointer Uniform %mat4x4_f16
+%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half
+%conv_mat4x4_f16 = OpFunction %mat4v4half None %10
+        %val = OpFunctionParameter %mat4x4_f16
+         %14 = OpLabel
+         %15 = OpCompositeExtract %v4half %val 0
+         %16 = OpCompositeExtract %v4half %val 1
+         %17 = OpCompositeExtract %v4half %val 2
+         %18 = OpCompositeExtract %v4half %val 3
+         %19 = OpCompositeConstruct %mat4v4half %15 %16 %17 %18
+               OpReturnValue %19
+               OpFunctionEnd
+%conv_arr4_mat4x4_f16 = OpFunction %_arr_mat4v4half_uint_4 None %20
+      %val_0 = OpFunctionParameter %_arr_mat4x4_f16_uint_4
+         %24 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_mat4v4half_uint_4 Function %27
+          %i = OpVariable %_ptr_Function_uint Function %30
+%var_for_index = OpVariable %_ptr_Function__arr_mat4x4_f16_uint_4 Function %43
+               OpBranch %31
+         %31 = OpLabel
+               OpLoopMerge %32 %33 None
+               OpBranch %34
+         %34 = OpLabel
+         %36 = OpLoad %uint %i
+         %37 = OpULessThan %bool %36 %uint_4
+         %35 = OpLogicalNot %bool %37
+               OpSelectionMerge %39 None
+               OpBranchConditional %35 %40 %39
+         %40 = OpLabel
+               OpBranch %32
+         %39 = OpLabel
+               OpStore %var_for_index %val_0
+         %44 = OpLoad %uint %i
+         %46 = OpAccessChain %_ptr_Function_mat4v4half %arr %44
+         %48 = OpLoad %uint %i
+         %50 = OpAccessChain %_ptr_Function_mat4x4_f16 %var_for_index %48
+         %51 = OpLoad %mat4x4_f16 %50
+         %47 = OpFunctionCall %mat4v4half %conv_mat4x4_f16 %51
+               OpStore %46 %47
+               OpBranch %33
+         %33 = OpLabel
+         %52 = OpLoad %uint %i
+         %54 = OpIAdd %uint %52 %uint_1
+               OpStore %i %54
+               OpBranch %31
+         %32 = OpLabel
+         %55 = OpLoad %_arr_mat4v4half_uint_4 %arr
+               OpReturnValue %55
+               OpFunctionEnd
+          %f = OpFunction %void None %56
+         %59 = OpLabel
+         %63 = OpAccessChain %_ptr_Uniform__arr_mat4x4_f16_uint_4 %a %uint_0
+         %64 = OpLoad %_arr_mat4x4_f16_uint_4 %63
+         %60 = OpFunctionCall %_arr_mat4v4half_uint_4 %conv_arr4_mat4x4_f16 %64
+         %68 = OpAccessChain %_ptr_Uniform_mat4x4_f16 %a %uint_0 %uint_2
+         %69 = OpLoad %mat4x4_f16 %68
+         %65 = OpFunctionCall %mat4v4half %conv_mat4x4_f16 %69
+         %71 = OpAccessChain %_ptr_Uniform_v4half %a %uint_0 %uint_2 %uint_1
+         %72 = OpLoad %v4half %71
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f16/static_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat4x4_f16/static_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..c91863f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f16/static_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,13 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : array<mat4x4<f16>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &(a);
+  let p_a_2 = &((*(p_a))[2]);
+  let p_a_2_1 = &((*(p_a_2))[1]);
+  let l_a : array<mat4x4<f16>, 4> = *(p_a);
+  let l_a_i : mat4x4<f16> = *(p_a_2);
+  let l_a_i_i : vec4<f16> = *(p_a_2_1);
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_builtin.wgsl b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_builtin.wgsl
new file mode 100644
index 0000000..c2729ec
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_builtin.wgsl

@@ -0,0 +1,10 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : array<mat4x4<f16>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    let t = transpose(u[2]);
+    let l = length(u[0][1].ywxz);
+    let a = abs(u[0][1].ywxz.x);
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_builtin.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_builtin.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..3db7f37
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_builtin.wgsl.expected.dxc.hlsl

@@ -0,0 +1,41 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[8];
+};
+
+matrix<float16_t, 4, 4> tint_symbol(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  return matrix<float16_t, 4, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]), vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 4, 4> t = transpose(tint_symbol(u, 64u));
+  uint2 ubo_load_8 = u[0].zw;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_8_yw = vector<float16_t, 2>(f16tof32(ubo_load_8 >> 16));
+  const float16_t l = length(vector<float16_t, 4>(ubo_load_8_xz[0], ubo_load_8_yw[0], ubo_load_8_xz[1], ubo_load_8_yw[1]).ywxz);
+  uint2 ubo_load_9 = u[0].zw;
+  vector<float16_t, 2> ubo_load_9_xz = vector<float16_t, 2>(f16tof32(ubo_load_9 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_9_yw = vector<float16_t, 2>(f16tof32(ubo_load_9 >> 16));
+  const float16_t a = abs(vector<float16_t, 4>(ubo_load_9_xz[0], ubo_load_9_yw[0], ubo_load_9_xz[1], ubo_load_9_yw[1]).ywxz.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_builtin.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_builtin.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..dbd9a19
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_builtin.wgsl.expected.fxc.hlsl

@@ -0,0 +1,46 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[8];
+};
+
+matrix<float16_t, 4, 4> tint_symbol(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  return matrix<float16_t, 4, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]), vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 4, 4> t = transpose(tint_symbol(u, 64u));
+  uint2 ubo_load_8 = u[0].zw;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_8_yw = vector<float16_t, 2>(f16tof32(ubo_load_8 >> 16));
+  const float16_t l = length(vector<float16_t, 4>(ubo_load_8_xz[0], ubo_load_8_yw[0], ubo_load_8_xz[1], ubo_load_8_yw[1]).ywxz);
+  uint2 ubo_load_9 = u[0].zw;
+  vector<float16_t, 2> ubo_load_9_xz = vector<float16_t, 2>(f16tof32(ubo_load_9 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_9_yw = vector<float16_t, 2>(f16tof32(ubo_load_9 >> 16));
+  const float16_t a = abs(vector<float16_t, 4>(ubo_load_9_xz[0], ubo_load_9_yw[0], ubo_load_9_xz[1], ubo_load_9_yw[1]).ywxz.x);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000002574E068B20(5,8-16): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_builtin.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_builtin.wgsl.expected.glsl
new file mode 100644
index 0000000..24c5493
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_builtin.wgsl.expected.glsl

@@ -0,0 +1,29 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct mat4x4_f16 {
+  f16vec4 col0;
+  f16vec4 col1;
+  f16vec4 col2;
+  f16vec4 col3;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  mat4x4_f16 inner[4];
+} u;
+
+f16mat4 conv_mat4x4_f16(mat4x4_f16 val) {
+  return f16mat4(val.col0, val.col1, val.col2, val.col3);
+}
+
+void f() {
+  f16mat4 t = transpose(conv_mat4x4_f16(u.inner[2u]));
+  float16_t l = length(u.inner[0u].col1.ywxz);
+  float16_t a = abs(u.inner[0u].col1.ywxz[0u]);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_builtin.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_builtin.wgsl.expected.msl
new file mode 100644
index 0000000..f1b041d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_builtin.wgsl.expected.msl

@@ -0,0 +1,23 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+kernel void f(const constant tint_array<half4x4, 4>* tint_symbol [[buffer(0)]]) {
+  half4x4 const t = transpose((*(tint_symbol))[2]);
+  half const l = length(half4((*(tint_symbol))[0][1]).ywxz);
+  half const a = fabs(half4((*(tint_symbol))[0][1]).ywxz[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_builtin.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_builtin.wgsl.expected.spvasm
new file mode 100644
index 0000000..4d30027
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_builtin.wgsl.expected.spvasm

@@ -0,0 +1,81 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 44
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+         %32 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %mat4x4_f16 "mat4x4_f16"
+               OpMemberName %mat4x4_f16 0 "col0"
+               OpMemberName %mat4x4_f16 1 "col1"
+               OpMemberName %mat4x4_f16 2 "col2"
+               OpMemberName %mat4x4_f16 3 "col3"
+               OpName %u "u"
+               OpName %conv_mat4x4_f16 "conv_mat4x4_f16"
+               OpName %val "val"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %mat4x4_f16 0 Offset 0
+               OpMemberDecorate %mat4x4_f16 1 Offset 8
+               OpMemberDecorate %mat4x4_f16 2 Offset 16
+               OpMemberDecorate %mat4x4_f16 3 Offset 24
+               OpDecorate %_arr_mat4x4_f16_uint_4 ArrayStride 32
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %half = OpTypeFloat 16
+     %v4half = OpTypeVector %half 4
+ %mat4x4_f16 = OpTypeStruct %v4half %v4half %v4half %v4half
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat4x4_f16_uint_4 = OpTypeArray %mat4x4_f16 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_mat4x4_f16_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat4v4half = OpTypeMatrix %v4half 4
+         %10 = OpTypeFunction %mat4v4half %mat4x4_f16
+       %void = OpTypeVoid
+         %20 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_mat4x4_f16 = OpTypePointer Uniform %mat4x4_f16
+         %33 = OpConstantNull %uint
+     %uint_1 = OpConstant %uint 1
+%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half
+%conv_mat4x4_f16 = OpFunction %mat4v4half None %10
+        %val = OpFunctionParameter %mat4x4_f16
+         %14 = OpLabel
+         %15 = OpCompositeExtract %v4half %val 0
+         %16 = OpCompositeExtract %v4half %val 1
+         %17 = OpCompositeExtract %v4half %val 2
+         %18 = OpCompositeExtract %v4half %val 3
+         %19 = OpCompositeConstruct %mat4v4half %15 %16 %17 %18
+               OpReturnValue %19
+               OpFunctionEnd
+          %f = OpFunction %void None %20
+         %23 = OpLabel
+         %29 = OpAccessChain %_ptr_Uniform_mat4x4_f16 %u %uint_0 %uint_2
+         %30 = OpLoad %mat4x4_f16 %29
+         %25 = OpFunctionCall %mat4v4half %conv_mat4x4_f16 %30
+         %24 = OpTranspose %mat4v4half %25
+         %36 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0 %33 %uint_1
+         %37 = OpLoad %v4half %36
+         %38 = OpVectorShuffle %v4half %37 %37 1 3 0 2
+         %31 = OpExtInst %half %32 Length %38
+         %40 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0 %33 %uint_1
+         %41 = OpLoad %v4half %40
+         %42 = OpVectorShuffle %v4half %41 %41 1 3 0 2
+         %43 = OpCompositeExtract %half %42 0
+         %39 = OpExtInst %half %32 FAbs %43
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_builtin.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_builtin.wgsl.expected.wgsl
new file mode 100644
index 0000000..d6bd702
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_builtin.wgsl.expected.wgsl

@@ -0,0 +1,10 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : array<mat4x4<f16>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let t = transpose(u[2]);
+  let l = length(u[0][1].ywxz);
+  let a = abs(u[0][1].ywxz.x);
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_fn.wgsl b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_fn.wgsl
new file mode 100644
index 0000000..3566b7e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_fn.wgsl

@@ -0,0 +1,16 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : array<mat4x4<f16>, 4>;
+
+fn a(a : array<mat4x4<f16>, 4>) {}
+fn b(m : mat4x4<f16>) {}
+fn c(v : vec4<f16>) {}
+fn d(f : f16) {}
+
+@compute @workgroup_size(1)
+fn f() {
+    a(u);
+    b(u[1]);
+    c(u[1][0].ywxz);
+    d(u[1][0].ywxz.x);
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_fn.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_fn.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..99d2802
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_fn.wgsl.expected.dxc.hlsl

@@ -0,0 +1,65 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[8];
+};
+
+void a(matrix<float16_t, 4, 4> a_1[4]) {
+}
+
+void b(matrix<float16_t, 4, 4> m) {
+}
+
+void c(vector<float16_t, 4> v) {
+}
+
+void d(float16_t f_1) {
+}
+
+matrix<float16_t, 4, 4> tint_symbol_1(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  return matrix<float16_t, 4, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]), vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]));
+}
+
+typedef matrix<float16_t, 4, 4> tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[8], uint offset) {
+  matrix<float16_t, 4, 4> arr[4] = (matrix<float16_t, 4, 4>[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 32u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(tint_symbol_1(u, 32u));
+  uint2 ubo_load_8 = u[2].xy;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_8_yw = vector<float16_t, 2>(f16tof32(ubo_load_8 >> 16));
+  c(vector<float16_t, 4>(ubo_load_8_xz[0], ubo_load_8_yw[0], ubo_load_8_xz[1], ubo_load_8_yw[1]).ywxz);
+  uint2 ubo_load_9 = u[2].xy;
+  vector<float16_t, 2> ubo_load_9_xz = vector<float16_t, 2>(f16tof32(ubo_load_9 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_9_yw = vector<float16_t, 2>(f16tof32(ubo_load_9 >> 16));
+  d(vector<float16_t, 4>(ubo_load_9_xz[0], ubo_load_9_yw[0], ubo_load_9_xz[1], ubo_load_9_yw[1]).ywxz.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_fn.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_fn.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..62edde6
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_fn.wgsl.expected.fxc.hlsl

@@ -0,0 +1,73 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[8];
+};
+
+void a(matrix<float16_t, 4, 4> a_1[4]) {
+}
+
+void b(matrix<float16_t, 4, 4> m) {
+}
+
+void c(vector<float16_t, 4> v) {
+}
+
+void d(float16_t f_1) {
+}
+
+matrix<float16_t, 4, 4> tint_symbol_1(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  return matrix<float16_t, 4, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]), vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]));
+}
+
+typedef matrix<float16_t, 4, 4> tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[8], uint offset) {
+  matrix<float16_t, 4, 4> arr[4] = (matrix<float16_t, 4, 4>[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 32u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(tint_symbol_1(u, 32u));
+  uint2 ubo_load_8 = u[2].xy;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_8_yw = vector<float16_t, 2>(f16tof32(ubo_load_8 >> 16));
+  c(vector<float16_t, 4>(ubo_load_8_xz[0], ubo_load_8_yw[0], ubo_load_8_xz[1], ubo_load_8_yw[1]).ywxz);
+  uint2 ubo_load_9 = u[2].xy;
+  vector<float16_t, 2> ubo_load_9_xz = vector<float16_t, 2>(f16tof32(ubo_load_9 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_9_yw = vector<float16_t, 2>(f16tof32(ubo_load_9 >> 16));
+  d(vector<float16_t, 4>(ubo_load_9_xz[0], ubo_load_9_yw[0], ubo_load_9_xz[1], ubo_load_9_yw[1]).ywxz.x);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001FAC4F2FC00(5,15-23): error X3000: syntax error: unexpected token 'float16_t'
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001FAC4F2FC00(8,15-23): error X3000: syntax error: unexpected token 'float16_t'
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001FAC4F2FC00(11,15-23): error X3000: syntax error: unexpected token 'float16_t'
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001FAC4F2FC00(14,8-16): error X3000: unrecognized identifier 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_fn.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_fn.wgsl.expected.glsl
new file mode 100644
index 0000000..26fb428
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_fn.wgsl.expected.glsl

@@ -0,0 +1,52 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct mat4x4_f16 {
+  f16vec4 col0;
+  f16vec4 col1;
+  f16vec4 col2;
+  f16vec4 col3;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  mat4x4_f16 inner[4];
+} u;
+
+void a(f16mat4 a_1[4]) {
+}
+
+void b(f16mat4 m) {
+}
+
+void c(f16vec4 v) {
+}
+
+void d(float16_t f_1) {
+}
+
+f16mat4 conv_mat4x4_f16(mat4x4_f16 val) {
+  return f16mat4(val.col0, val.col1, val.col2, val.col3);
+}
+
+f16mat4[4] conv_arr4_mat4x4_f16(mat4x4_f16 val[4]) {
+  f16mat4 arr[4] = f16mat4[4](f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_mat4x4_f16(val[i]);
+    }
+  }
+  return arr;
+}
+
+void f() {
+  a(conv_arr4_mat4x4_f16(u.inner));
+  b(conv_mat4x4_f16(u.inner[1u]));
+  c(u.inner[1u].col0.ywxz);
+  d(u.inner[1u].col0.ywxz[0u]);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_fn.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_fn.wgsl.expected.msl
new file mode 100644
index 0000000..7824fac
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_fn.wgsl.expected.msl

@@ -0,0 +1,36 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+void a(tint_array<half4x4, 4> a_1) {
+}
+
+void b(half4x4 m) {
+}
+
+void c(half4 v) {
+}
+
+void d(half f_1) {
+}
+
+kernel void f(const constant tint_array<half4x4, 4>* tint_symbol [[buffer(0)]]) {
+  a(*(tint_symbol));
+  b((*(tint_symbol))[1]);
+  c(half4((*(tint_symbol))[1][0]).ywxz);
+  d(half4((*(tint_symbol))[1][0]).ywxz[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_fn.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_fn.wgsl.expected.spvasm
new file mode 100644
index 0000000..b1b21d5
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_fn.wgsl.expected.spvasm

@@ -0,0 +1,169 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 97
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %mat4x4_f16 "mat4x4_f16"
+               OpMemberName %mat4x4_f16 0 "col0"
+               OpMemberName %mat4x4_f16 1 "col1"
+               OpMemberName %mat4x4_f16 2 "col2"
+               OpMemberName %mat4x4_f16 3 "col3"
+               OpName %u "u"
+               OpName %a "a"
+               OpName %a_1 "a_1"
+               OpName %b "b"
+               OpName %m "m"
+               OpName %c "c"
+               OpName %v "v"
+               OpName %d "d"
+               OpName %f_1 "f_1"
+               OpName %conv_mat4x4_f16 "conv_mat4x4_f16"
+               OpName %val "val"
+               OpName %conv_arr4_mat4x4_f16 "conv_arr4_mat4x4_f16"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %mat4x4_f16 0 Offset 0
+               OpMemberDecorate %mat4x4_f16 1 Offset 8
+               OpMemberDecorate %mat4x4_f16 2 Offset 16
+               OpMemberDecorate %mat4x4_f16 3 Offset 24
+               OpDecorate %_arr_mat4x4_f16_uint_4 ArrayStride 32
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpDecorate %_arr_mat4v4half_uint_4 ArrayStride 32
+       %half = OpTypeFloat 16
+     %v4half = OpTypeVector %half 4
+ %mat4x4_f16 = OpTypeStruct %v4half %v4half %v4half %v4half
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat4x4_f16_uint_4 = OpTypeArray %mat4x4_f16 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_mat4x4_f16_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+       %void = OpTypeVoid
+ %mat4v4half = OpTypeMatrix %v4half 4
+%_arr_mat4v4half_uint_4 = OpTypeArray %mat4v4half %uint_4
+         %10 = OpTypeFunction %void %_arr_mat4v4half_uint_4
+         %17 = OpTypeFunction %void %mat4v4half
+         %21 = OpTypeFunction %void %v4half
+         %25 = OpTypeFunction %void %half
+         %29 = OpTypeFunction %mat4v4half %mat4x4_f16
+         %38 = OpTypeFunction %_arr_mat4v4half_uint_4 %_arr_mat4x4_f16_uint_4
+%_ptr_Function__arr_mat4v4half_uint_4 = OpTypePointer Function %_arr_mat4v4half_uint_4
+         %44 = OpConstantNull %_arr_mat4v4half_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %47 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_mat4x4_f16_uint_4 = OpTypePointer Function %_arr_mat4x4_f16_uint_4
+         %60 = OpConstantNull %_arr_mat4x4_f16_uint_4
+%_ptr_Function_mat4v4half = OpTypePointer Function %mat4v4half
+%_ptr_Function_mat4x4_f16 = OpTypePointer Function %mat4x4_f16
+     %uint_1 = OpConstant %uint 1
+         %73 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_mat4x4_f16_uint_4 = OpTypePointer Uniform %_arr_mat4x4_f16_uint_4
+%_ptr_Uniform_mat4x4_f16 = OpTypePointer Uniform %mat4x4_f16
+%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half
+          %a = OpFunction %void None %10
+        %a_1 = OpFunctionParameter %_arr_mat4v4half_uint_4
+         %16 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %b = OpFunction %void None %17
+          %m = OpFunctionParameter %mat4v4half
+         %20 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %c = OpFunction %void None %21
+          %v = OpFunctionParameter %v4half
+         %24 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %d = OpFunction %void None %25
+        %f_1 = OpFunctionParameter %half
+         %28 = OpLabel
+               OpReturn
+               OpFunctionEnd
+%conv_mat4x4_f16 = OpFunction %mat4v4half None %29
+        %val = OpFunctionParameter %mat4x4_f16
+         %32 = OpLabel
+         %33 = OpCompositeExtract %v4half %val 0
+         %34 = OpCompositeExtract %v4half %val 1
+         %35 = OpCompositeExtract %v4half %val 2
+         %36 = OpCompositeExtract %v4half %val 3
+         %37 = OpCompositeConstruct %mat4v4half %33 %34 %35 %36
+               OpReturnValue %37
+               OpFunctionEnd
+%conv_arr4_mat4x4_f16 = OpFunction %_arr_mat4v4half_uint_4 None %38
+      %val_0 = OpFunctionParameter %_arr_mat4x4_f16_uint_4
+         %41 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_mat4v4half_uint_4 Function %44
+          %i = OpVariable %_ptr_Function_uint Function %47
+%var_for_index = OpVariable %_ptr_Function__arr_mat4x4_f16_uint_4 Function %60
+               OpBranch %48
+         %48 = OpLabel
+               OpLoopMerge %49 %50 None
+               OpBranch %51
+         %51 = OpLabel
+         %53 = OpLoad %uint %i
+         %54 = OpULessThan %bool %53 %uint_4
+         %52 = OpLogicalNot %bool %54
+               OpSelectionMerge %56 None
+               OpBranchConditional %52 %57 %56
+         %57 = OpLabel
+               OpBranch %49
+         %56 = OpLabel
+               OpStore %var_for_index %val_0
+         %61 = OpLoad %uint %i
+         %63 = OpAccessChain %_ptr_Function_mat4v4half %arr %61
+         %65 = OpLoad %uint %i
+         %67 = OpAccessChain %_ptr_Function_mat4x4_f16 %var_for_index %65
+         %68 = OpLoad %mat4x4_f16 %67
+         %64 = OpFunctionCall %mat4v4half %conv_mat4x4_f16 %68
+               OpStore %63 %64
+               OpBranch %50
+         %50 = OpLabel
+         %69 = OpLoad %uint %i
+         %71 = OpIAdd %uint %69 %uint_1
+               OpStore %i %71
+               OpBranch %48
+         %49 = OpLabel
+         %72 = OpLoad %_arr_mat4v4half_uint_4 %arr
+               OpReturnValue %72
+               OpFunctionEnd
+          %f = OpFunction %void None %73
+         %75 = OpLabel
+         %80 = OpAccessChain %_ptr_Uniform__arr_mat4x4_f16_uint_4 %u %uint_0
+         %81 = OpLoad %_arr_mat4x4_f16_uint_4 %80
+         %77 = OpFunctionCall %_arr_mat4v4half_uint_4 %conv_arr4_mat4x4_f16 %81
+         %76 = OpFunctionCall %void %a %77
+         %85 = OpAccessChain %_ptr_Uniform_mat4x4_f16 %u %uint_0 %uint_1
+         %86 = OpLoad %mat4x4_f16 %85
+         %83 = OpFunctionCall %mat4v4half %conv_mat4x4_f16 %86
+         %82 = OpFunctionCall %void %b %83
+         %89 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0 %uint_1 %uint_0
+         %90 = OpLoad %v4half %89
+         %91 = OpVectorShuffle %v4half %90 %90 1 3 0 2
+         %87 = OpFunctionCall %void %c %91
+         %93 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0 %uint_1 %uint_0
+         %94 = OpLoad %v4half %93
+         %95 = OpVectorShuffle %v4half %94 %94 1 3 0 2
+         %96 = OpCompositeExtract %half %95 0
+         %92 = OpFunctionCall %void %d %96
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_fn.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_fn.wgsl.expected.wgsl
new file mode 100644
index 0000000..830f697
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_fn.wgsl.expected.wgsl

@@ -0,0 +1,23 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : array<mat4x4<f16>, 4>;
+
+fn a(a : array<mat4x4<f16>, 4>) {
+}
+
+fn b(m : mat4x4<f16>) {
+}
+
+fn c(v : vec4<f16>) {
+}
+
+fn d(f : f16) {
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  a(u);
+  b(u[1]);
+  c(u[1][0].ywxz);
+  d(u[1][0].ywxz.x);
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_private.wgsl b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_private.wgsl
new file mode 100644
index 0000000..ea2580f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_private.wgsl

@@ -0,0 +1,12 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : array<mat4x4<f16>, 4>;
+var<private> p : array<mat4x4<f16>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    p = u;
+    p[1] = u[2];
+    p[1][0] = u[0][1].ywxz;
+    p[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_private.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_private.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..d5fc3e3
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_private.wgsl.expected.dxc.hlsl

@@ -0,0 +1,51 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[8];
+};
+static matrix<float16_t, 4, 4> p[4] = (matrix<float16_t, 4, 4>[4])0;
+
+matrix<float16_t, 4, 4> tint_symbol_1(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  return matrix<float16_t, 4, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]), vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]));
+}
+
+typedef matrix<float16_t, 4, 4> tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[8], uint offset) {
+  matrix<float16_t, 4, 4> arr[4] = (matrix<float16_t, 4, 4>[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 32u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = tint_symbol_1(u, 64u);
+  uint2 ubo_load_8 = u[0].zw;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_8_yw = vector<float16_t, 2>(f16tof32(ubo_load_8 >> 16));
+  p[1][0] = vector<float16_t, 4>(ubo_load_8_xz[0], ubo_load_8_yw[0], ubo_load_8_xz[1], ubo_load_8_yw[1]).ywxz;
+  p[1][0].x = float16_t(f16tof32(((u[0].z) & 0xFFFF)));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_private.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_private.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..7983cff
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_private.wgsl.expected.fxc.hlsl

@@ -0,0 +1,56 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[8];
+};
+static matrix<float16_t, 4, 4> p[4] = (matrix<float16_t, 4, 4>[4])0;
+
+matrix<float16_t, 4, 4> tint_symbol_1(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  return matrix<float16_t, 4, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]), vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]));
+}
+
+typedef matrix<float16_t, 4, 4> tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[8], uint offset) {
+  matrix<float16_t, 4, 4> arr[4] = (matrix<float16_t, 4, 4>[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 32u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = tint_symbol_1(u, 64u);
+  uint2 ubo_load_8 = u[0].zw;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_8_yw = vector<float16_t, 2>(f16tof32(ubo_load_8 >> 16));
+  p[1][0] = vector<float16_t, 4>(ubo_load_8_xz[0], ubo_load_8_yw[0], ubo_load_8_xz[1], ubo_load_8_yw[1]).ywxz;
+  p[1][0].x = float16_t(f16tof32(((u[0].z) & 0xFFFF)));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x00000219133D8C70(4,15-23): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_private.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_private.wgsl.expected.glsl
new file mode 100644
index 0000000..67c856e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_private.wgsl.expected.glsl

@@ -0,0 +1,41 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct mat4x4_f16 {
+  f16vec4 col0;
+  f16vec4 col1;
+  f16vec4 col2;
+  f16vec4 col3;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  mat4x4_f16 inner[4];
+} u;
+
+f16mat4 p[4] = f16mat4[4](f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf));
+f16mat4 conv_mat4x4_f16(mat4x4_f16 val) {
+  return f16mat4(val.col0, val.col1, val.col2, val.col3);
+}
+
+f16mat4[4] conv_arr4_mat4x4_f16(mat4x4_f16 val[4]) {
+  f16mat4 arr[4] = f16mat4[4](f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_mat4x4_f16(val[i]);
+    }
+  }
+  return arr;
+}
+
+void f() {
+  p = conv_arr4_mat4x4_f16(u.inner);
+  p[1] = conv_mat4x4_f16(u.inner[2u]);
+  p[1][0] = u.inner[0u].col1.ywxz;
+  p[1][0].x = u.inner[0u].col1[0u];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_private.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_private.wgsl.expected.msl
new file mode 100644
index 0000000..7cc2442
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_private.wgsl.expected.msl

@@ -0,0 +1,25 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+kernel void f(const constant tint_array<half4x4, 4>* tint_symbol_1 [[buffer(0)]]) {
+  thread tint_array<half4x4, 4> tint_symbol = {};
+  tint_symbol = *(tint_symbol_1);
+  tint_symbol[1] = (*(tint_symbol_1))[2];
+  tint_symbol[1][0] = half4((*(tint_symbol_1))[0][1]).ywxz;
+  tint_symbol[1][0][0] = (*(tint_symbol_1))[0][1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_private.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_private.wgsl.expected.spvasm
new file mode 100644
index 0000000..8a9baae
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_private.wgsl.expected.spvasm

@@ -0,0 +1,149 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 88
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %mat4x4_f16 "mat4x4_f16"
+               OpMemberName %mat4x4_f16 0 "col0"
+               OpMemberName %mat4x4_f16 1 "col1"
+               OpMemberName %mat4x4_f16 2 "col2"
+               OpMemberName %mat4x4_f16 3 "col3"
+               OpName %u "u"
+               OpName %p "p"
+               OpName %conv_mat4x4_f16 "conv_mat4x4_f16"
+               OpName %val "val"
+               OpName %conv_arr4_mat4x4_f16 "conv_arr4_mat4x4_f16"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %mat4x4_f16 0 Offset 0
+               OpMemberDecorate %mat4x4_f16 1 Offset 8
+               OpMemberDecorate %mat4x4_f16 2 Offset 16
+               OpMemberDecorate %mat4x4_f16 3 Offset 24
+               OpDecorate %_arr_mat4x4_f16_uint_4 ArrayStride 32
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpDecorate %_arr_mat4v4half_uint_4 ArrayStride 32
+       %half = OpTypeFloat 16
+     %v4half = OpTypeVector %half 4
+ %mat4x4_f16 = OpTypeStruct %v4half %v4half %v4half %v4half
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat4x4_f16_uint_4 = OpTypeArray %mat4x4_f16 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_mat4x4_f16_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat4v4half = OpTypeMatrix %v4half 4
+%_arr_mat4v4half_uint_4 = OpTypeArray %mat4v4half %uint_4
+%_ptr_Private__arr_mat4v4half_uint_4 = OpTypePointer Private %_arr_mat4v4half_uint_4
+         %14 = OpConstantNull %_arr_mat4v4half_uint_4
+          %p = OpVariable %_ptr_Private__arr_mat4v4half_uint_4 Private %14
+         %15 = OpTypeFunction %mat4v4half %mat4x4_f16
+         %24 = OpTypeFunction %_arr_mat4v4half_uint_4 %_arr_mat4x4_f16_uint_4
+%_ptr_Function__arr_mat4v4half_uint_4 = OpTypePointer Function %_arr_mat4v4half_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %32 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_mat4x4_f16_uint_4 = OpTypePointer Function %_arr_mat4x4_f16_uint_4
+         %45 = OpConstantNull %_arr_mat4x4_f16_uint_4
+%_ptr_Function_mat4v4half = OpTypePointer Function %mat4v4half
+%_ptr_Function_mat4x4_f16 = OpTypePointer Function %mat4x4_f16
+     %uint_1 = OpConstant %uint 1
+       %void = OpTypeVoid
+         %58 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_mat4x4_f16_uint_4 = OpTypePointer Uniform %_arr_mat4x4_f16_uint_4
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Private_mat4v4half = OpTypePointer Private %mat4v4half
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_mat4x4_f16 = OpTypePointer Uniform %mat4x4_f16
+         %76 = OpConstantNull %int
+%_ptr_Private_v4half = OpTypePointer Private %v4half
+%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half
+%_ptr_Private_half = OpTypePointer Private %half
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+%conv_mat4x4_f16 = OpFunction %mat4v4half None %15
+        %val = OpFunctionParameter %mat4x4_f16
+         %18 = OpLabel
+         %19 = OpCompositeExtract %v4half %val 0
+         %20 = OpCompositeExtract %v4half %val 1
+         %21 = OpCompositeExtract %v4half %val 2
+         %22 = OpCompositeExtract %v4half %val 3
+         %23 = OpCompositeConstruct %mat4v4half %19 %20 %21 %22
+               OpReturnValue %23
+               OpFunctionEnd
+%conv_arr4_mat4x4_f16 = OpFunction %_arr_mat4v4half_uint_4 None %24
+      %val_0 = OpFunctionParameter %_arr_mat4x4_f16_uint_4
+         %27 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_mat4v4half_uint_4 Function %14
+          %i = OpVariable %_ptr_Function_uint Function %32
+%var_for_index = OpVariable %_ptr_Function__arr_mat4x4_f16_uint_4 Function %45
+               OpBranch %33
+         %33 = OpLabel
+               OpLoopMerge %34 %35 None
+               OpBranch %36
+         %36 = OpLabel
+         %38 = OpLoad %uint %i
+         %39 = OpULessThan %bool %38 %uint_4
+         %37 = OpLogicalNot %bool %39
+               OpSelectionMerge %41 None
+               OpBranchConditional %37 %42 %41
+         %42 = OpLabel
+               OpBranch %34
+         %41 = OpLabel
+               OpStore %var_for_index %val_0
+         %46 = OpLoad %uint %i
+         %48 = OpAccessChain %_ptr_Function_mat4v4half %arr %46
+         %50 = OpLoad %uint %i
+         %52 = OpAccessChain %_ptr_Function_mat4x4_f16 %var_for_index %50
+         %53 = OpLoad %mat4x4_f16 %52
+         %49 = OpFunctionCall %mat4v4half %conv_mat4x4_f16 %53
+               OpStore %48 %49
+               OpBranch %35
+         %35 = OpLabel
+         %54 = OpLoad %uint %i
+         %56 = OpIAdd %uint %54 %uint_1
+               OpStore %i %56
+               OpBranch %33
+         %34 = OpLabel
+         %57 = OpLoad %_arr_mat4v4half_uint_4 %arr
+               OpReturnValue %57
+               OpFunctionEnd
+          %f = OpFunction %void None %58
+         %61 = OpLabel
+         %65 = OpAccessChain %_ptr_Uniform__arr_mat4x4_f16_uint_4 %u %uint_0
+         %66 = OpLoad %_arr_mat4x4_f16_uint_4 %65
+         %62 = OpFunctionCall %_arr_mat4v4half_uint_4 %conv_arr4_mat4x4_f16 %66
+               OpStore %p %62
+         %70 = OpAccessChain %_ptr_Private_mat4v4half %p %int_1
+         %74 = OpAccessChain %_ptr_Uniform_mat4x4_f16 %u %uint_0 %uint_2
+         %75 = OpLoad %mat4x4_f16 %74
+         %71 = OpFunctionCall %mat4v4half %conv_mat4x4_f16 %75
+               OpStore %70 %71
+         %78 = OpAccessChain %_ptr_Private_v4half %p %int_1 %76
+         %80 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0 %32 %uint_1
+         %81 = OpLoad %v4half %80
+         %82 = OpVectorShuffle %v4half %81 %81 1 3 0 2
+               OpStore %78 %82
+         %84 = OpAccessChain %_ptr_Private_half %p %int_1 %76 %uint_0
+         %86 = OpAccessChain %_ptr_Uniform_half %u %uint_0 %32 %uint_1 %32
+         %87 = OpLoad %half %86
+               OpStore %84 %87
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_private.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_private.wgsl.expected.wgsl
new file mode 100644
index 0000000..635f7aa
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_private.wgsl.expected.wgsl

@@ -0,0 +1,13 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : array<mat4x4<f16>, 4>;
+
+var<private> p : array<mat4x4<f16>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  p = u;
+  p[1] = u[2];
+  p[1][0] = u[0][1].ywxz;
+  p[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_storage.wgsl b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_storage.wgsl
new file mode 100644
index 0000000..6f266ff
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_storage.wgsl

@@ -0,0 +1,12 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : array<mat4x4<f16>, 4>;
+@group(0) @binding(1) var<storage, read_write> s : array<mat4x4<f16>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    s = u;
+    s[1] = u[2];
+    s[1][0] = u[0][1].ywxz;
+    s[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_storage.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_storage.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..8a3aa03
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_storage.wgsl.expected.dxc.hlsl

@@ -0,0 +1,67 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[8];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 4, 4> value) {
+  buffer.Store<vector<float16_t, 4> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 8u), value[1u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 16u), value[2u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 24u), value[3u]);
+}
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 4, 4> value[4]) {
+  matrix<float16_t, 4, 4> array[4] = value;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      tint_symbol_1(buffer, (offset + (i * 32u)), array[i]);
+    }
+  }
+}
+
+matrix<float16_t, 4, 4> tint_symbol_4(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  return matrix<float16_t, 4, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]), vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]));
+}
+
+typedef matrix<float16_t, 4, 4> tint_symbol_3_ret[4];
+tint_symbol_3_ret tint_symbol_3(uint4 buffer[8], uint offset) {
+  matrix<float16_t, 4, 4> arr[4] = (matrix<float16_t, 4, 4>[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_4(buffer, (offset + (i_1 * 32u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_3(u, 0u));
+  tint_symbol_1(s, 32u, tint_symbol_4(u, 64u));
+  uint2 ubo_load_8 = u[0].zw;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_8_yw = vector<float16_t, 2>(f16tof32(ubo_load_8 >> 16));
+  s.Store<vector<float16_t, 4> >(32u, vector<float16_t, 4>(ubo_load_8_xz[0], ubo_load_8_yw[0], ubo_load_8_xz[1], ubo_load_8_yw[1]).ywxz);
+  s.Store<float16_t>(32u, float16_t(f16tof32(((u[0].z) & 0xFFFF))));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_storage.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_storage.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..85e99c8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_storage.wgsl.expected.fxc.hlsl

@@ -0,0 +1,73 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[8];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 4, 4> value) {
+  buffer.Store<vector<float16_t, 4> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 8u), value[1u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 16u), value[2u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 24u), value[3u]);
+}
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 4, 4> value[4]) {
+  matrix<float16_t, 4, 4> array[4] = value;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      tint_symbol_1(buffer, (offset + (i * 32u)), array[i]);
+    }
+  }
+}
+
+matrix<float16_t, 4, 4> tint_symbol_4(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  return matrix<float16_t, 4, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]), vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]));
+}
+
+typedef matrix<float16_t, 4, 4> tint_symbol_3_ret[4];
+tint_symbol_3_ret tint_symbol_3(uint4 buffer[8], uint offset) {
+  matrix<float16_t, 4, 4> arr[4] = (matrix<float16_t, 4, 4>[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_4(buffer, (offset + (i_1 * 32u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_3(u, 0u));
+  tint_symbol_1(s, 32u, tint_symbol_4(u, 64u));
+  uint2 ubo_load_8 = u[0].zw;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_8_yw = vector<float16_t, 2>(f16tof32(ubo_load_8 >> 16));
+  s.Store<vector<float16_t, 4> >(32u, vector<float16_t, 4>(ubo_load_8_xz[0], ubo_load_8_yw[0], ubo_load_8_xz[1], ubo_load_8_yw[1]).ywxz);
+  s.Store<float16_t>(32u, float16_t(f16tof32(((u[0].z) & 0xFFFF))));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000002420B3F4EF0(6,68-76): error X3000: syntax error: unexpected token 'float16_t'
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000002420B3F4EF0(7,3-14): error X3018: invalid subscript 'Store'
+

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_storage.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_storage.wgsl.expected.glsl
new file mode 100644
index 0000000..a1c9b3d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_storage.wgsl.expected.glsl

@@ -0,0 +1,44 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct mat4x4_f16 {
+  f16vec4 col0;
+  f16vec4 col1;
+  f16vec4 col2;
+  f16vec4 col3;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  mat4x4_f16 inner[4];
+} u;
+
+layout(binding = 1, std430) buffer u_block_ssbo {
+  f16mat4 inner[4];
+} s;
+
+f16mat4 conv_mat4x4_f16(mat4x4_f16 val) {
+  return f16mat4(val.col0, val.col1, val.col2, val.col3);
+}
+
+f16mat4[4] conv_arr4_mat4x4_f16(mat4x4_f16 val[4]) {
+  f16mat4 arr[4] = f16mat4[4](f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_mat4x4_f16(val[i]);
+    }
+  }
+  return arr;
+}
+
+void f() {
+  s.inner = conv_arr4_mat4x4_f16(u.inner);
+  s.inner[1] = conv_mat4x4_f16(u.inner[2u]);
+  s.inner[1][0] = u.inner[0u].col1.ywxz;
+  s.inner[1][0].x = u.inner[0u].col1[0u];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_storage.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_storage.wgsl.expected.msl
new file mode 100644
index 0000000..a1a1e39
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_storage.wgsl.expected.msl

@@ -0,0 +1,24 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+kernel void f(device tint_array<half4x4, 4>* tint_symbol [[buffer(1)]], const constant tint_array<half4x4, 4>* tint_symbol_1 [[buffer(0)]]) {
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[2];
+  (*(tint_symbol))[1][0] = half4((*(tint_symbol_1))[0][1]).ywxz;
+  (*(tint_symbol))[1][0][0] = (*(tint_symbol_1))[0][1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_storage.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_storage.wgsl.expected.spvasm
new file mode 100644
index 0000000..ceb3651
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_storage.wgsl.expected.spvasm

@@ -0,0 +1,160 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 91
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %mat4x4_f16 "mat4x4_f16"
+               OpMemberName %mat4x4_f16 0 "col0"
+               OpMemberName %mat4x4_f16 1 "col1"
+               OpMemberName %mat4x4_f16 2 "col2"
+               OpMemberName %mat4x4_f16 3 "col3"
+               OpName %u "u"
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %s "s"
+               OpName %conv_mat4x4_f16 "conv_mat4x4_f16"
+               OpName %val "val"
+               OpName %conv_arr4_mat4x4_f16 "conv_arr4_mat4x4_f16"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %mat4x4_f16 0 Offset 0
+               OpMemberDecorate %mat4x4_f16 1 Offset 8
+               OpMemberDecorate %mat4x4_f16 2 Offset 16
+               OpMemberDecorate %mat4x4_f16 3 Offset 24
+               OpDecorate %_arr_mat4x4_f16_uint_4 ArrayStride 32
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 8
+               OpDecorate %_arr_mat4v4half_uint_4 ArrayStride 32
+               OpDecorate %s DescriptorSet 0
+               OpDecorate %s Binding 1
+       %half = OpTypeFloat 16
+     %v4half = OpTypeVector %half 4
+ %mat4x4_f16 = OpTypeStruct %v4half %v4half %v4half %v4half
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat4x4_f16_uint_4 = OpTypeArray %mat4x4_f16 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_mat4x4_f16_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat4v4half = OpTypeMatrix %v4half 4
+%_arr_mat4v4half_uint_4 = OpTypeArray %mat4v4half %uint_4
+    %u_block = OpTypeStruct %_arr_mat4v4half_uint_4
+%_ptr_StorageBuffer_u_block = OpTypePointer StorageBuffer %u_block
+          %s = OpVariable %_ptr_StorageBuffer_u_block StorageBuffer
+         %15 = OpTypeFunction %mat4v4half %mat4x4_f16
+         %24 = OpTypeFunction %_arr_mat4v4half_uint_4 %_arr_mat4x4_f16_uint_4
+%_ptr_Function__arr_mat4v4half_uint_4 = OpTypePointer Function %_arr_mat4v4half_uint_4
+         %30 = OpConstantNull %_arr_mat4v4half_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %33 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_mat4x4_f16_uint_4 = OpTypePointer Function %_arr_mat4x4_f16_uint_4
+         %46 = OpConstantNull %_arr_mat4x4_f16_uint_4
+%_ptr_Function_mat4v4half = OpTypePointer Function %mat4v4half
+%_ptr_Function_mat4x4_f16 = OpTypePointer Function %mat4x4_f16
+     %uint_1 = OpConstant %uint 1
+       %void = OpTypeVoid
+         %59 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_StorageBuffer__arr_mat4v4half_uint_4 = OpTypePointer StorageBuffer %_arr_mat4v4half_uint_4
+%_ptr_Uniform__arr_mat4x4_f16_uint_4 = OpTypePointer Uniform %_arr_mat4x4_f16_uint_4
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_StorageBuffer_mat4v4half = OpTypePointer StorageBuffer %mat4v4half
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_mat4x4_f16 = OpTypePointer Uniform %mat4x4_f16
+         %79 = OpConstantNull %int
+%_ptr_StorageBuffer_v4half = OpTypePointer StorageBuffer %v4half
+%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half
+%_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+%conv_mat4x4_f16 = OpFunction %mat4v4half None %15
+        %val = OpFunctionParameter %mat4x4_f16
+         %18 = OpLabel
+         %19 = OpCompositeExtract %v4half %val 0
+         %20 = OpCompositeExtract %v4half %val 1
+         %21 = OpCompositeExtract %v4half %val 2
+         %22 = OpCompositeExtract %v4half %val 3
+         %23 = OpCompositeConstruct %mat4v4half %19 %20 %21 %22
+               OpReturnValue %23
+               OpFunctionEnd
+%conv_arr4_mat4x4_f16 = OpFunction %_arr_mat4v4half_uint_4 None %24
+      %val_0 = OpFunctionParameter %_arr_mat4x4_f16_uint_4
+         %27 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_mat4v4half_uint_4 Function %30
+          %i = OpVariable %_ptr_Function_uint Function %33
+%var_for_index = OpVariable %_ptr_Function__arr_mat4x4_f16_uint_4 Function %46
+               OpBranch %34
+         %34 = OpLabel
+               OpLoopMerge %35 %36 None
+               OpBranch %37
+         %37 = OpLabel
+         %39 = OpLoad %uint %i
+         %40 = OpULessThan %bool %39 %uint_4
+         %38 = OpLogicalNot %bool %40
+               OpSelectionMerge %42 None
+               OpBranchConditional %38 %43 %42
+         %43 = OpLabel
+               OpBranch %35
+         %42 = OpLabel
+               OpStore %var_for_index %val_0
+         %47 = OpLoad %uint %i
+         %49 = OpAccessChain %_ptr_Function_mat4v4half %arr %47
+         %51 = OpLoad %uint %i
+         %53 = OpAccessChain %_ptr_Function_mat4x4_f16 %var_for_index %51
+         %54 = OpLoad %mat4x4_f16 %53
+         %50 = OpFunctionCall %mat4v4half %conv_mat4x4_f16 %54
+               OpStore %49 %50
+               OpBranch %36
+         %36 = OpLabel
+         %55 = OpLoad %uint %i
+         %57 = OpIAdd %uint %55 %uint_1
+               OpStore %i %57
+               OpBranch %34
+         %35 = OpLabel
+         %58 = OpLoad %_arr_mat4v4half_uint_4 %arr
+               OpReturnValue %58
+               OpFunctionEnd
+          %f = OpFunction %void None %59
+         %62 = OpLabel
+         %65 = OpAccessChain %_ptr_StorageBuffer__arr_mat4v4half_uint_4 %s %uint_0
+         %68 = OpAccessChain %_ptr_Uniform__arr_mat4x4_f16_uint_4 %u %uint_0
+         %69 = OpLoad %_arr_mat4x4_f16_uint_4 %68
+         %66 = OpFunctionCall %_arr_mat4v4half_uint_4 %conv_arr4_mat4x4_f16 %69
+               OpStore %65 %66
+         %73 = OpAccessChain %_ptr_StorageBuffer_mat4v4half %s %uint_0 %int_1
+         %77 = OpAccessChain %_ptr_Uniform_mat4x4_f16 %u %uint_0 %uint_2
+         %78 = OpLoad %mat4x4_f16 %77
+         %74 = OpFunctionCall %mat4v4half %conv_mat4x4_f16 %78
+               OpStore %73 %74
+         %81 = OpAccessChain %_ptr_StorageBuffer_v4half %s %uint_0 %int_1 %79
+         %83 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0 %33 %uint_1
+         %84 = OpLoad %v4half %83
+         %85 = OpVectorShuffle %v4half %84 %84 1 3 0 2
+               OpStore %81 %85
+         %87 = OpAccessChain %_ptr_StorageBuffer_half %s %uint_0 %int_1 %79 %uint_0
+         %89 = OpAccessChain %_ptr_Uniform_half %u %uint_0 %33 %uint_1 %33
+         %90 = OpLoad %half %89
+               OpStore %87 %90
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_storage.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_storage.wgsl.expected.wgsl
new file mode 100644
index 0000000..88e110e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_storage.wgsl.expected.wgsl

@@ -0,0 +1,13 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : array<mat4x4<f16>, 4>;
+
+@group(0) @binding(1) var<storage, read_write> s : array<mat4x4<f16>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  s = u;
+  s[1] = u[2];
+  s[1][0] = u[0][1].ywxz;
+  s[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_workgroup.wgsl b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_workgroup.wgsl
new file mode 100644
index 0000000..aff44e7
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_workgroup.wgsl

@@ -0,0 +1,12 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : array<mat4x4<f16>, 4>;
+var<workgroup> w : array<mat4x4<f16>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    w = u;
+    w[1] = u[2];
+    w[1][0] = u[0][1].ywxz;
+    w[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_workgroup.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_workgroup.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..94db3c4
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_workgroup.wgsl.expected.dxc.hlsl

@@ -0,0 +1,66 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[8];
+};
+groupshared matrix<float16_t, 4, 4> w[4];
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+matrix<float16_t, 4, 4> tint_symbol_3(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  return matrix<float16_t, 4, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]), vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]));
+}
+
+typedef matrix<float16_t, 4, 4> tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[8], uint offset) {
+  matrix<float16_t, 4, 4> arr[4] = (matrix<float16_t, 4, 4>[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 32u)));
+    }
+  }
+  return arr;
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      const uint i = idx;
+      w[i] = matrix<float16_t, 4, 4>((float16_t(0.0h)).xxxx, (float16_t(0.0h)).xxxx, (float16_t(0.0h)).xxxx, (float16_t(0.0h)).xxxx);
+    }
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = tint_symbol_3(u, 64u);
+  uint2 ubo_load_8 = u[0].zw;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_8_yw = vector<float16_t, 2>(f16tof32(ubo_load_8 >> 16));
+  w[1][0] = vector<float16_t, 4>(ubo_load_8_xz[0], ubo_load_8_yw[0], ubo_load_8_xz[1], ubo_load_8_yw[1]).ywxz;
+  w[1][0].x = float16_t(f16tof32(((u[0].z) & 0xFFFF)));
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_workgroup.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_workgroup.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..ccb834a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_workgroup.wgsl.expected.fxc.hlsl

@@ -0,0 +1,71 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[8];
+};
+groupshared matrix<float16_t, 4, 4> w[4];
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+matrix<float16_t, 4, 4> tint_symbol_3(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  return matrix<float16_t, 4, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]), vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]));
+}
+
+typedef matrix<float16_t, 4, 4> tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[8], uint offset) {
+  matrix<float16_t, 4, 4> arr[4] = (matrix<float16_t, 4, 4>[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 32u)));
+    }
+  }
+  return arr;
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      const uint i = idx;
+      w[i] = matrix<float16_t, 4, 4>((float16_t(0.0h)).xxxx, (float16_t(0.0h)).xxxx, (float16_t(0.0h)).xxxx, (float16_t(0.0h)).xxxx);
+    }
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = tint_symbol_3(u, 64u);
+  uint2 ubo_load_8 = u[0].zw;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_8_yw = vector<float16_t, 2>(f16tof32(ubo_load_8 >> 16));
+  w[1][0] = vector<float16_t, 4>(ubo_load_8_xz[0], ubo_load_8_yw[0], ubo_load_8_xz[1], ubo_load_8_yw[1]).ywxz;
+  w[1][0].x = float16_t(f16tof32(((u[0].z) & 0xFFFF)));
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000002B4C90BEEA0(4,20-28): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_workgroup.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_workgroup.wgsl.expected.glsl
new file mode 100644
index 0000000..470c0fa
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_workgroup.wgsl.expected.glsl

@@ -0,0 +1,48 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct mat4x4_f16 {
+  f16vec4 col0;
+  f16vec4 col1;
+  f16vec4 col2;
+  f16vec4 col3;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  mat4x4_f16 inner[4];
+} u;
+
+shared f16mat4 w[4];
+f16mat4 conv_mat4x4_f16(mat4x4_f16 val) {
+  return f16mat4(val.col0, val.col1, val.col2, val.col3);
+}
+
+f16mat4[4] conv_arr4_mat4x4_f16(mat4x4_f16 val[4]) {
+  f16mat4 arr[4] = f16mat4[4](f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_mat4x4_f16(val[i]);
+    }
+  }
+  return arr;
+}
+
+void f(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      uint i = idx;
+      w[i] = f16mat4(f16vec4(0.0hf), f16vec4(0.0hf), f16vec4(0.0hf), f16vec4(0.0hf));
+    }
+  }
+  barrier();
+  w = conv_arr4_mat4x4_f16(u.inner);
+  w[1] = conv_mat4x4_f16(u.inner[2u]);
+  w[1][0] = u.inner[0u].col1.ywxz;
+  w[1][0].x = u.inner[0u].col1[0u];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f(gl_LocalInvocationIndex);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_workgroup.wgsl.expected.msl
new file mode 100644
index 0000000..bd1f293
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_workgroup.wgsl.expected.msl

@@ -0,0 +1,38 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct tint_symbol_5 {
+  tint_array<half4x4, 4> w;
+};
+
+void f_inner(uint local_invocation_index, threadgroup tint_array<half4x4, 4>* const tint_symbol, const constant tint_array<half4x4, 4>* const tint_symbol_1) {
+  for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+    uint const i = idx;
+    (*(tint_symbol))[i] = half4x4(half4(0.0h), half4(0.0h), half4(0.0h), half4(0.0h));
+  }
+  threadgroup_barrier(mem_flags::mem_threadgroup);
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[2];
+  (*(tint_symbol))[1][0] = half4((*(tint_symbol_1))[0][1]).ywxz;
+  (*(tint_symbol))[1][0][0] = (*(tint_symbol_1))[0][1][0];
+}
+
+kernel void f(const constant tint_array<half4x4, 4>* tint_symbol_4 [[buffer(0)]], threadgroup tint_symbol_5* tint_symbol_3 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
+  threadgroup tint_array<half4x4, 4>* const tint_symbol_2 = &((*(tint_symbol_3)).w);
+  f_inner(local_invocation_index, tint_symbol_2, tint_symbol_4);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_workgroup.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_workgroup.wgsl.expected.spvasm
new file mode 100644
index 0000000..a50827e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_workgroup.wgsl.expected.spvasm

@@ -0,0 +1,192 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 113
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f" %local_invocation_index_1
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %local_invocation_index_1 "local_invocation_index_1"
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %mat4x4_f16 "mat4x4_f16"
+               OpMemberName %mat4x4_f16 0 "col0"
+               OpMemberName %mat4x4_f16 1 "col1"
+               OpMemberName %mat4x4_f16 2 "col2"
+               OpMemberName %mat4x4_f16 3 "col3"
+               OpName %u "u"
+               OpName %w "w"
+               OpName %conv_mat4x4_f16 "conv_mat4x4_f16"
+               OpName %val "val"
+               OpName %conv_arr4_mat4x4_f16 "conv_arr4_mat4x4_f16"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %f_inner "f_inner"
+               OpName %local_invocation_index "local_invocation_index"
+               OpName %idx "idx"
+               OpName %f "f"
+               OpDecorate %local_invocation_index_1 BuiltIn LocalInvocationIndex
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %mat4x4_f16 0 Offset 0
+               OpMemberDecorate %mat4x4_f16 1 Offset 8
+               OpMemberDecorate %mat4x4_f16 2 Offset 16
+               OpMemberDecorate %mat4x4_f16 3 Offset 24
+               OpDecorate %_arr_mat4x4_f16_uint_4 ArrayStride 32
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpDecorate %_arr_mat4v4half_uint_4 ArrayStride 32
+       %uint = OpTypeInt 32 0
+%_ptr_Input_uint = OpTypePointer Input %uint
+%local_invocation_index_1 = OpVariable %_ptr_Input_uint Input
+       %half = OpTypeFloat 16
+     %v4half = OpTypeVector %half 4
+ %mat4x4_f16 = OpTypeStruct %v4half %v4half %v4half %v4half
+     %uint_4 = OpConstant %uint 4
+%_arr_mat4x4_f16_uint_4 = OpTypeArray %mat4x4_f16 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_mat4x4_f16_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat4v4half = OpTypeMatrix %v4half 4
+%_arr_mat4v4half_uint_4 = OpTypeArray %mat4v4half %uint_4
+%_ptr_Workgroup__arr_mat4v4half_uint_4 = OpTypePointer Workgroup %_arr_mat4v4half_uint_4
+          %w = OpVariable %_ptr_Workgroup__arr_mat4v4half_uint_4 Workgroup
+         %16 = OpTypeFunction %mat4v4half %mat4x4_f16
+         %25 = OpTypeFunction %_arr_mat4v4half_uint_4 %_arr_mat4x4_f16_uint_4
+%_ptr_Function__arr_mat4v4half_uint_4 = OpTypePointer Function %_arr_mat4v4half_uint_4
+         %31 = OpConstantNull %_arr_mat4v4half_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %34 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_mat4x4_f16_uint_4 = OpTypePointer Function %_arr_mat4x4_f16_uint_4
+         %47 = OpConstantNull %_arr_mat4x4_f16_uint_4
+%_ptr_Function_mat4v4half = OpTypePointer Function %mat4v4half
+%_ptr_Function_mat4x4_f16 = OpTypePointer Function %mat4x4_f16
+     %uint_1 = OpConstant %uint 1
+       %void = OpTypeVoid
+         %60 = OpTypeFunction %void %uint
+%_ptr_Workgroup_mat4v4half = OpTypePointer Workgroup %mat4v4half
+         %78 = OpConstantNull %mat4v4half
+     %uint_2 = OpConstant %uint 2
+   %uint_264 = OpConstant %uint 264
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_mat4x4_f16_uint_4 = OpTypePointer Uniform %_arr_mat4x4_f16_uint_4
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Uniform_mat4x4_f16 = OpTypePointer Uniform %mat4x4_f16
+         %96 = OpConstantNull %int
+%_ptr_Workgroup_v4half = OpTypePointer Workgroup %v4half
+%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half
+%_ptr_Workgroup_half = OpTypePointer Workgroup %half
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+        %108 = OpTypeFunction %void
+%conv_mat4x4_f16 = OpFunction %mat4v4half None %16
+        %val = OpFunctionParameter %mat4x4_f16
+         %19 = OpLabel
+         %20 = OpCompositeExtract %v4half %val 0
+         %21 = OpCompositeExtract %v4half %val 1
+         %22 = OpCompositeExtract %v4half %val 2
+         %23 = OpCompositeExtract %v4half %val 3
+         %24 = OpCompositeConstruct %mat4v4half %20 %21 %22 %23
+               OpReturnValue %24
+               OpFunctionEnd
+%conv_arr4_mat4x4_f16 = OpFunction %_arr_mat4v4half_uint_4 None %25
+      %val_0 = OpFunctionParameter %_arr_mat4x4_f16_uint_4
+         %28 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_mat4v4half_uint_4 Function %31
+          %i = OpVariable %_ptr_Function_uint Function %34
+%var_for_index = OpVariable %_ptr_Function__arr_mat4x4_f16_uint_4 Function %47
+               OpBranch %35
+         %35 = OpLabel
+               OpLoopMerge %36 %37 None
+               OpBranch %38
+         %38 = OpLabel
+         %40 = OpLoad %uint %i
+         %41 = OpULessThan %bool %40 %uint_4
+         %39 = OpLogicalNot %bool %41
+               OpSelectionMerge %43 None
+               OpBranchConditional %39 %44 %43
+         %44 = OpLabel
+               OpBranch %36
+         %43 = OpLabel
+               OpStore %var_for_index %val_0
+         %48 = OpLoad %uint %i
+         %50 = OpAccessChain %_ptr_Function_mat4v4half %arr %48
+         %52 = OpLoad %uint %i
+         %54 = OpAccessChain %_ptr_Function_mat4x4_f16 %var_for_index %52
+         %55 = OpLoad %mat4x4_f16 %54
+         %51 = OpFunctionCall %mat4v4half %conv_mat4x4_f16 %55
+               OpStore %50 %51
+               OpBranch %37
+         %37 = OpLabel
+         %56 = OpLoad %uint %i
+         %58 = OpIAdd %uint %56 %uint_1
+               OpStore %i %58
+               OpBranch %35
+         %36 = OpLabel
+         %59 = OpLoad %_arr_mat4v4half_uint_4 %arr
+               OpReturnValue %59
+               OpFunctionEnd
+    %f_inner = OpFunction %void None %60
+%local_invocation_index = OpFunctionParameter %uint
+         %64 = OpLabel
+        %idx = OpVariable %_ptr_Function_uint Function %34
+               OpStore %idx %local_invocation_index
+               OpBranch %66
+         %66 = OpLabel
+               OpLoopMerge %67 %68 None
+               OpBranch %69
+         %69 = OpLabel
+         %71 = OpLoad %uint %idx
+         %72 = OpULessThan %bool %71 %uint_4
+         %70 = OpLogicalNot %bool %72
+               OpSelectionMerge %73 None
+               OpBranchConditional %70 %74 %73
+         %74 = OpLabel
+               OpBranch %67
+         %73 = OpLabel
+         %75 = OpLoad %uint %idx
+         %77 = OpAccessChain %_ptr_Workgroup_mat4v4half %w %75
+               OpStore %77 %78
+               OpBranch %68
+         %68 = OpLabel
+         %79 = OpLoad %uint %idx
+         %80 = OpIAdd %uint %79 %uint_1
+               OpStore %idx %80
+               OpBranch %66
+         %67 = OpLabel
+               OpControlBarrier %uint_2 %uint_2 %uint_264
+         %87 = OpAccessChain %_ptr_Uniform__arr_mat4x4_f16_uint_4 %u %uint_0
+         %88 = OpLoad %_arr_mat4x4_f16_uint_4 %87
+         %84 = OpFunctionCall %_arr_mat4v4half_uint_4 %conv_arr4_mat4x4_f16 %88
+               OpStore %w %84
+         %91 = OpAccessChain %_ptr_Workgroup_mat4v4half %w %int_1
+         %94 = OpAccessChain %_ptr_Uniform_mat4x4_f16 %u %uint_0 %uint_2
+         %95 = OpLoad %mat4x4_f16 %94
+         %92 = OpFunctionCall %mat4v4half %conv_mat4x4_f16 %95
+               OpStore %91 %92
+         %98 = OpAccessChain %_ptr_Workgroup_v4half %w %int_1 %96
+        %100 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0 %34 %uint_1
+        %101 = OpLoad %v4half %100
+        %102 = OpVectorShuffle %v4half %101 %101 1 3 0 2
+               OpStore %98 %102
+        %104 = OpAccessChain %_ptr_Workgroup_half %w %int_1 %96 %uint_0
+        %106 = OpAccessChain %_ptr_Uniform_half %u %uint_0 %34 %uint_1 %34
+        %107 = OpLoad %half %106
+               OpStore %104 %107
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %108
+        %110 = OpLabel
+        %112 = OpLoad %uint %local_invocation_index_1
+        %111 = OpFunctionCall %void %f_inner %112
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_workgroup.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_workgroup.wgsl.expected.wgsl
new file mode 100644
index 0000000..98299cd
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f16/to_workgroup.wgsl.expected.wgsl

@@ -0,0 +1,13 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : array<mat4x4<f16>, 4>;
+
+var<workgroup> w : array<mat4x4<f16>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  w = u;
+  w[1] = u[2];
+  w[1][0] = u[0][1].ywxz;
+  w[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f32/dynamic_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/array/mat4x4_f32/dynamic_index_via_ptr.wgsl
new file mode 100644
index 0000000..cdc13d2
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f32/dynamic_index_via_ptr.wgsl

@@ -0,0 +1,15 @@
+@group(0) @binding(0) var<uniform> a : array<mat4x4<f32>, 4>;
+
+var<private> counter = 0;
+fn i() -> i32 { counter++; return counter; }
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a       = &a;
+  let p_a_i     = &((*p_a)[i()]);
+  let p_a_i_i   = &((*p_a_i)[i()]);
+
+  let l_a       : array<mat4x4<f32>, 4> = *p_a;
+  let l_a_i     : mat4x4<f32>           = *p_a_i;
+  let l_a_i_i   : vec4<f32>             = *p_a_i_i;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x4_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..cfb9ecf
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,39 @@
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[16];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+float4x4 tint_symbol_1(uint4 buffer[16], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]), asfloat(buffer[scalar_offset_3 / 4]));
+}
+
+typedef float4x4 tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[16], uint offset) {
+  float4x4 arr[4] = (float4x4[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 64u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_a_i_save = i();
+  const int p_a_i_i_save = i();
+  const float4x4 l_a[4] = tint_symbol(a, 0u);
+  const float4x4 l_a_i = tint_symbol_1(a, (64u * uint(p_a_i_save)));
+  const uint scalar_offset_4 = (((64u * uint(p_a_i_save)) + (16u * uint(p_a_i_i_save)))) / 4;
+  const float4 l_a_i_i = asfloat(a[scalar_offset_4 / 4]);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x4_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..cfb9ecf
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,39 @@
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[16];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+float4x4 tint_symbol_1(uint4 buffer[16], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]), asfloat(buffer[scalar_offset_3 / 4]));
+}
+
+typedef float4x4 tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[16], uint offset) {
+  float4x4 arr[4] = (float4x4[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 64u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_a_i_save = i();
+  const int p_a_i_i_save = i();
+  const float4x4 l_a[4] = tint_symbol(a, 0u);
+  const float4x4 l_a_i = tint_symbol_1(a, (64u * uint(p_a_i_save)));
+  const uint scalar_offset_4 = (((64u * uint(p_a_i_save)) + (16u * uint(p_a_i_i_save)))) / 4;
+  const float4 l_a_i_i = asfloat(a[scalar_offset_4 / 4]);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f32/dynamic_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat4x4_f32/dynamic_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..da11781
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f32/dynamic_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,27 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform a_block_ubo {
+  mat4 inner[4];
+} a;
+
+int counter = 0;
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+void f() {
+  int tint_symbol = i();
+  int p_a_i_save = tint_symbol;
+  int tint_symbol_1 = i();
+  int p_a_i_i_save = tint_symbol_1;
+  mat4 l_a[4] = a.inner;
+  mat4 l_a_i = a.inner[p_a_i_save];
+  vec4 l_a_i_i = a.inner[p_a_i_save][p_a_i_i_save];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f32/dynamic_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat4x4_f32/dynamic_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..4aeaf64
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f32/dynamic_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,33 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+int i() {
+  thread int tint_symbol_2 = 0;
+  tint_symbol_2 = as_type<int>((as_type<uint>(tint_symbol_2) + as_type<uint>(1)));
+  return tint_symbol_2;
+}
+
+kernel void f(const constant tint_array<float4x4, 4>* tint_symbol_3 [[buffer(0)]]) {
+  int const tint_symbol = i();
+  int const p_a_i_save = tint_symbol;
+  int const tint_symbol_1 = i();
+  int const p_a_i_i_save = tint_symbol_1;
+  tint_array<float4x4, 4> const l_a = *(tint_symbol_3);
+  float4x4 const l_a_i = (*(tint_symbol_3))[p_a_i_save];
+  float4 const l_a_i_i = (*(tint_symbol_3))[p_a_i_save][p_a_i_i_save];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f32/dynamic_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat4x4_f32/dynamic_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..0cb33b1
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f32/dynamic_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,64 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 37
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %a_block "a_block"
+               OpMemberName %a_block 0 "inner"
+               OpName %a "a"
+               OpName %counter "counter"
+               OpName %i "i"
+               OpName %f "f"
+               OpDecorate %a_block Block
+               OpMemberDecorate %a_block 0 Offset 0
+               OpMemberDecorate %a_block 0 ColMajor
+               OpMemberDecorate %a_block 0 MatrixStride 16
+               OpDecorate %_arr_mat4v4float_uint_4 ArrayStride 64
+               OpDecorate %a NonWritable
+               OpDecorate %a DescriptorSet 0
+               OpDecorate %a Binding 0
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%mat4v4float = OpTypeMatrix %v4float 4
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat4v4float_uint_4 = OpTypeArray %mat4v4float %uint_4
+    %a_block = OpTypeStruct %_arr_mat4v4float_uint_4
+%_ptr_Uniform_a_block = OpTypePointer Uniform %a_block
+          %a = OpVariable %_ptr_Uniform_a_block Uniform
+        %int = OpTypeInt 32 1
+         %11 = OpConstantNull %int
+%_ptr_Private_int = OpTypePointer Private %int
+    %counter = OpVariable %_ptr_Private_int Private %11
+         %14 = OpTypeFunction %int
+      %int_1 = OpConstant %int 1
+       %void = OpTypeVoid
+         %21 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_mat4v4float_uint_4 = OpTypePointer Uniform %_arr_mat4v4float_uint_4
+%_ptr_Uniform_mat4v4float = OpTypePointer Uniform %mat4v4float
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+          %i = OpFunction %int None %14
+         %16 = OpLabel
+         %17 = OpLoad %int %counter
+         %19 = OpIAdd %int %17 %int_1
+               OpStore %counter %19
+         %20 = OpLoad %int %counter
+               OpReturnValue %20
+               OpFunctionEnd
+          %f = OpFunction %void None %21
+         %24 = OpLabel
+         %25 = OpFunctionCall %int %i
+         %26 = OpFunctionCall %int %i
+         %29 = OpAccessChain %_ptr_Uniform__arr_mat4v4float_uint_4 %a %uint_0
+         %30 = OpLoad %_arr_mat4v4float_uint_4 %29
+         %32 = OpAccessChain %_ptr_Uniform_mat4v4float %a %uint_0 %25
+         %33 = OpLoad %mat4v4float %32
+         %35 = OpAccessChain %_ptr_Uniform_v4float %a %uint_0 %25 %26
+         %36 = OpLoad %v4float %35
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f32/dynamic_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat4x4_f32/dynamic_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..3221228
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f32/dynamic_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,18 @@
+@group(0) @binding(0) var<uniform> a : array<mat4x4<f32>, 4>;
+
+var<private> counter = 0;
+
+fn i() -> i32 {
+  counter++;
+  return counter;
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &(a);
+  let p_a_i = &((*(p_a))[i()]);
+  let p_a_i_i = &((*(p_a_i))[i()]);
+  let l_a : array<mat4x4<f32>, 4> = *(p_a);
+  let l_a_i : mat4x4<f32> = *(p_a_i);
+  let l_a_i_i : vec4<f32> = *(p_a_i_i);
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f32/static_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/array/mat4x4_f32/static_index_via_ptr.wgsl
new file mode 100644
index 0000000..120e0f8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f32/static_index_via_ptr.wgsl

@@ -0,0 +1,12 @@
+@group(0) @binding(0) var<uniform> a : array<mat4x4<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a       = &a;
+  let p_a_2     = &((*p_a)[2]);
+  let p_a_2_1   = &((*p_a_2)[1]);
+
+  let l_a       : array<mat4x4<f32>, 4> = *p_a;
+  let l_a_i     : mat4x4<f32>           = *p_a_2;
+  let l_a_i_i   : vec4<f32>             = *p_a_2_1;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x4_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..ac24f04
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,30 @@
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[16];
+};
+
+float4x4 tint_symbol_1(uint4 buffer[16], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]), asfloat(buffer[scalar_offset_3 / 4]));
+}
+
+typedef float4x4 tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[16], uint offset) {
+  float4x4 arr[4] = (float4x4[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 64u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float4x4 l_a[4] = tint_symbol(a, 0u);
+  const float4x4 l_a_i = tint_symbol_1(a, 128u);
+  const float4 l_a_i_i = asfloat(a[9]);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x4_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..ac24f04
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,30 @@
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[16];
+};
+
+float4x4 tint_symbol_1(uint4 buffer[16], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]), asfloat(buffer[scalar_offset_3 / 4]));
+}
+
+typedef float4x4 tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[16], uint offset) {
+  float4x4 arr[4] = (float4x4[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 64u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float4x4 l_a[4] = tint_symbol(a, 0u);
+  const float4x4 l_a_i = tint_symbol_1(a, 128u);
+  const float4 l_a_i_i = asfloat(a[9]);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f32/static_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat4x4_f32/static_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..7edadaf
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f32/static_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,17 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform a_block_ubo {
+  mat4 inner[4];
+} a;
+
+void f() {
+  mat4 l_a[4] = a.inner;
+  mat4 l_a_i = a.inner[2];
+  vec4 l_a_i_i = a.inner[2][1];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f32/static_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat4x4_f32/static_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..4686650
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f32/static_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,23 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+kernel void f(const constant tint_array<float4x4, 4>* tint_symbol [[buffer(0)]]) {
+  tint_array<float4x4, 4> const l_a = *(tint_symbol);
+  float4x4 const l_a_i = (*(tint_symbol))[2];
+  float4 const l_a_i_i = (*(tint_symbol))[2][1];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f32/static_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat4x4_f32/static_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..9cf1346
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f32/static_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,49 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 27
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %a_block "a_block"
+               OpMemberName %a_block 0 "inner"
+               OpName %a "a"
+               OpName %f "f"
+               OpDecorate %a_block Block
+               OpMemberDecorate %a_block 0 Offset 0
+               OpMemberDecorate %a_block 0 ColMajor
+               OpMemberDecorate %a_block 0 MatrixStride 16
+               OpDecorate %_arr_mat4v4float_uint_4 ArrayStride 64
+               OpDecorate %a NonWritable
+               OpDecorate %a DescriptorSet 0
+               OpDecorate %a Binding 0
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%mat4v4float = OpTypeMatrix %v4float 4
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat4v4float_uint_4 = OpTypeArray %mat4v4float %uint_4
+    %a_block = OpTypeStruct %_arr_mat4v4float_uint_4
+%_ptr_Uniform_a_block = OpTypePointer Uniform %a_block
+          %a = OpVariable %_ptr_Uniform_a_block Uniform
+       %void = OpTypeVoid
+         %10 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_mat4v4float_uint_4 = OpTypePointer Uniform %_arr_mat4v4float_uint_4
+        %int = OpTypeInt 32 1
+      %int_2 = OpConstant %int 2
+%_ptr_Uniform_mat4v4float = OpTypePointer Uniform %mat4v4float
+      %int_1 = OpConstant %int 1
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+          %f = OpFunction %void None %10
+         %13 = OpLabel
+         %16 = OpAccessChain %_ptr_Uniform__arr_mat4v4float_uint_4 %a %uint_0
+         %17 = OpLoad %_arr_mat4v4float_uint_4 %16
+         %21 = OpAccessChain %_ptr_Uniform_mat4v4float %a %uint_0 %int_2
+         %22 = OpLoad %mat4v4float %21
+         %25 = OpAccessChain %_ptr_Uniform_v4float %a %uint_0 %int_2 %int_1
+         %26 = OpLoad %v4float %25
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f32/static_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat4x4_f32/static_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..750bb8f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f32/static_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,11 @@
+@group(0) @binding(0) var<uniform> a : array<mat4x4<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &(a);
+  let p_a_2 = &((*(p_a))[2]);
+  let p_a_2_1 = &((*(p_a_2))[1]);
+  let l_a : array<mat4x4<f32>, 4> = *(p_a);
+  let l_a_i : mat4x4<f32> = *(p_a_2);
+  let l_a_i_i : vec4<f32> = *(p_a_2_1);
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_builtin.wgsl b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_builtin.wgsl
new file mode 100644
index 0000000..2f236c9
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_builtin.wgsl

@@ -0,0 +1,8 @@
+@group(0) @binding(0) var<uniform> u : array<mat4x4<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    let t = transpose(u[2]);
+    let l = length(u[0][1].ywxz);
+    let a = abs(u[0][1].ywxz.x);
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_builtin.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_builtin.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..bb637b6
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_builtin.wgsl.expected.dxc.hlsl

@@ -0,0 +1,19 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[16];
+};
+
+float4x4 tint_symbol(uint4 buffer[16], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]), asfloat(buffer[scalar_offset_3 / 4]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float4x4 t = transpose(tint_symbol(u, 128u));
+  const float l = length(asfloat(u[1]).ywxz);
+  const float a = abs(asfloat(u[1]).ywxz.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_builtin.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_builtin.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..bb637b6
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_builtin.wgsl.expected.fxc.hlsl

@@ -0,0 +1,19 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[16];
+};
+
+float4x4 tint_symbol(uint4 buffer[16], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]), asfloat(buffer[scalar_offset_3 / 4]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float4x4 t = transpose(tint_symbol(u, 128u));
+  const float l = length(asfloat(u[1]).ywxz);
+  const float a = abs(asfloat(u[1]).ywxz.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_builtin.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_builtin.wgsl.expected.glsl
new file mode 100644
index 0000000..8268770
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_builtin.wgsl.expected.glsl

@@ -0,0 +1,17 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat4 inner[4];
+} u;
+
+void f() {
+  mat4 t = transpose(u.inner[2]);
+  float l = length(u.inner[0][1].ywxz);
+  float a = abs(u.inner[0][1].ywxz.x);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_builtin.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_builtin.wgsl.expected.msl
new file mode 100644
index 0000000..b7a398b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_builtin.wgsl.expected.msl

@@ -0,0 +1,23 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+kernel void f(const constant tint_array<float4x4, 4>* tint_symbol [[buffer(0)]]) {
+  float4x4 const t = transpose((*(tint_symbol))[2]);
+  float const l = length(float4((*(tint_symbol))[0][1]).ywxz);
+  float const a = fabs(float4((*(tint_symbol))[0][1]).ywxz[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_builtin.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_builtin.wgsl.expected.spvasm
new file mode 100644
index 0000000..e923130
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_builtin.wgsl.expected.spvasm

@@ -0,0 +1,56 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 34
+; Schema: 0
+               OpCapability Shader
+         %22 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %_arr_mat4v4float_uint_4 ArrayStride 64
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%mat4v4float = OpTypeMatrix %v4float 4
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat4v4float_uint_4 = OpTypeArray %mat4v4float %uint_4
+    %u_block = OpTypeStruct %_arr_mat4v4float_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+       %void = OpTypeVoid
+         %10 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+        %int = OpTypeInt 32 1
+      %int_2 = OpConstant %int 2
+%_ptr_Uniform_mat4v4float = OpTypePointer Uniform %mat4v4float
+         %23 = OpConstantNull %int
+      %int_1 = OpConstant %int 1
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+          %f = OpFunction %void None %10
+         %13 = OpLabel
+         %19 = OpAccessChain %_ptr_Uniform_mat4v4float %u %uint_0 %int_2
+         %20 = OpLoad %mat4v4float %19
+         %14 = OpTranspose %mat4v4float %20
+         %26 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %23 %int_1
+         %27 = OpLoad %v4float %26
+         %28 = OpVectorShuffle %v4float %27 %27 1 3 0 2
+         %21 = OpExtInst %float %22 Length %28
+         %30 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %23 %int_1
+         %31 = OpLoad %v4float %30
+         %32 = OpVectorShuffle %v4float %31 %31 1 3 0 2
+         %33 = OpCompositeExtract %float %32 0
+         %29 = OpExtInst %float %22 FAbs %33
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_builtin.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_builtin.wgsl.expected.wgsl
new file mode 100644
index 0000000..eec03cd
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_builtin.wgsl.expected.wgsl

@@ -0,0 +1,8 @@
+@group(0) @binding(0) var<uniform> u : array<mat4x4<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let t = transpose(u[2]);
+  let l = length(u[0][1].ywxz);
+  let a = abs(u[0][1].ywxz.x);
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_fn.wgsl b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_fn.wgsl
new file mode 100644
index 0000000..2a62ac9
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_fn.wgsl

@@ -0,0 +1,14 @@
+@group(0) @binding(0) var<uniform> u : array<mat4x4<f32>, 4>;
+
+fn a(a : array<mat4x4<f32>, 4>) {}
+fn b(m : mat4x4<f32>) {}
+fn c(v : vec4<f32>) {}
+fn d(f : f32) {}
+
+@compute @workgroup_size(1)
+fn f() {
+    a(u);
+    b(u[1]);
+    c(u[1][0].ywxz);
+    d(u[1][0].ywxz.x);
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_fn.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_fn.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..c6389d9
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_fn.wgsl.expected.dxc.hlsl

@@ -0,0 +1,43 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[16];
+};
+
+void a(float4x4 a_1[4]) {
+}
+
+void b(float4x4 m) {
+}
+
+void c(float4 v) {
+}
+
+void d(float f_1) {
+}
+
+float4x4 tint_symbol_1(uint4 buffer[16], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]), asfloat(buffer[scalar_offset_3 / 4]));
+}
+
+typedef float4x4 tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[16], uint offset) {
+  float4x4 arr[4] = (float4x4[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 64u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(tint_symbol_1(u, 64u));
+  c(asfloat(u[4]).ywxz);
+  d(asfloat(u[4]).ywxz.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_fn.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_fn.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..c6389d9
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_fn.wgsl.expected.fxc.hlsl

@@ -0,0 +1,43 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[16];
+};
+
+void a(float4x4 a_1[4]) {
+}
+
+void b(float4x4 m) {
+}
+
+void c(float4 v) {
+}
+
+void d(float f_1) {
+}
+
+float4x4 tint_symbol_1(uint4 buffer[16], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]), asfloat(buffer[scalar_offset_3 / 4]));
+}
+
+typedef float4x4 tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[16], uint offset) {
+  float4x4 arr[4] = (float4x4[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 64u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(tint_symbol_1(u, 64u));
+  c(asfloat(u[4]).ywxz);
+  d(asfloat(u[4]).ywxz.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_fn.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_fn.wgsl.expected.glsl
new file mode 100644
index 0000000..20116f4
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_fn.wgsl.expected.glsl

@@ -0,0 +1,30 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat4 inner[4];
+} u;
+
+void a(mat4 a_1[4]) {
+}
+
+void b(mat4 m) {
+}
+
+void c(vec4 v) {
+}
+
+void d(float f_1) {
+}
+
+void f() {
+  a(u.inner);
+  b(u.inner[1]);
+  c(u.inner[1][0].ywxz);
+  d(u.inner[1][0].ywxz.x);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_fn.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_fn.wgsl.expected.msl
new file mode 100644
index 0000000..1d08324
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_fn.wgsl.expected.msl

@@ -0,0 +1,36 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+void a(tint_array<float4x4, 4> a_1) {
+}
+
+void b(float4x4 m) {
+}
+
+void c(float4 v) {
+}
+
+void d(float f_1) {
+}
+
+kernel void f(const constant tint_array<float4x4, 4>* tint_symbol [[buffer(0)]]) {
+  a(*(tint_symbol));
+  b((*(tint_symbol))[1]);
+  c(float4((*(tint_symbol))[1][0]).ywxz);
+  d(float4((*(tint_symbol))[1][0]).ywxz[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_fn.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_fn.wgsl.expected.spvasm
new file mode 100644
index 0000000..776c363
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_fn.wgsl.expected.spvasm

@@ -0,0 +1,90 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 52
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %a "a"
+               OpName %a_1 "a_1"
+               OpName %b "b"
+               OpName %m "m"
+               OpName %c "c"
+               OpName %v "v"
+               OpName %d "d"
+               OpName %f_1 "f_1"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %_arr_mat4v4float_uint_4 ArrayStride 64
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%mat4v4float = OpTypeMatrix %v4float 4
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat4v4float_uint_4 = OpTypeArray %mat4v4float %uint_4
+    %u_block = OpTypeStruct %_arr_mat4v4float_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+       %void = OpTypeVoid
+         %10 = OpTypeFunction %void %_arr_mat4v4float_uint_4
+         %15 = OpTypeFunction %void %mat4v4float
+         %19 = OpTypeFunction %void %v4float
+         %23 = OpTypeFunction %void %float
+         %27 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_mat4v4float_uint_4 = OpTypePointer Uniform %_arr_mat4v4float_uint_4
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Uniform_mat4v4float = OpTypePointer Uniform %mat4v4float
+         %42 = OpConstantNull %int
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+          %a = OpFunction %void None %10
+        %a_1 = OpFunctionParameter %_arr_mat4v4float_uint_4
+         %14 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %b = OpFunction %void None %15
+          %m = OpFunctionParameter %mat4v4float
+         %18 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %c = OpFunction %void None %19
+          %v = OpFunctionParameter %v4float
+         %22 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %d = OpFunction %void None %23
+        %f_1 = OpFunctionParameter %float
+         %26 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %27
+         %29 = OpLabel
+         %33 = OpAccessChain %_ptr_Uniform__arr_mat4v4float_uint_4 %u %uint_0
+         %34 = OpLoad %_arr_mat4v4float_uint_4 %33
+         %30 = OpFunctionCall %void %a %34
+         %39 = OpAccessChain %_ptr_Uniform_mat4v4float %u %uint_0 %int_1
+         %40 = OpLoad %mat4v4float %39
+         %35 = OpFunctionCall %void %b %40
+         %44 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %int_1 %42
+         %45 = OpLoad %v4float %44
+         %46 = OpVectorShuffle %v4float %45 %45 1 3 0 2
+         %41 = OpFunctionCall %void %c %46
+         %48 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %int_1 %42
+         %49 = OpLoad %v4float %48
+         %50 = OpVectorShuffle %v4float %49 %49 1 3 0 2
+         %51 = OpCompositeExtract %float %50 0
+         %47 = OpFunctionCall %void %d %51
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_fn.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_fn.wgsl.expected.wgsl
new file mode 100644
index 0000000..d8a4bf3
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_fn.wgsl.expected.wgsl

@@ -0,0 +1,21 @@
+@group(0) @binding(0) var<uniform> u : array<mat4x4<f32>, 4>;
+
+fn a(a : array<mat4x4<f32>, 4>) {
+}
+
+fn b(m : mat4x4<f32>) {
+}
+
+fn c(v : vec4<f32>) {
+}
+
+fn d(f : f32) {
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  a(u);
+  b(u[1]);
+  c(u[1][0].ywxz);
+  d(u[1][0].ywxz.x);
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_private.wgsl b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_private.wgsl
new file mode 100644
index 0000000..bc19927
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_private.wgsl

@@ -0,0 +1,10 @@
+@group(0) @binding(0) var<uniform> u : array<mat4x4<f32>, 4>;
+var<private> p : array<mat4x4<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    p = u;
+    p[1] = u[2];
+    p[1][0] = u[0][1].ywxz;
+    p[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_private.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_private.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..faaf6bb
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_private.wgsl.expected.dxc.hlsl

@@ -0,0 +1,32 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[16];
+};
+static float4x4 p[4] = (float4x4[4])0;
+
+float4x4 tint_symbol_1(uint4 buffer[16], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]), asfloat(buffer[scalar_offset_3 / 4]));
+}
+
+typedef float4x4 tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[16], uint offset) {
+  float4x4 arr[4] = (float4x4[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 64u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = tint_symbol_1(u, 128u);
+  p[1][0] = asfloat(u[1]).ywxz;
+  p[1][0].x = asfloat(u[1].x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_private.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_private.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..faaf6bb
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_private.wgsl.expected.fxc.hlsl

@@ -0,0 +1,32 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[16];
+};
+static float4x4 p[4] = (float4x4[4])0;
+
+float4x4 tint_symbol_1(uint4 buffer[16], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]), asfloat(buffer[scalar_offset_3 / 4]));
+}
+
+typedef float4x4 tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[16], uint offset) {
+  float4x4 arr[4] = (float4x4[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 64u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = tint_symbol_1(u, 128u);
+  p[1][0] = asfloat(u[1]).ywxz;
+  p[1][0].x = asfloat(u[1].x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_private.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_private.wgsl.expected.glsl
new file mode 100644
index 0000000..6e23a7b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_private.wgsl.expected.glsl

@@ -0,0 +1,19 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat4 inner[4];
+} u;
+
+mat4 p[4] = mat4[4](mat4(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), mat4(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), mat4(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), mat4(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f));
+void f() {
+  p = u.inner;
+  p[1] = u.inner[2];
+  p[1][0] = u.inner[0][1].ywxz;
+  p[1][0].x = u.inner[0][1].x;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_private.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_private.wgsl.expected.msl
new file mode 100644
index 0000000..9e7e72c
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_private.wgsl.expected.msl

@@ -0,0 +1,25 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+kernel void f(const constant tint_array<float4x4, 4>* tint_symbol_1 [[buffer(0)]]) {
+  thread tint_array<float4x4, 4> tint_symbol = {};
+  tint_symbol = *(tint_symbol_1);
+  tint_symbol[1] = (*(tint_symbol_1))[2];
+  tint_symbol[1][0] = float4((*(tint_symbol_1))[0][1]).ywxz;
+  tint_symbol[1][0][0] = (*(tint_symbol_1))[0][1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_private.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_private.wgsl.expected.spvasm
new file mode 100644
index 0000000..0e3dad0
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_private.wgsl.expected.spvasm

@@ -0,0 +1,68 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 41
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %p "p"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %_arr_mat4v4float_uint_4 ArrayStride 64
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%mat4v4float = OpTypeMatrix %v4float 4
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat4v4float_uint_4 = OpTypeArray %mat4v4float %uint_4
+    %u_block = OpTypeStruct %_arr_mat4v4float_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%_ptr_Private__arr_mat4v4float_uint_4 = OpTypePointer Private %_arr_mat4v4float_uint_4
+         %12 = OpConstantNull %_arr_mat4v4float_uint_4
+          %p = OpVariable %_ptr_Private__arr_mat4v4float_uint_4 Private %12
+       %void = OpTypeVoid
+         %13 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_mat4v4float_uint_4 = OpTypePointer Uniform %_arr_mat4v4float_uint_4
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Private_mat4v4float = OpTypePointer Private %mat4v4float
+      %int_2 = OpConstant %int 2
+%_ptr_Uniform_mat4v4float = OpTypePointer Uniform %mat4v4float
+         %29 = OpConstantNull %int
+%_ptr_Private_v4float = OpTypePointer Private %v4float
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+%_ptr_Private_float = OpTypePointer Private %float
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+          %f = OpFunction %void None %13
+         %16 = OpLabel
+         %19 = OpAccessChain %_ptr_Uniform__arr_mat4v4float_uint_4 %u %uint_0
+         %20 = OpLoad %_arr_mat4v4float_uint_4 %19
+               OpStore %p %20
+         %24 = OpAccessChain %_ptr_Private_mat4v4float %p %int_1
+         %27 = OpAccessChain %_ptr_Uniform_mat4v4float %u %uint_0 %int_2
+         %28 = OpLoad %mat4v4float %27
+               OpStore %24 %28
+         %31 = OpAccessChain %_ptr_Private_v4float %p %int_1 %29
+         %33 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %29 %int_1
+         %34 = OpLoad %v4float %33
+         %35 = OpVectorShuffle %v4float %34 %34 1 3 0 2
+               OpStore %31 %35
+         %37 = OpAccessChain %_ptr_Private_float %p %int_1 %29 %uint_0
+         %39 = OpAccessChain %_ptr_Uniform_float %u %uint_0 %29 %int_1 %uint_0
+         %40 = OpLoad %float %39
+               OpStore %37 %40
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_private.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_private.wgsl.expected.wgsl
new file mode 100644
index 0000000..b309aad
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_private.wgsl.expected.wgsl

@@ -0,0 +1,11 @@
+@group(0) @binding(0) var<uniform> u : array<mat4x4<f32>, 4>;
+
+var<private> p : array<mat4x4<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  p = u;
+  p[1] = u[2];
+  p[1][0] = u[0][1].ywxz;
+  p[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_storage.wgsl b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_storage.wgsl
new file mode 100644
index 0000000..20fd31a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_storage.wgsl

@@ -0,0 +1,10 @@
+@group(0) @binding(0) var<uniform> u : array<mat4x4<f32>, 4>;
+@group(0) @binding(1) var<storage, read_write> s : array<mat4x4<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    s = u;
+    s[1] = u[2];
+    s[1][0] = u[0][1].ywxz;
+    s[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_storage.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_storage.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..9fd7977
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_storage.wgsl.expected.dxc.hlsl

@@ -0,0 +1,48 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[16];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, float4x4 value) {
+  buffer.Store4((offset + 0u), asuint(value[0u]));
+  buffer.Store4((offset + 16u), asuint(value[1u]));
+  buffer.Store4((offset + 32u), asuint(value[2u]));
+  buffer.Store4((offset + 48u), asuint(value[3u]));
+}
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, float4x4 value[4]) {
+  float4x4 array[4] = value;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      tint_symbol_1(buffer, (offset + (i * 64u)), array[i]);
+    }
+  }
+}
+
+float4x4 tint_symbol_4(uint4 buffer[16], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]), asfloat(buffer[scalar_offset_3 / 4]));
+}
+
+typedef float4x4 tint_symbol_3_ret[4];
+tint_symbol_3_ret tint_symbol_3(uint4 buffer[16], uint offset) {
+  float4x4 arr[4] = (float4x4[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_4(buffer, (offset + (i_1 * 64u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_3(u, 0u));
+  tint_symbol_1(s, 64u, tint_symbol_4(u, 128u));
+  s.Store4(64u, asuint(asfloat(u[1]).ywxz));
+  s.Store(64u, asuint(asfloat(u[1].x)));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_storage.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_storage.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..9fd7977
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_storage.wgsl.expected.fxc.hlsl

@@ -0,0 +1,48 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[16];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, float4x4 value) {
+  buffer.Store4((offset + 0u), asuint(value[0u]));
+  buffer.Store4((offset + 16u), asuint(value[1u]));
+  buffer.Store4((offset + 32u), asuint(value[2u]));
+  buffer.Store4((offset + 48u), asuint(value[3u]));
+}
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, float4x4 value[4]) {
+  float4x4 array[4] = value;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      tint_symbol_1(buffer, (offset + (i * 64u)), array[i]);
+    }
+  }
+}
+
+float4x4 tint_symbol_4(uint4 buffer[16], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]), asfloat(buffer[scalar_offset_3 / 4]));
+}
+
+typedef float4x4 tint_symbol_3_ret[4];
+tint_symbol_3_ret tint_symbol_3(uint4 buffer[16], uint offset) {
+  float4x4 arr[4] = (float4x4[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_4(buffer, (offset + (i_1 * 64u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_3(u, 0u));
+  tint_symbol_1(s, 64u, tint_symbol_4(u, 128u));
+  s.Store4(64u, asuint(asfloat(u[1]).ywxz));
+  s.Store(64u, asuint(asfloat(u[1].x)));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_storage.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_storage.wgsl.expected.glsl
new file mode 100644
index 0000000..4cd2b78
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_storage.wgsl.expected.glsl

@@ -0,0 +1,22 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat4 inner[4];
+} u;
+
+layout(binding = 1, std430) buffer u_block_ssbo {
+  mat4 inner[4];
+} s;
+
+void f() {
+  s.inner = u.inner;
+  s.inner[1] = u.inner[2];
+  s.inner[1][0] = u.inner[0][1].ywxz;
+  s.inner[1][0].x = u.inner[0][1].x;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_storage.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_storage.wgsl.expected.msl
new file mode 100644
index 0000000..99338a6
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_storage.wgsl.expected.msl

@@ -0,0 +1,24 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+kernel void f(device tint_array<float4x4, 4>* tint_symbol [[buffer(1)]], const constant tint_array<float4x4, 4>* tint_symbol_1 [[buffer(0)]]) {
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[2];
+  (*(tint_symbol))[1][0] = float4((*(tint_symbol_1))[0][1]).ywxz;
+  (*(tint_symbol))[1][0][0] = (*(tint_symbol_1))[0][1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_storage.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_storage.wgsl.expected.spvasm
new file mode 100644
index 0000000..8867684
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_storage.wgsl.expected.spvasm

@@ -0,0 +1,71 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 42
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %s "s"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %_arr_mat4v4float_uint_4 ArrayStride 64
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpDecorate %s DescriptorSet 0
+               OpDecorate %s Binding 1
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%mat4v4float = OpTypeMatrix %v4float 4
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat4v4float_uint_4 = OpTypeArray %mat4v4float %uint_4
+    %u_block = OpTypeStruct %_arr_mat4v4float_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%_ptr_StorageBuffer_u_block = OpTypePointer StorageBuffer %u_block
+          %s = OpVariable %_ptr_StorageBuffer_u_block StorageBuffer
+       %void = OpTypeVoid
+         %12 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_StorageBuffer__arr_mat4v4float_uint_4 = OpTypePointer StorageBuffer %_arr_mat4v4float_uint_4
+%_ptr_Uniform__arr_mat4v4float_uint_4 = OpTypePointer Uniform %_arr_mat4v4float_uint_4
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_StorageBuffer_mat4v4float = OpTypePointer StorageBuffer %mat4v4float
+      %int_2 = OpConstant %int 2
+%_ptr_Uniform_mat4v4float = OpTypePointer Uniform %mat4v4float
+         %30 = OpConstantNull %int
+%_ptr_StorageBuffer_v4float = OpTypePointer StorageBuffer %v4float
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+          %f = OpFunction %void None %12
+         %15 = OpLabel
+         %18 = OpAccessChain %_ptr_StorageBuffer__arr_mat4v4float_uint_4 %s %uint_0
+         %20 = OpAccessChain %_ptr_Uniform__arr_mat4v4float_uint_4 %u %uint_0
+         %21 = OpLoad %_arr_mat4v4float_uint_4 %20
+               OpStore %18 %21
+         %25 = OpAccessChain %_ptr_StorageBuffer_mat4v4float %s %uint_0 %int_1
+         %28 = OpAccessChain %_ptr_Uniform_mat4v4float %u %uint_0 %int_2
+         %29 = OpLoad %mat4v4float %28
+               OpStore %25 %29
+         %32 = OpAccessChain %_ptr_StorageBuffer_v4float %s %uint_0 %int_1 %30
+         %34 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %30 %int_1
+         %35 = OpLoad %v4float %34
+         %36 = OpVectorShuffle %v4float %35 %35 1 3 0 2
+               OpStore %32 %36
+         %38 = OpAccessChain %_ptr_StorageBuffer_float %s %uint_0 %int_1 %30 %uint_0
+         %40 = OpAccessChain %_ptr_Uniform_float %u %uint_0 %30 %int_1 %uint_0
+         %41 = OpLoad %float %40
+               OpStore %38 %41
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_storage.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_storage.wgsl.expected.wgsl
new file mode 100644
index 0000000..3423e03
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_storage.wgsl.expected.wgsl

@@ -0,0 +1,11 @@
+@group(0) @binding(0) var<uniform> u : array<mat4x4<f32>, 4>;
+
+@group(0) @binding(1) var<storage, read_write> s : array<mat4x4<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  s = u;
+  s[1] = u[2];
+  s[1][0] = u[0][1].ywxz;
+  s[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_workgroup.wgsl b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_workgroup.wgsl
new file mode 100644
index 0000000..353d20c
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_workgroup.wgsl

@@ -0,0 +1,10 @@
+@group(0) @binding(0) var<uniform> u : array<mat4x4<f32>, 4>;
+var<workgroup> w : array<mat4x4<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    w = u;
+    w[1] = u[2];
+    w[1][0] = u[0][1].ywxz;
+    w[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_workgroup.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_workgroup.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..312730e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_workgroup.wgsl.expected.dxc.hlsl

@@ -0,0 +1,47 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[16];
+};
+groupshared float4x4 w[4];
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+float4x4 tint_symbol_3(uint4 buffer[16], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]), asfloat(buffer[scalar_offset_3 / 4]));
+}
+
+typedef float4x4 tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[16], uint offset) {
+  float4x4 arr[4] = (float4x4[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 64u)));
+    }
+  }
+  return arr;
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      const uint i = idx;
+      w[i] = float4x4((0.0f).xxxx, (0.0f).xxxx, (0.0f).xxxx, (0.0f).xxxx);
+    }
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = tint_symbol_3(u, 128u);
+  w[1][0] = asfloat(u[1]).ywxz;
+  w[1][0].x = asfloat(u[1].x);
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_workgroup.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_workgroup.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..312730e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_workgroup.wgsl.expected.fxc.hlsl

@@ -0,0 +1,47 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[16];
+};
+groupshared float4x4 w[4];
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+float4x4 tint_symbol_3(uint4 buffer[16], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]), asfloat(buffer[scalar_offset_3 / 4]));
+}
+
+typedef float4x4 tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[16], uint offset) {
+  float4x4 arr[4] = (float4x4[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 64u)));
+    }
+  }
+  return arr;
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      const uint i = idx;
+      w[i] = float4x4((0.0f).xxxx, (0.0f).xxxx, (0.0f).xxxx, (0.0f).xxxx);
+    }
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = tint_symbol_3(u, 128u);
+  w[1][0] = asfloat(u[1]).ywxz;
+  w[1][0].x = asfloat(u[1].x);
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_workgroup.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_workgroup.wgsl.expected.glsl
new file mode 100644
index 0000000..3c9583f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_workgroup.wgsl.expected.glsl

@@ -0,0 +1,26 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat4 inner[4];
+} u;
+
+shared mat4 w[4];
+void f(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      uint i = idx;
+      w[i] = mat4(vec4(0.0f), vec4(0.0f), vec4(0.0f), vec4(0.0f));
+    }
+  }
+  barrier();
+  w = u.inner;
+  w[1] = u.inner[2];
+  w[1][0] = u.inner[0][1].ywxz;
+  w[1][0].x = u.inner[0][1].x;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f(gl_LocalInvocationIndex);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_workgroup.wgsl.expected.msl
new file mode 100644
index 0000000..2e14882
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_workgroup.wgsl.expected.msl

@@ -0,0 +1,38 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct tint_symbol_5 {
+  tint_array<float4x4, 4> w;
+};
+
+void f_inner(uint local_invocation_index, threadgroup tint_array<float4x4, 4>* const tint_symbol, const constant tint_array<float4x4, 4>* const tint_symbol_1) {
+  for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+    uint const i = idx;
+    (*(tint_symbol))[i] = float4x4(float4(0.0f), float4(0.0f), float4(0.0f), float4(0.0f));
+  }
+  threadgroup_barrier(mem_flags::mem_threadgroup);
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[2];
+  (*(tint_symbol))[1][0] = float4((*(tint_symbol_1))[0][1]).ywxz;
+  (*(tint_symbol))[1][0][0] = (*(tint_symbol_1))[0][1][0];
+}
+
+kernel void f(const constant tint_array<float4x4, 4>* tint_symbol_4 [[buffer(0)]], threadgroup tint_symbol_5* tint_symbol_3 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
+  threadgroup tint_array<float4x4, 4>* const tint_symbol_2 = &((*(tint_symbol_3)).w);
+  f_inner(local_invocation_index, tint_symbol_2, tint_symbol_4);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_workgroup.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_workgroup.wgsl.expected.spvasm
new file mode 100644
index 0000000..27d483d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_workgroup.wgsl.expected.spvasm

@@ -0,0 +1,115 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 70
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f" %local_invocation_index_1
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %local_invocation_index_1 "local_invocation_index_1"
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %w "w"
+               OpName %f_inner "f_inner"
+               OpName %local_invocation_index "local_invocation_index"
+               OpName %idx "idx"
+               OpName %f "f"
+               OpDecorate %local_invocation_index_1 BuiltIn LocalInvocationIndex
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %_arr_mat4v4float_uint_4 ArrayStride 64
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %uint = OpTypeInt 32 0
+%_ptr_Input_uint = OpTypePointer Input %uint
+%local_invocation_index_1 = OpVariable %_ptr_Input_uint Input
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%mat4v4float = OpTypeMatrix %v4float 4
+     %uint_4 = OpConstant %uint 4
+%_arr_mat4v4float_uint_4 = OpTypeArray %mat4v4float %uint_4
+    %u_block = OpTypeStruct %_arr_mat4v4float_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%_ptr_Workgroup__arr_mat4v4float_uint_4 = OpTypePointer Workgroup %_arr_mat4v4float_uint_4
+          %w = OpVariable %_ptr_Workgroup__arr_mat4v4float_uint_4 Workgroup
+       %void = OpTypeVoid
+         %14 = OpTypeFunction %void %uint
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %21 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Workgroup_mat4v4float = OpTypePointer Workgroup %mat4v4float
+         %35 = OpConstantNull %mat4v4float
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+   %uint_264 = OpConstant %uint 264
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_mat4v4float_uint_4 = OpTypePointer Uniform %_arr_mat4v4float_uint_4
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+      %int_2 = OpConstant %int 2
+%_ptr_Uniform_mat4v4float = OpTypePointer Uniform %mat4v4float
+         %53 = OpConstantNull %int
+%_ptr_Workgroup_v4float = OpTypePointer Workgroup %v4float
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+%_ptr_Workgroup_float = OpTypePointer Workgroup %float
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+         %65 = OpTypeFunction %void
+    %f_inner = OpFunction %void None %14
+%local_invocation_index = OpFunctionParameter %uint
+         %18 = OpLabel
+        %idx = OpVariable %_ptr_Function_uint Function %21
+               OpStore %idx %local_invocation_index
+               OpBranch %22
+         %22 = OpLabel
+               OpLoopMerge %23 %24 None
+               OpBranch %25
+         %25 = OpLabel
+         %27 = OpLoad %uint %idx
+         %28 = OpULessThan %bool %27 %uint_4
+         %26 = OpLogicalNot %bool %28
+               OpSelectionMerge %30 None
+               OpBranchConditional %26 %31 %30
+         %31 = OpLabel
+               OpBranch %23
+         %30 = OpLabel
+         %32 = OpLoad %uint %idx
+         %34 = OpAccessChain %_ptr_Workgroup_mat4v4float %w %32
+               OpStore %34 %35
+               OpBranch %24
+         %24 = OpLabel
+         %36 = OpLoad %uint %idx
+         %38 = OpIAdd %uint %36 %uint_1
+               OpStore %idx %38
+               OpBranch %22
+         %23 = OpLabel
+               OpControlBarrier %uint_2 %uint_2 %uint_264
+         %44 = OpAccessChain %_ptr_Uniform__arr_mat4v4float_uint_4 %u %uint_0
+         %45 = OpLoad %_arr_mat4v4float_uint_4 %44
+               OpStore %w %45
+         %48 = OpAccessChain %_ptr_Workgroup_mat4v4float %w %int_1
+         %51 = OpAccessChain %_ptr_Uniform_mat4v4float %u %uint_0 %int_2
+         %52 = OpLoad %mat4v4float %51
+               OpStore %48 %52
+         %55 = OpAccessChain %_ptr_Workgroup_v4float %w %int_1 %53
+         %57 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %53 %int_1
+         %58 = OpLoad %v4float %57
+         %59 = OpVectorShuffle %v4float %58 %58 1 3 0 2
+               OpStore %55 %59
+         %61 = OpAccessChain %_ptr_Workgroup_float %w %int_1 %53 %uint_0
+         %63 = OpAccessChain %_ptr_Uniform_float %u %uint_0 %53 %int_1 %uint_0
+         %64 = OpLoad %float %63
+               OpStore %61 %64
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %65
+         %67 = OpLabel
+         %69 = OpLoad %uint %local_invocation_index_1
+         %68 = OpFunctionCall %void %f_inner %69
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_workgroup.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_workgroup.wgsl.expected.wgsl
new file mode 100644
index 0000000..bfd5a5d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/array/mat4x4_f32/to_workgroup.wgsl.expected.wgsl

@@ -0,0 +1,11 @@
+@group(0) @binding(0) var<uniform> u : array<mat4x4<f32>, 4>;
+
+var<workgroup> w : array<mat4x4<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  w = u;
+  w[1] = u[2];
+  w[1][0] = u[0][1].ywxz;
+  w[1][0].x = u[0][1].x;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2/dynamic_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x2/dynamic_index_via_ptr.wgsl
deleted file mode 100644
index fc1258e..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat2x2/dynamic_index_via_ptr.wgsl
+++ /dev/null

@@ -1,31 +0,0 @@
-struct Inner {
-  m : mat2x2<f32>,
-}
-
-struct Outer {
-  a : array<Inner, 4>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
-
-var<private> counter = 0;
-fn i() -> i32 { counter++; return counter; }
-
-@compute @workgroup_size(1)
-fn f() {
-  let p_a           = &a;
-  let p_a_i         = &((*p_a)[i()]);
-  let p_a_i_a       = &((*p_a_i).a);
-  let p_a_i_a_i     = &((*p_a_i_a)[i()]);
-  let p_a_i_a_i_m   = &((*p_a_i_a_i).m);
-  let p_a_i_a_i_m_i = &((*p_a_i_a_i_m)[i()]);
-
-
-  let l_a             : array<Outer, 4> =  *p_a;
-  let l_a_i           : Outer           =  *p_a_i;
-  let l_a_i_a         : array<Inner, 4> =  *p_a_i_a;
-  let l_a_i_a_i       : Inner           =  *p_a_i_a_i;
-  let l_a_i_a_i_m     : mat2x2<f32>     =  *p_a_i_a_i_m;
-  let l_a_i_a_i_m_i   : vec2<f32>       =  *p_a_i_a_i_m_i;
-  let l_a_i_a_i_m_i_i : f32             = (*p_a_i_a_i_m_i)[i()];
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x2/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
deleted file mode 100644
index bf03768..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat2x2/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
+++ /dev/null

@@ -1,78 +0,0 @@
-struct Inner {
-  float2x2 m;
-};
-struct Outer {
-  Inner a[4];
-};
-
-cbuffer cbuffer_a : register(b0, space0) {
-  uint4 a[16];
-};
-static int counter = 0;
-
-int i() {
-  counter = (counter + 1);
-  return counter;
-}
-
-float2x2 tint_symbol_8(uint4 buffer[16], uint offset) {
-  const uint scalar_offset = ((offset + 0u)) / 4;
-  uint4 ubo_load = buffer[scalar_offset / 4];
-  const uint scalar_offset_1 = ((offset + 8u)) / 4;
-  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
-  return float2x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)));
-}
-
-Inner tint_symbol_7(uint4 buffer[16], uint offset) {
-  const Inner tint_symbol_11 = {tint_symbol_8(buffer, (offset + 0u))};
-  return tint_symbol_11;
-}
-
-typedef Inner tint_symbol_6_ret[4];
-tint_symbol_6_ret tint_symbol_6(uint4 buffer[16], uint offset) {
-  Inner arr[4] = (Inner[4])0;
-  {
-    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
-      arr[i_1] = tint_symbol_7(buffer, (offset + (i_1 * 16u)));
-    }
-  }
-  return arr;
-}
-
-Outer tint_symbol_5(uint4 buffer[16], uint offset) {
-  const Outer tint_symbol_12 = {tint_symbol_6(buffer, (offset + 0u))};
-  return tint_symbol_12;
-}
-
-typedef Outer tint_symbol_4_ret[4];
-tint_symbol_4_ret tint_symbol_4(uint4 buffer[16], uint offset) {
-  Outer arr_1[4] = (Outer[4])0;
-  {
-    for(uint i_2 = 0u; (i_2 < 4u); i_2 = (i_2 + 1u)) {
-      arr_1[i_2] = tint_symbol_5(buffer, (offset + (i_2 * 64u)));
-    }
-  }
-  return arr_1;
-}
-
-[numthreads(1, 1, 1)]
-void f() {
-  const int p_a_i_save = i();
-  const int p_a_i_a_i_save = i();
-  const int p_a_i_a_i_m_i_save = i();
-  const Outer l_a[4] = tint_symbol_4(a, 0u);
-  const Outer l_a_i = tint_symbol_5(a, (64u * uint(p_a_i_save)));
-  const Inner l_a_i_a[4] = tint_symbol_6(a, (64u * uint(p_a_i_save)));
-  const Inner l_a_i_a_i = tint_symbol_7(a, ((64u * uint(p_a_i_save)) + (16u * uint(p_a_i_a_i_save))));
-  const float2x2 l_a_i_a_i_m = tint_symbol_8(a, ((64u * uint(p_a_i_save)) + (16u * uint(p_a_i_a_i_save))));
-  const uint scalar_offset_2 = ((((64u * uint(p_a_i_save)) + (16u * uint(p_a_i_a_i_save))) + (8u * uint(p_a_i_a_i_m_i_save)))) / 4;
-  uint4 ubo_load_2 = a[scalar_offset_2 / 4];
-  const float2 l_a_i_a_i_m_i = asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy));
-  const int tint_symbol = p_a_i_save;
-  const int tint_symbol_1 = p_a_i_a_i_save;
-  const int tint_symbol_2 = p_a_i_a_i_m_i_save;
-  const int tint_symbol_3 = i();
-  const uint scalar_offset_3 = (((((64u * uint(tint_symbol)) + (16u * uint(tint_symbol_1))) + (8u * uint(tint_symbol_2))) + (4u * uint(tint_symbol_3)))) / 4;
-  const float l_a_i_a_i_m_i_i = asfloat(a[scalar_offset_3 / 4][scalar_offset_3 % 4]);
-  return;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x2/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
deleted file mode 100644
index bf03768..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat2x2/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
+++ /dev/null

@@ -1,78 +0,0 @@
-struct Inner {
-  float2x2 m;
-};
-struct Outer {
-  Inner a[4];
-};
-
-cbuffer cbuffer_a : register(b0, space0) {
-  uint4 a[16];
-};
-static int counter = 0;
-
-int i() {
-  counter = (counter + 1);
-  return counter;
-}
-
-float2x2 tint_symbol_8(uint4 buffer[16], uint offset) {
-  const uint scalar_offset = ((offset + 0u)) / 4;
-  uint4 ubo_load = buffer[scalar_offset / 4];
-  const uint scalar_offset_1 = ((offset + 8u)) / 4;
-  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
-  return float2x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)));
-}
-
-Inner tint_symbol_7(uint4 buffer[16], uint offset) {
-  const Inner tint_symbol_11 = {tint_symbol_8(buffer, (offset + 0u))};
-  return tint_symbol_11;
-}
-
-typedef Inner tint_symbol_6_ret[4];
-tint_symbol_6_ret tint_symbol_6(uint4 buffer[16], uint offset) {
-  Inner arr[4] = (Inner[4])0;
-  {
-    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
-      arr[i_1] = tint_symbol_7(buffer, (offset + (i_1 * 16u)));
-    }
-  }
-  return arr;
-}
-
-Outer tint_symbol_5(uint4 buffer[16], uint offset) {
-  const Outer tint_symbol_12 = {tint_symbol_6(buffer, (offset + 0u))};
-  return tint_symbol_12;
-}
-
-typedef Outer tint_symbol_4_ret[4];
-tint_symbol_4_ret tint_symbol_4(uint4 buffer[16], uint offset) {
-  Outer arr_1[4] = (Outer[4])0;
-  {
-    for(uint i_2 = 0u; (i_2 < 4u); i_2 = (i_2 + 1u)) {
-      arr_1[i_2] = tint_symbol_5(buffer, (offset + (i_2 * 64u)));
-    }
-  }
-  return arr_1;
-}
-
-[numthreads(1, 1, 1)]
-void f() {
-  const int p_a_i_save = i();
-  const int p_a_i_a_i_save = i();
-  const int p_a_i_a_i_m_i_save = i();
-  const Outer l_a[4] = tint_symbol_4(a, 0u);
-  const Outer l_a_i = tint_symbol_5(a, (64u * uint(p_a_i_save)));
-  const Inner l_a_i_a[4] = tint_symbol_6(a, (64u * uint(p_a_i_save)));
-  const Inner l_a_i_a_i = tint_symbol_7(a, ((64u * uint(p_a_i_save)) + (16u * uint(p_a_i_a_i_save))));
-  const float2x2 l_a_i_a_i_m = tint_symbol_8(a, ((64u * uint(p_a_i_save)) + (16u * uint(p_a_i_a_i_save))));
-  const uint scalar_offset_2 = ((((64u * uint(p_a_i_save)) + (16u * uint(p_a_i_a_i_save))) + (8u * uint(p_a_i_a_i_m_i_save)))) / 4;
-  uint4 ubo_load_2 = a[scalar_offset_2 / 4];
-  const float2 l_a_i_a_i_m_i = asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy));
-  const int tint_symbol = p_a_i_save;
-  const int tint_symbol_1 = p_a_i_a_i_save;
-  const int tint_symbol_2 = p_a_i_a_i_m_i_save;
-  const int tint_symbol_3 = i();
-  const uint scalar_offset_3 = (((((64u * uint(tint_symbol)) + (16u * uint(tint_symbol_1))) + (8u * uint(tint_symbol_2))) + (4u * uint(tint_symbol_3)))) / 4;
-  const float l_a_i_a_i_m_i_i = asfloat(a[scalar_offset_3 / 4][scalar_offset_3 % 4]);
-  return;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2/dynamic_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat2x2/dynamic_index_via_ptr.wgsl.expected.glsl
deleted file mode 100644
index 90e0cf3..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat2x2/dynamic_index_via_ptr.wgsl.expected.glsl
+++ /dev/null

@@ -1,122 +0,0 @@
-#version 310 es
-
-struct Inner {
-  mat2 m;
-};
-
-struct Inner_std140 {
-  vec2 m_0;
-  vec2 m_1;
-};
-
-struct Outer {
-  Inner a[4];
-};
-
-struct Outer_std140 {
-  Inner_std140 a[4];
-};
-
-layout(binding = 0, std140) uniform a_block_std140_ubo {
-  Outer_std140 inner[4];
-} a;
-
-int counter = 0;
-int i() {
-  counter = (counter + 1);
-  return counter;
-}
-
-Inner conv_Inner(Inner_std140 val) {
-  return Inner(mat2(val.m_0, val.m_1));
-}
-
-Inner[4] conv_arr4_Inner(Inner_std140 val[4]) {
-  Inner arr[4] = Inner[4](Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)));
-  {
-    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
-      arr[i] = conv_Inner(val[i]);
-    }
-  }
-  return arr;
-}
-
-Outer conv_Outer(Outer_std140 val) {
-  return Outer(conv_arr4_Inner(val.a));
-}
-
-Outer[4] conv_arr4_Outer(Outer_std140 val[4]) {
-  Outer arr[4] = Outer[4](Outer(Inner[4](Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)))), Outer(Inner[4](Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)))), Outer(Inner[4](Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)))), Outer(Inner[4](Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)))));
-  {
-    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
-      arr[i] = conv_Outer(val[i]);
-    }
-  }
-  return arr;
-}
-
-mat2 load_a_inner_p0_a_p1_m(uint p0, uint p1) {
-  uint s_save = p0;
-  uint s_save_1 = p1;
-  return mat2(a.inner[s_save].a[s_save_1].m_0, a.inner[s_save].a[s_save_1].m_1);
-}
-
-vec2 load_a_inner_p0_a_p1_m_p2(uint p0, uint p1, uint p2) {
-  switch(p2) {
-    case 0u: {
-      return a.inner[p0].a[p1].m_0;
-      break;
-    }
-    case 1u: {
-      return a.inner[p0].a[p1].m_1;
-      break;
-    }
-    default: {
-      return vec2(0.0f);
-      break;
-    }
-  }
-}
-
-float load_a_inner_p0_a_p1_m_p2_p3(uint p0, uint p1, uint p2, uint p3) {
-  switch(p2) {
-    case 0u: {
-      return a.inner[p0].a[p1].m_0[p3];
-      break;
-    }
-    case 1u: {
-      return a.inner[p0].a[p1].m_1[p3];
-      break;
-    }
-    default: {
-      return 0.0f;
-      break;
-    }
-  }
-}
-
-void f() {
-  Outer p_a[4] = conv_arr4_Outer(a.inner);
-  int tint_symbol = i();
-  Outer p_a_i = conv_Outer(a.inner[tint_symbol]);
-  Inner p_a_i_a[4] = conv_arr4_Inner(a.inner[tint_symbol].a);
-  int tint_symbol_1 = i();
-  Inner p_a_i_a_i = conv_Inner(a.inner[tint_symbol].a[tint_symbol_1]);
-  mat2 p_a_i_a_i_m = load_a_inner_p0_a_p1_m(uint(tint_symbol), uint(tint_symbol_1));
-  int tint_symbol_2 = i();
-  vec2 p_a_i_a_i_m_i = load_a_inner_p0_a_p1_m_p2(uint(tint_symbol), uint(tint_symbol_1), uint(tint_symbol_2));
-  Outer l_a[4] = conv_arr4_Outer(a.inner);
-  Outer l_a_i = conv_Outer(a.inner[tint_symbol]);
-  Inner l_a_i_a[4] = conv_arr4_Inner(a.inner[tint_symbol].a);
-  Inner l_a_i_a_i = conv_Inner(a.inner[tint_symbol].a[tint_symbol_1]);
-  mat2 l_a_i_a_i_m = load_a_inner_p0_a_p1_m(uint(tint_symbol), uint(tint_symbol_1));
-  vec2 l_a_i_a_i_m_i = load_a_inner_p0_a_p1_m_p2(uint(tint_symbol), uint(tint_symbol_1), uint(tint_symbol_2));
-  int tint_symbol_3 = i();
-  float l_a_i_a_i_m_i_i = load_a_inner_p0_a_p1_m_p2_p3(uint(tint_symbol), uint(tint_symbol_1), uint(tint_symbol_2), uint(tint_symbol_3));
-}
-
-layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
-void main() {
-  f();
-  return;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2/dynamic_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat2x2/dynamic_index_via_ptr.wgsl.expected.msl
deleted file mode 100644
index c1d8152..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat2x2/dynamic_index_via_ptr.wgsl.expected.msl
+++ /dev/null

@@ -1,48 +0,0 @@
-#include <metal_stdlib>
-
-using namespace metal;
-
-template<typename T, size_t N>
-struct tint_array {
-    const constant T& operator[](size_t i) const constant { return elements[i]; }
-    device T& operator[](size_t i) device { return elements[i]; }
-    const device T& operator[](size_t i) const device { return elements[i]; }
-    thread T& operator[](size_t i) thread { return elements[i]; }
-    const thread T& operator[](size_t i) const thread { return elements[i]; }
-    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
-    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
-    T elements[N];
-};
-
-struct Inner {
-  /* 0x0000 */ float2x2 m;
-};
-
-struct Outer {
-  /* 0x0000 */ tint_array<Inner, 4> a;
-};
-
-int i() {
-  thread int tint_symbol_4 = 0;
-  tint_symbol_4 = as_type<int>((as_type<uint>(tint_symbol_4) + as_type<uint>(1)));
-  return tint_symbol_4;
-}
-
-kernel void f(const constant tint_array<Outer, 4>* tint_symbol_5 [[buffer(0)]]) {
-  int const tint_symbol = i();
-  int const p_a_i_save = tint_symbol;
-  int const tint_symbol_1 = i();
-  int const p_a_i_a_i_save = tint_symbol_1;
-  int const tint_symbol_2 = i();
-  int const p_a_i_a_i_m_i_save = tint_symbol_2;
-  tint_array<Outer, 4> const l_a = *(tint_symbol_5);
-  Outer const l_a_i = (*(tint_symbol_5))[p_a_i_save];
-  tint_array<Inner, 4> const l_a_i_a = (*(tint_symbol_5))[p_a_i_save].a;
-  Inner const l_a_i_a_i = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save];
-  float2x2 const l_a_i_a_i_m = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save].m;
-  float2 const l_a_i_a_i_m_i = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save].m[p_a_i_a_i_m_i_save];
-  int const tint_symbol_3 = i();
-  float const l_a_i_a_i_m_i_i = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save].m[p_a_i_a_i_m_i_save][tint_symbol_3];
-  return;
-}
-

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2/dynamic_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat2x2/dynamic_index_via_ptr.wgsl.expected.spvasm
deleted file mode 100644
index 796a192..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat2x2/dynamic_index_via_ptr.wgsl.expected.spvasm
+++ /dev/null

@@ -1,306 +0,0 @@
-; SPIR-V
-; Version: 1.3
-; Generator: Google Tint Compiler; 0
-; Bound: 193
-; Schema: 0
-               OpCapability Shader
-               OpMemoryModel Logical GLSL450
-               OpEntryPoint GLCompute %f "f"
-               OpExecutionMode %f LocalSize 1 1 1
-               OpName %a_block_std140 "a_block_std140"
-               OpMemberName %a_block_std140 0 "inner"
-               OpName %Outer_std140 "Outer_std140"
-               OpMemberName %Outer_std140 0 "a"
-               OpName %Inner_std140 "Inner_std140"
-               OpMemberName %Inner_std140 0 "m_0"
-               OpMemberName %Inner_std140 1 "m_1"
-               OpName %a "a"
-               OpName %counter "counter"
-               OpName %i "i"
-               OpName %Inner "Inner"
-               OpMemberName %Inner 0 "m"
-               OpName %conv_Inner "conv_Inner"
-               OpName %val "val"
-               OpName %conv_arr4_Inner "conv_arr4_Inner"
-               OpName %val_0 "val"
-               OpName %arr "arr"
-               OpName %i_0 "i"
-               OpName %var_for_index_1 "var_for_index_1"
-               OpName %Outer "Outer"
-               OpMemberName %Outer 0 "a"
-               OpName %conv_Outer "conv_Outer"
-               OpName %val_1 "val"
-               OpName %conv_arr4_Outer "conv_arr4_Outer"
-               OpName %val_2 "val"
-               OpName %arr_0 "arr"
-               OpName %i_1 "i"
-               OpName %var_for_index "var_for_index"
-               OpName %load_a_inner_p0_a_p1_m "load_a_inner_p0_a_p1_m"
-               OpName %p0 "p0"
-               OpName %p1 "p1"
-               OpName %load_a_inner_p0_a_p1_m_p2 "load_a_inner_p0_a_p1_m_p2"
-               OpName %p0_0 "p0"
-               OpName %p1_0 "p1"
-               OpName %p2 "p2"
-               OpName %load_a_inner_p0_a_p1_m_p2_p3 "load_a_inner_p0_a_p1_m_p2_p3"
-               OpName %p0_1 "p0"
-               OpName %p1_1 "p1"
-               OpName %p2_0 "p2"
-               OpName %p3 "p3"
-               OpName %f "f"
-               OpDecorate %a_block_std140 Block
-               OpMemberDecorate %a_block_std140 0 Offset 0
-               OpMemberDecorate %Outer_std140 0 Offset 0
-               OpMemberDecorate %Inner_std140 0 Offset 0
-               OpMemberDecorate %Inner_std140 1 Offset 8
-               OpDecorate %_arr_Inner_std140_uint_4 ArrayStride 16
-               OpDecorate %_arr_Outer_std140_uint_4 ArrayStride 64
-               OpDecorate %a NonWritable
-               OpDecorate %a DescriptorSet 0
-               OpDecorate %a Binding 0
-               OpMemberDecorate %Inner 0 Offset 0
-               OpMemberDecorate %Inner 0 ColMajor
-               OpMemberDecorate %Inner 0 MatrixStride 8
-               OpDecorate %_arr_Inner_uint_4 ArrayStride 16
-               OpMemberDecorate %Outer 0 Offset 0
-               OpDecorate %_arr_Outer_uint_4 ArrayStride 64
-      %float = OpTypeFloat 32
-    %v2float = OpTypeVector %float 2
-%Inner_std140 = OpTypeStruct %v2float %v2float
-       %uint = OpTypeInt 32 0
-     %uint_4 = OpConstant %uint 4
-%_arr_Inner_std140_uint_4 = OpTypeArray %Inner_std140 %uint_4
-%Outer_std140 = OpTypeStruct %_arr_Inner_std140_uint_4
-%_arr_Outer_std140_uint_4 = OpTypeArray %Outer_std140 %uint_4
-%a_block_std140 = OpTypeStruct %_arr_Outer_std140_uint_4
-%_ptr_Uniform_a_block_std140 = OpTypePointer Uniform %a_block_std140
-          %a = OpVariable %_ptr_Uniform_a_block_std140 Uniform
-        %int = OpTypeInt 32 1
-         %13 = OpConstantNull %int
-%_ptr_Private_int = OpTypePointer Private %int
-    %counter = OpVariable %_ptr_Private_int Private %13
-         %16 = OpTypeFunction %int
-      %int_1 = OpConstant %int 1
-%mat2v2float = OpTypeMatrix %v2float 2
-      %Inner = OpTypeStruct %mat2v2float
-         %23 = OpTypeFunction %Inner %Inner_std140
-%_arr_Inner_uint_4 = OpTypeArray %Inner %uint_4
-         %33 = OpTypeFunction %_arr_Inner_uint_4 %_arr_Inner_std140_uint_4
-%_ptr_Function__arr_Inner_uint_4 = OpTypePointer Function %_arr_Inner_uint_4
-         %40 = OpConstantNull %_arr_Inner_uint_4
-%_ptr_Function_uint = OpTypePointer Function %uint
-         %43 = OpConstantNull %uint
-       %bool = OpTypeBool
-%_ptr_Function__arr_Inner_std140_uint_4 = OpTypePointer Function %_arr_Inner_std140_uint_4
-         %56 = OpConstantNull %_arr_Inner_std140_uint_4
-%_ptr_Function_Inner = OpTypePointer Function %Inner
-%_ptr_Function_Inner_std140 = OpTypePointer Function %Inner_std140
-     %uint_1 = OpConstant %uint 1
-      %Outer = OpTypeStruct %_arr_Inner_uint_4
-         %69 = OpTypeFunction %Outer %Outer_std140
-%_arr_Outer_uint_4 = OpTypeArray %Outer %uint_4
-         %77 = OpTypeFunction %_arr_Outer_uint_4 %_arr_Outer_std140_uint_4
-%_ptr_Function__arr_Outer_uint_4 = OpTypePointer Function %_arr_Outer_uint_4
-         %84 = OpConstantNull %_arr_Outer_uint_4
-%_ptr_Function__arr_Outer_std140_uint_4 = OpTypePointer Function %_arr_Outer_std140_uint_4
-         %97 = OpConstantNull %_arr_Outer_std140_uint_4
-%_ptr_Function_Outer = OpTypePointer Function %Outer
-%_ptr_Function_Outer_std140 = OpTypePointer Function %Outer_std140
-        %109 = OpTypeFunction %mat2v2float %uint %uint
-     %uint_0 = OpConstant %uint 0
-%_ptr_Uniform_Inner_std140 = OpTypePointer Uniform %Inner_std140
-%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
-        %126 = OpTypeFunction %v2float %uint %uint %uint
-        %140 = OpConstantNull %v2float
-        %141 = OpTypeFunction %float %uint %uint %uint %uint
-%_ptr_Uniform_float = OpTypePointer Uniform %float
-        %157 = OpConstantNull %float
-       %void = OpTypeVoid
-        %158 = OpTypeFunction %void
-%_ptr_Uniform__arr_Outer_std140_uint_4 = OpTypePointer Uniform %_arr_Outer_std140_uint_4
-%_ptr_Uniform_Outer_std140 = OpTypePointer Uniform %Outer_std140
-%_ptr_Uniform__arr_Inner_std140_uint_4 = OpTypePointer Uniform %_arr_Inner_std140_uint_4
-          %i = OpFunction %int None %16
-         %18 = OpLabel
-         %19 = OpLoad %int %counter
-         %21 = OpIAdd %int %19 %int_1
-               OpStore %counter %21
-         %22 = OpLoad %int %counter
-               OpReturnValue %22
-               OpFunctionEnd
- %conv_Inner = OpFunction %Inner None %23
-        %val = OpFunctionParameter %Inner_std140
-         %28 = OpLabel
-         %29 = OpCompositeExtract %v2float %val 0
-         %30 = OpCompositeExtract %v2float %val 1
-         %31 = OpCompositeConstruct %mat2v2float %29 %30
-         %32 = OpCompositeConstruct %Inner %31
-               OpReturnValue %32
-               OpFunctionEnd
-%conv_arr4_Inner = OpFunction %_arr_Inner_uint_4 None %33
-      %val_0 = OpFunctionParameter %_arr_Inner_std140_uint_4
-         %37 = OpLabel
-        %arr = OpVariable %_ptr_Function__arr_Inner_uint_4 Function %40
-        %i_0 = OpVariable %_ptr_Function_uint Function %43
-%var_for_index_1 = OpVariable %_ptr_Function__arr_Inner_std140_uint_4 Function %56
-               OpBranch %44
-         %44 = OpLabel
-               OpLoopMerge %45 %46 None
-               OpBranch %47
-         %47 = OpLabel
-         %49 = OpLoad %uint %i_0
-         %50 = OpULessThan %bool %49 %uint_4
-         %48 = OpLogicalNot %bool %50
-               OpSelectionMerge %52 None
-               OpBranchConditional %48 %53 %52
-         %53 = OpLabel
-               OpBranch %45
-         %52 = OpLabel
-               OpStore %var_for_index_1 %val_0
-         %57 = OpLoad %uint %i_0
-         %59 = OpAccessChain %_ptr_Function_Inner %arr %57
-         %61 = OpLoad %uint %i_0
-         %63 = OpAccessChain %_ptr_Function_Inner_std140 %var_for_index_1 %61
-         %64 = OpLoad %Inner_std140 %63
-         %60 = OpFunctionCall %Inner %conv_Inner %64
-               OpStore %59 %60
-               OpBranch %46
-         %46 = OpLabel
-         %65 = OpLoad %uint %i_0
-         %67 = OpIAdd %uint %65 %uint_1
-               OpStore %i_0 %67
-               OpBranch %44
-         %45 = OpLabel
-         %68 = OpLoad %_arr_Inner_uint_4 %arr
-               OpReturnValue %68
-               OpFunctionEnd
- %conv_Outer = OpFunction %Outer None %69
-      %val_1 = OpFunctionParameter %Outer_std140
-         %73 = OpLabel
-         %75 = OpCompositeExtract %_arr_Inner_std140_uint_4 %val_1 0
-         %74 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr4_Inner %75
-         %76 = OpCompositeConstruct %Outer %74
-               OpReturnValue %76
-               OpFunctionEnd
-%conv_arr4_Outer = OpFunction %_arr_Outer_uint_4 None %77
-      %val_2 = OpFunctionParameter %_arr_Outer_std140_uint_4
-         %81 = OpLabel
-      %arr_0 = OpVariable %_ptr_Function__arr_Outer_uint_4 Function %84
-        %i_1 = OpVariable %_ptr_Function_uint Function %43
-%var_for_index = OpVariable %_ptr_Function__arr_Outer_std140_uint_4 Function %97
-               OpBranch %86
-         %86 = OpLabel
-               OpLoopMerge %87 %88 None
-               OpBranch %89
-         %89 = OpLabel
-         %91 = OpLoad %uint %i_1
-         %92 = OpULessThan %bool %91 %uint_4
-         %90 = OpLogicalNot %bool %92
-               OpSelectionMerge %93 None
-               OpBranchConditional %90 %94 %93
-         %94 = OpLabel
-               OpBranch %87
-         %93 = OpLabel
-               OpStore %var_for_index %val_2
-         %98 = OpLoad %uint %i_1
-        %100 = OpAccessChain %_ptr_Function_Outer %arr_0 %98
-        %102 = OpLoad %uint %i_1
-        %104 = OpAccessChain %_ptr_Function_Outer_std140 %var_for_index %102
-        %105 = OpLoad %Outer_std140 %104
-        %101 = OpFunctionCall %Outer %conv_Outer %105
-               OpStore %100 %101
-               OpBranch %88
-         %88 = OpLabel
-        %106 = OpLoad %uint %i_1
-        %107 = OpIAdd %uint %106 %uint_1
-               OpStore %i_1 %107
-               OpBranch %86
-         %87 = OpLabel
-        %108 = OpLoad %_arr_Outer_uint_4 %arr_0
-               OpReturnValue %108
-               OpFunctionEnd
-%load_a_inner_p0_a_p1_m = OpFunction %mat2v2float None %109
-         %p0 = OpFunctionParameter %uint
-         %p1 = OpFunctionParameter %uint
-        %113 = OpLabel
-        %117 = OpAccessChain %_ptr_Uniform_Inner_std140 %a %uint_0 %p0 %uint_0 %p1
-        %120 = OpAccessChain %_ptr_Uniform_v2float %117 %uint_0
-        %121 = OpLoad %v2float %120
-        %123 = OpAccessChain %_ptr_Uniform_v2float %117 %uint_1
-        %124 = OpLoad %v2float %123
-        %125 = OpCompositeConstruct %mat2v2float %121 %124
-               OpReturnValue %125
-               OpFunctionEnd
-%load_a_inner_p0_a_p1_m_p2 = OpFunction %v2float None %126
-       %p0_0 = OpFunctionParameter %uint
-       %p1_0 = OpFunctionParameter %uint
-         %p2 = OpFunctionParameter %uint
-        %131 = OpLabel
-               OpSelectionMerge %132 None
-               OpSwitch %p2 %133 0 %134 1 %135
-        %134 = OpLabel
-        %136 = OpAccessChain %_ptr_Uniform_v2float %a %uint_0 %p0_0 %uint_0 %p1_0 %uint_0
-        %137 = OpLoad %v2float %136
-               OpReturnValue %137
-        %135 = OpLabel
-        %138 = OpAccessChain %_ptr_Uniform_v2float %a %uint_0 %p0_0 %uint_0 %p1_0 %uint_1
-        %139 = OpLoad %v2float %138
-               OpReturnValue %139
-        %133 = OpLabel
-               OpReturnValue %140
-        %132 = OpLabel
-               OpReturnValue %140
-               OpFunctionEnd
-%load_a_inner_p0_a_p1_m_p2_p3 = OpFunction %float None %141
-       %p0_1 = OpFunctionParameter %uint
-       %p1_1 = OpFunctionParameter %uint
-       %p2_0 = OpFunctionParameter %uint
-         %p3 = OpFunctionParameter %uint
-        %147 = OpLabel
-               OpSelectionMerge %148 None
-               OpSwitch %p2_0 %149 0 %150 1 %151
-        %150 = OpLabel
-        %153 = OpAccessChain %_ptr_Uniform_float %a %uint_0 %p0_1 %uint_0 %p1_1 %uint_0 %p3
-        %154 = OpLoad %float %153
-               OpReturnValue %154
-        %151 = OpLabel
-        %155 = OpAccessChain %_ptr_Uniform_float %a %uint_0 %p0_1 %uint_0 %p1_1 %uint_1 %p3
-        %156 = OpLoad %float %155
-               OpReturnValue %156
-        %149 = OpLabel
-               OpReturnValue %157
-        %148 = OpLabel
-               OpReturnValue %157
-               OpFunctionEnd
-          %f = OpFunction %void None %158
-        %161 = OpLabel
-        %162 = OpFunctionCall %int %i
-        %163 = OpFunctionCall %int %i
-        %164 = OpFunctionCall %int %i
-        %167 = OpAccessChain %_ptr_Uniform__arr_Outer_std140_uint_4 %a %uint_0
-        %168 = OpLoad %_arr_Outer_std140_uint_4 %167
-        %165 = OpFunctionCall %_arr_Outer_uint_4 %conv_arr4_Outer %168
-        %171 = OpAccessChain %_ptr_Uniform_Outer_std140 %a %uint_0 %162
-        %172 = OpLoad %Outer_std140 %171
-        %169 = OpFunctionCall %Outer %conv_Outer %172
-        %175 = OpAccessChain %_ptr_Uniform__arr_Inner_std140_uint_4 %a %uint_0 %162 %uint_0
-        %176 = OpLoad %_arr_Inner_std140_uint_4 %175
-        %173 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr4_Inner %176
-        %178 = OpAccessChain %_ptr_Uniform_Inner_std140 %a %uint_0 %162 %uint_0 %163
-        %179 = OpLoad %Inner_std140 %178
-        %177 = OpFunctionCall %Inner %conv_Inner %179
-        %181 = OpBitcast %uint %162
-        %182 = OpBitcast %uint %163
-        %180 = OpFunctionCall %mat2v2float %load_a_inner_p0_a_p1_m %181 %182
-        %184 = OpBitcast %uint %162
-        %185 = OpBitcast %uint %163
-        %186 = OpBitcast %uint %164
-        %183 = OpFunctionCall %v2float %load_a_inner_p0_a_p1_m_p2 %184 %185 %186
-        %187 = OpFunctionCall %int %i
-        %189 = OpBitcast %uint %162
-        %190 = OpBitcast %uint %163
-        %191 = OpBitcast %uint %164
-        %192 = OpBitcast %uint %187
-        %188 = OpFunctionCall %float %load_a_inner_p0_a_p1_m_p2_p3 %189 %190 %191 %192
-               OpReturn
-               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2/dynamic_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x2/dynamic_index_via_ptr.wgsl.expected.wgsl
deleted file mode 100644
index 97d5141..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat2x2/dynamic_index_via_ptr.wgsl.expected.wgsl
+++ /dev/null

@@ -1,33 +0,0 @@
-struct Inner {
-  m : mat2x2<f32>,
-}
-
-struct Outer {
-  a : array<Inner, 4>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
-
-var<private> counter = 0;
-
-fn i() -> i32 {
-  counter++;
-  return counter;
-}
-
-@compute @workgroup_size(1)
-fn f() {
-  let p_a = &(a);
-  let p_a_i = &((*(p_a))[i()]);
-  let p_a_i_a = &((*(p_a_i)).a);
-  let p_a_i_a_i = &((*(p_a_i_a))[i()]);
-  let p_a_i_a_i_m = &((*(p_a_i_a_i)).m);
-  let p_a_i_a_i_m_i = &((*(p_a_i_a_i_m))[i()]);
-  let l_a : array<Outer, 4> = *(p_a);
-  let l_a_i : Outer = *(p_a_i);
-  let l_a_i_a : array<Inner, 4> = *(p_a_i_a);
-  let l_a_i_a_i : Inner = *(p_a_i_a_i);
-  let l_a_i_a_i_m : mat2x2<f32> = *(p_a_i_a_i_m);
-  let l_a_i_a_i_m_i : vec2<f32> = *(p_a_i_a_i_m_i);
-  let l_a_i_a_i_m_i_i : f32 = (*(p_a_i_a_i_m_i))[i()];
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2/static_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x2/static_index_via_ptr.wgsl
deleted file mode 100644
index 0a790a9..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat2x2/static_index_via_ptr.wgsl
+++ /dev/null

@@ -1,28 +0,0 @@
-struct Inner {
-  m : mat2x2<f32>,
-}
-
-struct Outer {
-  a : array<Inner, 4>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
-
-@compute @workgroup_size(1)
-fn f() {
-  let p_a = &a;
-  let p_a_3 = &((*p_a)[3]);
-  let p_a_3_a = &((*p_a_3).a);
-  let p_a_3_a_2 = &((*p_a_3_a)[2]);
-  let p_a_3_a_2_m = &((*p_a_3_a_2).m);
-  let p_a_3_a_2_m_1 = &((*p_a_3_a_2_m)[1]);
-
-
-  let l_a             : array<Outer, 4> = *p_a;
-  let l_a_3           : Outer           = *p_a_3;
-  let l_a_3_a         : array<Inner, 4> = *p_a_3_a;
-  let l_a_3_a_2       : Inner           = *p_a_3_a_2;
-  let l_a_3_a_2_m     : mat2x2<f32>     = *p_a_3_a_2_m;
-  let l_a_3_a_2_m_1   : vec2<f32>       = *p_a_3_a_2_m_1;
-  let l_a_3_a_2_m_1_0 : f32             = (*p_a_3_a_2_m_1)[0];
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2/static_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x2/static_index_via_ptr.wgsl.expected.dxc.hlsl
deleted file mode 100644
index fcf8c2f..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat2x2/static_index_via_ptr.wgsl.expected.dxc.hlsl
+++ /dev/null

@@ -1,62 +0,0 @@
-struct Inner {
-  float2x2 m;
-};
-struct Outer {
-  Inner a[4];
-};
-
-cbuffer cbuffer_a : register(b0, space0) {
-  uint4 a[16];
-};
-
-float2x2 tint_symbol_4(uint4 buffer[16], uint offset) {
-  const uint scalar_offset = ((offset + 0u)) / 4;
-  uint4 ubo_load = buffer[scalar_offset / 4];
-  const uint scalar_offset_1 = ((offset + 8u)) / 4;
-  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
-  return float2x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)));
-}
-
-Inner tint_symbol_3(uint4 buffer[16], uint offset) {
-  const Inner tint_symbol_7 = {tint_symbol_4(buffer, (offset + 0u))};
-  return tint_symbol_7;
-}
-
-typedef Inner tint_symbol_2_ret[4];
-tint_symbol_2_ret tint_symbol_2(uint4 buffer[16], uint offset) {
-  Inner arr[4] = (Inner[4])0;
-  {
-    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
-      arr[i] = tint_symbol_3(buffer, (offset + (i * 16u)));
-    }
-  }
-  return arr;
-}
-
-Outer tint_symbol_1(uint4 buffer[16], uint offset) {
-  const Outer tint_symbol_8 = {tint_symbol_2(buffer, (offset + 0u))};
-  return tint_symbol_8;
-}
-
-typedef Outer tint_symbol_ret[4];
-tint_symbol_ret tint_symbol(uint4 buffer[16], uint offset) {
-  Outer arr_1[4] = (Outer[4])0;
-  {
-    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
-      arr_1[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 64u)));
-    }
-  }
-  return arr_1;
-}
-
-[numthreads(1, 1, 1)]
-void f() {
-  const Outer l_a[4] = tint_symbol(a, 0u);
-  const Outer l_a_3 = tint_symbol_1(a, 192u);
-  const Inner l_a_3_a[4] = tint_symbol_2(a, 192u);
-  const Inner l_a_3_a_2 = tint_symbol_3(a, 224u);
-  const float2x2 l_a_3_a_2_m = tint_symbol_4(a, 224u);
-  const float2 l_a_3_a_2_m_1 = asfloat(a[14].zw);
-  const float l_a_3_a_2_m_1_0 = asfloat(a[14].z);
-  return;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2/static_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x2/static_index_via_ptr.wgsl.expected.fxc.hlsl
deleted file mode 100644
index fcf8c2f..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat2x2/static_index_via_ptr.wgsl.expected.fxc.hlsl
+++ /dev/null

@@ -1,62 +0,0 @@
-struct Inner {
-  float2x2 m;
-};
-struct Outer {
-  Inner a[4];
-};
-
-cbuffer cbuffer_a : register(b0, space0) {
-  uint4 a[16];
-};
-
-float2x2 tint_symbol_4(uint4 buffer[16], uint offset) {
-  const uint scalar_offset = ((offset + 0u)) / 4;
-  uint4 ubo_load = buffer[scalar_offset / 4];
-  const uint scalar_offset_1 = ((offset + 8u)) / 4;
-  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
-  return float2x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)));
-}
-
-Inner tint_symbol_3(uint4 buffer[16], uint offset) {
-  const Inner tint_symbol_7 = {tint_symbol_4(buffer, (offset + 0u))};
-  return tint_symbol_7;
-}
-
-typedef Inner tint_symbol_2_ret[4];
-tint_symbol_2_ret tint_symbol_2(uint4 buffer[16], uint offset) {
-  Inner arr[4] = (Inner[4])0;
-  {
-    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
-      arr[i] = tint_symbol_3(buffer, (offset + (i * 16u)));
-    }
-  }
-  return arr;
-}
-
-Outer tint_symbol_1(uint4 buffer[16], uint offset) {
-  const Outer tint_symbol_8 = {tint_symbol_2(buffer, (offset + 0u))};
-  return tint_symbol_8;
-}
-
-typedef Outer tint_symbol_ret[4];
-tint_symbol_ret tint_symbol(uint4 buffer[16], uint offset) {
-  Outer arr_1[4] = (Outer[4])0;
-  {
-    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
-      arr_1[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 64u)));
-    }
-  }
-  return arr_1;
-}
-
-[numthreads(1, 1, 1)]
-void f() {
-  const Outer l_a[4] = tint_symbol(a, 0u);
-  const Outer l_a_3 = tint_symbol_1(a, 192u);
-  const Inner l_a_3_a[4] = tint_symbol_2(a, 192u);
-  const Inner l_a_3_a_2 = tint_symbol_3(a, 224u);
-  const float2x2 l_a_3_a_2_m = tint_symbol_4(a, 224u);
-  const float2 l_a_3_a_2_m_1 = asfloat(a[14].zw);
-  const float l_a_3_a_2_m_1_0 = asfloat(a[14].z);
-  return;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2/static_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat2x2/static_index_via_ptr.wgsl.expected.glsl
deleted file mode 100644
index 3564979..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat2x2/static_index_via_ptr.wgsl.expected.glsl
+++ /dev/null

@@ -1,76 +0,0 @@
-#version 310 es
-
-struct Inner {
-  mat2 m;
-};
-
-struct Inner_std140 {
-  vec2 m_0;
-  vec2 m_1;
-};
-
-struct Outer {
-  Inner a[4];
-};
-
-struct Outer_std140 {
-  Inner_std140 a[4];
-};
-
-layout(binding = 0, std140) uniform a_block_std140_ubo {
-  Outer_std140 inner[4];
-} a;
-
-Inner conv_Inner(Inner_std140 val) {
-  return Inner(mat2(val.m_0, val.m_1));
-}
-
-Inner[4] conv_arr4_Inner(Inner_std140 val[4]) {
-  Inner arr[4] = Inner[4](Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)));
-  {
-    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
-      arr[i] = conv_Inner(val[i]);
-    }
-  }
-  return arr;
-}
-
-Outer conv_Outer(Outer_std140 val) {
-  return Outer(conv_arr4_Inner(val.a));
-}
-
-Outer[4] conv_arr4_Outer(Outer_std140 val[4]) {
-  Outer arr[4] = Outer[4](Outer(Inner[4](Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)))), Outer(Inner[4](Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)))), Outer(Inner[4](Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)))), Outer(Inner[4](Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f)))));
-  {
-    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
-      arr[i] = conv_Outer(val[i]);
-    }
-  }
-  return arr;
-}
-
-mat2 load_a_inner_3_a_2_m() {
-  return mat2(a.inner[3u].a[2u].m_0, a.inner[3u].a[2u].m_1);
-}
-
-void f() {
-  Outer p_a[4] = conv_arr4_Outer(a.inner);
-  Outer p_a_3 = conv_Outer(a.inner[3u]);
-  Inner p_a_3_a[4] = conv_arr4_Inner(a.inner[3u].a);
-  Inner p_a_3_a_2 = conv_Inner(a.inner[3u].a[2u]);
-  mat2 p_a_3_a_2_m = load_a_inner_3_a_2_m();
-  vec2 p_a_3_a_2_m_1 = a.inner[3u].a[2u].m_1;
-  Outer l_a[4] = conv_arr4_Outer(a.inner);
-  Outer l_a_3 = conv_Outer(a.inner[3u]);
-  Inner l_a_3_a[4] = conv_arr4_Inner(a.inner[3u].a);
-  Inner l_a_3_a_2 = conv_Inner(a.inner[3u].a[2u]);
-  mat2 l_a_3_a_2_m = load_a_inner_3_a_2_m();
-  vec2 l_a_3_a_2_m_1 = a.inner[3u].a[2u].m_1;
-  float l_a_3_a_2_m_1_0 = a.inner[3u].a[2u].m_1[0u];
-}
-
-layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
-void main() {
-  f();
-  return;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2/static_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat2x2/static_index_via_ptr.wgsl.expected.msl
deleted file mode 100644
index e0658ac..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat2x2/static_index_via_ptr.wgsl.expected.msl
+++ /dev/null

@@ -1,35 +0,0 @@
-#include <metal_stdlib>
-
-using namespace metal;
-
-template<typename T, size_t N>
-struct tint_array {
-    const constant T& operator[](size_t i) const constant { return elements[i]; }
-    device T& operator[](size_t i) device { return elements[i]; }
-    const device T& operator[](size_t i) const device { return elements[i]; }
-    thread T& operator[](size_t i) thread { return elements[i]; }
-    const thread T& operator[](size_t i) const thread { return elements[i]; }
-    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
-    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
-    T elements[N];
-};
-
-struct Inner {
-  /* 0x0000 */ float2x2 m;
-};
-
-struct Outer {
-  /* 0x0000 */ tint_array<Inner, 4> a;
-};
-
-kernel void f(const constant tint_array<Outer, 4>* tint_symbol [[buffer(0)]]) {
-  tint_array<Outer, 4> const l_a = *(tint_symbol);
-  Outer const l_a_3 = (*(tint_symbol))[3];
-  tint_array<Inner, 4> const l_a_3_a = (*(tint_symbol))[3].a;
-  Inner const l_a_3_a_2 = (*(tint_symbol))[3].a[2];
-  float2x2 const l_a_3_a_2_m = (*(tint_symbol))[3].a[2].m;
-  float2 const l_a_3_a_2_m_1 = (*(tint_symbol))[3].a[2].m[1];
-  float const l_a_3_a_2_m_1_0 = (*(tint_symbol))[3].a[2].m[1][0];
-  return;
-}
-

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2/static_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat2x2/static_index_via_ptr.wgsl.expected.spvasm
deleted file mode 100644
index de1047e..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat2x2/static_index_via_ptr.wgsl.expected.spvasm
+++ /dev/null

@@ -1,223 +0,0 @@
-; SPIR-V
-; Version: 1.3
-; Generator: Google Tint Compiler; 0
-; Bound: 140
-; Schema: 0
-               OpCapability Shader
-               OpMemoryModel Logical GLSL450
-               OpEntryPoint GLCompute %f "f"
-               OpExecutionMode %f LocalSize 1 1 1
-               OpName %a_block_std140 "a_block_std140"
-               OpMemberName %a_block_std140 0 "inner"
-               OpName %Outer_std140 "Outer_std140"
-               OpMemberName %Outer_std140 0 "a"
-               OpName %Inner_std140 "Inner_std140"
-               OpMemberName %Inner_std140 0 "m_0"
-               OpMemberName %Inner_std140 1 "m_1"
-               OpName %a "a"
-               OpName %Inner "Inner"
-               OpMemberName %Inner 0 "m"
-               OpName %conv_Inner "conv_Inner"
-               OpName %val "val"
-               OpName %conv_arr4_Inner "conv_arr4_Inner"
-               OpName %val_0 "val"
-               OpName %arr "arr"
-               OpName %i "i"
-               OpName %var_for_index_1 "var_for_index_1"
-               OpName %Outer "Outer"
-               OpMemberName %Outer 0 "a"
-               OpName %conv_Outer "conv_Outer"
-               OpName %val_1 "val"
-               OpName %conv_arr4_Outer "conv_arr4_Outer"
-               OpName %val_2 "val"
-               OpName %arr_0 "arr"
-               OpName %i_0 "i"
-               OpName %var_for_index "var_for_index"
-               OpName %load_a_inner_3_a_2_m "load_a_inner_3_a_2_m"
-               OpName %f "f"
-               OpDecorate %a_block_std140 Block
-               OpMemberDecorate %a_block_std140 0 Offset 0
-               OpMemberDecorate %Outer_std140 0 Offset 0
-               OpMemberDecorate %Inner_std140 0 Offset 0
-               OpMemberDecorate %Inner_std140 1 Offset 8
-               OpDecorate %_arr_Inner_std140_uint_4 ArrayStride 16
-               OpDecorate %_arr_Outer_std140_uint_4 ArrayStride 64
-               OpDecorate %a NonWritable
-               OpDecorate %a DescriptorSet 0
-               OpDecorate %a Binding 0
-               OpMemberDecorate %Inner 0 Offset 0
-               OpMemberDecorate %Inner 0 ColMajor
-               OpMemberDecorate %Inner 0 MatrixStride 8
-               OpDecorate %_arr_Inner_uint_4 ArrayStride 16
-               OpMemberDecorate %Outer 0 Offset 0
-               OpDecorate %_arr_Outer_uint_4 ArrayStride 64
-      %float = OpTypeFloat 32
-    %v2float = OpTypeVector %float 2
-%Inner_std140 = OpTypeStruct %v2float %v2float
-       %uint = OpTypeInt 32 0
-     %uint_4 = OpConstant %uint 4
-%_arr_Inner_std140_uint_4 = OpTypeArray %Inner_std140 %uint_4
-%Outer_std140 = OpTypeStruct %_arr_Inner_std140_uint_4
-%_arr_Outer_std140_uint_4 = OpTypeArray %Outer_std140 %uint_4
-%a_block_std140 = OpTypeStruct %_arr_Outer_std140_uint_4
-%_ptr_Uniform_a_block_std140 = OpTypePointer Uniform %a_block_std140
-          %a = OpVariable %_ptr_Uniform_a_block_std140 Uniform
-%mat2v2float = OpTypeMatrix %v2float 2
-      %Inner = OpTypeStruct %mat2v2float
-         %12 = OpTypeFunction %Inner %Inner_std140
-%_arr_Inner_uint_4 = OpTypeArray %Inner %uint_4
-         %22 = OpTypeFunction %_arr_Inner_uint_4 %_arr_Inner_std140_uint_4
-%_ptr_Function__arr_Inner_uint_4 = OpTypePointer Function %_arr_Inner_uint_4
-         %29 = OpConstantNull %_arr_Inner_uint_4
-%_ptr_Function_uint = OpTypePointer Function %uint
-         %32 = OpConstantNull %uint
-       %bool = OpTypeBool
-%_ptr_Function__arr_Inner_std140_uint_4 = OpTypePointer Function %_arr_Inner_std140_uint_4
-         %45 = OpConstantNull %_arr_Inner_std140_uint_4
-%_ptr_Function_Inner = OpTypePointer Function %Inner
-%_ptr_Function_Inner_std140 = OpTypePointer Function %Inner_std140
-     %uint_1 = OpConstant %uint 1
-      %Outer = OpTypeStruct %_arr_Inner_uint_4
-         %58 = OpTypeFunction %Outer %Outer_std140
-%_arr_Outer_uint_4 = OpTypeArray %Outer %uint_4
-         %66 = OpTypeFunction %_arr_Outer_uint_4 %_arr_Outer_std140_uint_4
-%_ptr_Function__arr_Outer_uint_4 = OpTypePointer Function %_arr_Outer_uint_4
-         %73 = OpConstantNull %_arr_Outer_uint_4
-%_ptr_Function__arr_Outer_std140_uint_4 = OpTypePointer Function %_arr_Outer_std140_uint_4
-         %86 = OpConstantNull %_arr_Outer_std140_uint_4
-%_ptr_Function_Outer = OpTypePointer Function %Outer
-%_ptr_Function_Outer_std140 = OpTypePointer Function %Outer_std140
-         %98 = OpTypeFunction %mat2v2float
-     %uint_0 = OpConstant %uint 0
-     %uint_3 = OpConstant %uint 3
-     %uint_2 = OpConstant %uint 2
-%_ptr_Uniform_Inner_std140 = OpTypePointer Uniform %Inner_std140
-%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
-       %void = OpTypeVoid
-        %115 = OpTypeFunction %void
-%_ptr_Uniform__arr_Outer_std140_uint_4 = OpTypePointer Uniform %_arr_Outer_std140_uint_4
-%_ptr_Uniform_Outer_std140 = OpTypePointer Uniform %Outer_std140
-%_ptr_Uniform__arr_Inner_std140_uint_4 = OpTypePointer Uniform %_arr_Inner_std140_uint_4
-%_ptr_Uniform_float = OpTypePointer Uniform %float
- %conv_Inner = OpFunction %Inner None %12
-        %val = OpFunctionParameter %Inner_std140
-         %17 = OpLabel
-         %18 = OpCompositeExtract %v2float %val 0
-         %19 = OpCompositeExtract %v2float %val 1
-         %20 = OpCompositeConstruct %mat2v2float %18 %19
-         %21 = OpCompositeConstruct %Inner %20
-               OpReturnValue %21
-               OpFunctionEnd
-%conv_arr4_Inner = OpFunction %_arr_Inner_uint_4 None %22
-      %val_0 = OpFunctionParameter %_arr_Inner_std140_uint_4
-         %26 = OpLabel
-        %arr = OpVariable %_ptr_Function__arr_Inner_uint_4 Function %29
-          %i = OpVariable %_ptr_Function_uint Function %32
-%var_for_index_1 = OpVariable %_ptr_Function__arr_Inner_std140_uint_4 Function %45
-               OpBranch %33
-         %33 = OpLabel
-               OpLoopMerge %34 %35 None
-               OpBranch %36
-         %36 = OpLabel
-         %38 = OpLoad %uint %i
-         %39 = OpULessThan %bool %38 %uint_4
-         %37 = OpLogicalNot %bool %39
-               OpSelectionMerge %41 None
-               OpBranchConditional %37 %42 %41
-         %42 = OpLabel
-               OpBranch %34
-         %41 = OpLabel
-               OpStore %var_for_index_1 %val_0
-         %46 = OpLoad %uint %i
-         %48 = OpAccessChain %_ptr_Function_Inner %arr %46
-         %50 = OpLoad %uint %i
-         %52 = OpAccessChain %_ptr_Function_Inner_std140 %var_for_index_1 %50
-         %53 = OpLoad %Inner_std140 %52
-         %49 = OpFunctionCall %Inner %conv_Inner %53
-               OpStore %48 %49
-               OpBranch %35
-         %35 = OpLabel
-         %54 = OpLoad %uint %i
-         %56 = OpIAdd %uint %54 %uint_1
-               OpStore %i %56
-               OpBranch %33
-         %34 = OpLabel
-         %57 = OpLoad %_arr_Inner_uint_4 %arr
-               OpReturnValue %57
-               OpFunctionEnd
- %conv_Outer = OpFunction %Outer None %58
-      %val_1 = OpFunctionParameter %Outer_std140
-         %62 = OpLabel
-         %64 = OpCompositeExtract %_arr_Inner_std140_uint_4 %val_1 0
-         %63 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr4_Inner %64
-         %65 = OpCompositeConstruct %Outer %63
-               OpReturnValue %65
-               OpFunctionEnd
-%conv_arr4_Outer = OpFunction %_arr_Outer_uint_4 None %66
-      %val_2 = OpFunctionParameter %_arr_Outer_std140_uint_4
-         %70 = OpLabel
-      %arr_0 = OpVariable %_ptr_Function__arr_Outer_uint_4 Function %73
-        %i_0 = OpVariable %_ptr_Function_uint Function %32
-%var_for_index = OpVariable %_ptr_Function__arr_Outer_std140_uint_4 Function %86
-               OpBranch %75
-         %75 = OpLabel
-               OpLoopMerge %76 %77 None
-               OpBranch %78
-         %78 = OpLabel
-         %80 = OpLoad %uint %i_0
-         %81 = OpULessThan %bool %80 %uint_4
-         %79 = OpLogicalNot %bool %81
-               OpSelectionMerge %82 None
-               OpBranchConditional %79 %83 %82
-         %83 = OpLabel
-               OpBranch %76
-         %82 = OpLabel
-               OpStore %var_for_index %val_2
-         %87 = OpLoad %uint %i_0
-         %89 = OpAccessChain %_ptr_Function_Outer %arr_0 %87
-         %91 = OpLoad %uint %i_0
-         %93 = OpAccessChain %_ptr_Function_Outer_std140 %var_for_index %91
-         %94 = OpLoad %Outer_std140 %93
-         %90 = OpFunctionCall %Outer %conv_Outer %94
-               OpStore %89 %90
-               OpBranch %77
-         %77 = OpLabel
-         %95 = OpLoad %uint %i_0
-         %96 = OpIAdd %uint %95 %uint_1
-               OpStore %i_0 %96
-               OpBranch %75
-         %76 = OpLabel
-         %97 = OpLoad %_arr_Outer_uint_4 %arr_0
-               OpReturnValue %97
-               OpFunctionEnd
-%load_a_inner_3_a_2_m = OpFunction %mat2v2float None %98
-        %100 = OpLabel
-        %106 = OpAccessChain %_ptr_Uniform_Inner_std140 %a %uint_0 %uint_3 %uint_0 %uint_2
-        %109 = OpAccessChain %_ptr_Uniform_v2float %106 %uint_0
-        %110 = OpLoad %v2float %109
-        %112 = OpAccessChain %_ptr_Uniform_v2float %106 %uint_1
-        %113 = OpLoad %v2float %112
-        %114 = OpCompositeConstruct %mat2v2float %110 %113
-               OpReturnValue %114
-               OpFunctionEnd
-          %f = OpFunction %void None %115
-        %118 = OpLabel
-        %121 = OpAccessChain %_ptr_Uniform__arr_Outer_std140_uint_4 %a %uint_0
-        %122 = OpLoad %_arr_Outer_std140_uint_4 %121
-        %119 = OpFunctionCall %_arr_Outer_uint_4 %conv_arr4_Outer %122
-        %125 = OpAccessChain %_ptr_Uniform_Outer_std140 %a %uint_0 %uint_3
-        %126 = OpLoad %Outer_std140 %125
-        %123 = OpFunctionCall %Outer %conv_Outer %126
-        %129 = OpAccessChain %_ptr_Uniform__arr_Inner_std140_uint_4 %a %uint_0 %uint_3 %uint_0
-        %130 = OpLoad %_arr_Inner_std140_uint_4 %129
-        %127 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr4_Inner %130
-        %132 = OpAccessChain %_ptr_Uniform_Inner_std140 %a %uint_0 %uint_3 %uint_0 %uint_2
-        %133 = OpLoad %Inner_std140 %132
-        %131 = OpFunctionCall %Inner %conv_Inner %133
-        %134 = OpFunctionCall %mat2v2float %load_a_inner_3_a_2_m
-        %135 = OpAccessChain %_ptr_Uniform_v2float %a %uint_0 %uint_3 %uint_0 %uint_2 %uint_1
-        %136 = OpLoad %v2float %135
-        %138 = OpAccessChain %_ptr_Uniform_float %a %uint_0 %uint_3 %uint_0 %uint_2 %uint_1 %32
-        %139 = OpLoad %float %138
-               OpReturn
-               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2/static_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x2/static_index_via_ptr.wgsl.expected.wgsl
deleted file mode 100644
index 2e24f26..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat2x2/static_index_via_ptr.wgsl.expected.wgsl
+++ /dev/null

@@ -1,26 +0,0 @@
-struct Inner {
-  m : mat2x2<f32>,
-}
-
-struct Outer {
-  a : array<Inner, 4>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
-
-@compute @workgroup_size(1)
-fn f() {
-  let p_a = &(a);
-  let p_a_3 = &((*(p_a))[3]);
-  let p_a_3_a = &((*(p_a_3)).a);
-  let p_a_3_a_2 = &((*(p_a_3_a))[2]);
-  let p_a_3_a_2_m = &((*(p_a_3_a_2)).m);
-  let p_a_3_a_2_m_1 = &((*(p_a_3_a_2_m))[1]);
-  let l_a : array<Outer, 4> = *(p_a);
-  let l_a_3 : Outer = *(p_a_3);
-  let l_a_3_a : array<Inner, 4> = *(p_a_3_a);
-  let l_a_3_a_2 : Inner = *(p_a_3_a_2);
-  let l_a_3_a_2_m : mat2x2<f32> = *(p_a_3_a_2_m);
-  let l_a_3_a_2_m_1 : vec2<f32> = *(p_a_3_a_2_m_1);
-  let l_a_3_a_2_m_1_0 : f32 = (*(p_a_3_a_2_m_1))[0];
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2/to_builtin.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x2/to_builtin.wgsl
deleted file mode 100644
index b712b62..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat2x2/to_builtin.wgsl
+++ /dev/null

@@ -1,14 +0,0 @@
-struct S {
-  before : i32,
-  m : mat2x2<f32>,
-  after : i32,
-}
-
-@group(0) @binding(0) var<uniform> u : array<S, 4>;
-
-@compute @workgroup_size(1)
-fn f() {
-    let t = transpose(u[2].m);
-    let l = length(u[0].m[1].yx);
-    let a = abs(u[0].m[1].yx.x);
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2/to_builtin.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x2/to_builtin.wgsl.expected.dxc.hlsl
deleted file mode 100644
index a6bdd17..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat2x2/to_builtin.wgsl.expected.dxc.hlsl
+++ /dev/null

@@ -1,19 +0,0 @@
-cbuffer cbuffer_u : register(b0, space0) {
-  uint4 u[8];
-};
-
-float2x2 tint_symbol(uint4 buffer[8], uint offset) {
-  const uint scalar_offset = ((offset + 0u)) / 4;
-  uint4 ubo_load = buffer[scalar_offset / 4];
-  const uint scalar_offset_1 = ((offset + 8u)) / 4;
-  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
-  return float2x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)));
-}
-
-[numthreads(1, 1, 1)]
-void f() {
-  const float2x2 t = transpose(tint_symbol(u, 72u));
-  const float l = length(asfloat(u[1].xy).yx);
-  const float a = abs(asfloat(u[1].xy).yx.x);
-  return;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2/to_builtin.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x2/to_builtin.wgsl.expected.fxc.hlsl
deleted file mode 100644
index a6bdd17..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat2x2/to_builtin.wgsl.expected.fxc.hlsl
+++ /dev/null

@@ -1,19 +0,0 @@
-cbuffer cbuffer_u : register(b0, space0) {
-  uint4 u[8];
-};
-
-float2x2 tint_symbol(uint4 buffer[8], uint offset) {
-  const uint scalar_offset = ((offset + 0u)) / 4;
-  uint4 ubo_load = buffer[scalar_offset / 4];
-  const uint scalar_offset_1 = ((offset + 8u)) / 4;
-  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
-  return float2x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)));
-}
-
-[numthreads(1, 1, 1)]
-void f() {
-  const float2x2 t = transpose(tint_symbol(u, 72u));
-  const float l = length(asfloat(u[1].xy).yx);
-  const float a = abs(asfloat(u[1].xy).yx.x);
-  return;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2/to_builtin.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat2x2/to_builtin.wgsl.expected.glsl
deleted file mode 100644
index ee2a35b..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat2x2/to_builtin.wgsl.expected.glsl
+++ /dev/null

@@ -1,38 +0,0 @@
-#version 310 es
-
-struct S {
-  int before;
-  uint pad;
-  mat2 m;
-  int after;
-  uint pad_1;
-};
-
-struct S_std140 {
-  int before;
-  uint pad;
-  vec2 m_0;
-  vec2 m_1;
-  int after;
-  uint pad_1;
-};
-
-layout(binding = 0, std140) uniform u_block_std140_ubo {
-  S_std140 inner[4];
-} u;
-
-mat2 load_u_inner_2_m() {
-  return mat2(u.inner[2u].m_0, u.inner[2u].m_1);
-}
-
-void f() {
-  mat2 t = transpose(load_u_inner_2_m());
-  float l = length(u.inner[0u].m_1.yx);
-  float a = abs(u.inner[0u].m_1.yx[0u]);
-}
-
-layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
-void main() {
-  f();
-  return;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2/to_builtin.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat2x2/to_builtin.wgsl.expected.msl
deleted file mode 100644
index 108d689..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat2x2/to_builtin.wgsl.expected.msl
+++ /dev/null

@@ -1,31 +0,0 @@
-#include <metal_stdlib>
-
-using namespace metal;
-
-template<typename T, size_t N>
-struct tint_array {
-    const constant T& operator[](size_t i) const constant { return elements[i]; }
-    device T& operator[](size_t i) device { return elements[i]; }
-    const device T& operator[](size_t i) const device { return elements[i]; }
-    thread T& operator[](size_t i) thread { return elements[i]; }
-    const thread T& operator[](size_t i) const thread { return elements[i]; }
-    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
-    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
-    T elements[N];
-};
-
-struct S {
-  /* 0x0000 */ int before;
-  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
-  /* 0x0008 */ float2x2 m;
-  /* 0x0018 */ int after;
-  /* 0x001c */ tint_array<int8_t, 4> tint_pad_1;
-};
-
-kernel void f(const constant tint_array<S, 4>* tint_symbol [[buffer(0)]]) {
-  float2x2 const t = transpose((*(tint_symbol))[2].m);
-  float const l = length(float2((*(tint_symbol))[0].m[1]).yx);
-  float const a = fabs(float2((*(tint_symbol))[0].m[1]).yx[0]);
-  return;
-}
-

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2/to_builtin.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat2x2/to_builtin.wgsl.expected.spvasm
deleted file mode 100644
index e4863ac..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat2x2/to_builtin.wgsl.expected.spvasm
+++ /dev/null

@@ -1,75 +0,0 @@
-; SPIR-V
-; Version: 1.3
-; Generator: Google Tint Compiler; 0
-; Bound: 46
-; Schema: 0
-               OpCapability Shader
-         %36 = OpExtInstImport "GLSL.std.450"
-               OpMemoryModel Logical GLSL450
-               OpEntryPoint GLCompute %f "f"
-               OpExecutionMode %f LocalSize 1 1 1
-               OpName %u_block_std140 "u_block_std140"
-               OpMemberName %u_block_std140 0 "inner"
-               OpName %S_std140 "S_std140"
-               OpMemberName %S_std140 0 "before"
-               OpMemberName %S_std140 1 "m_0"
-               OpMemberName %S_std140 2 "m_1"
-               OpMemberName %S_std140 3 "after"
-               OpName %u "u"
-               OpName %load_u_inner_2_m "load_u_inner_2_m"
-               OpName %f "f"
-               OpDecorate %u_block_std140 Block
-               OpMemberDecorate %u_block_std140 0 Offset 0
-               OpMemberDecorate %S_std140 0 Offset 0
-               OpMemberDecorate %S_std140 1 Offset 8
-               OpMemberDecorate %S_std140 2 Offset 16
-               OpMemberDecorate %S_std140 3 Offset 24
-               OpDecorate %_arr_S_std140_uint_4 ArrayStride 32
-               OpDecorate %u NonWritable
-               OpDecorate %u DescriptorSet 0
-               OpDecorate %u Binding 0
-        %int = OpTypeInt 32 1
-      %float = OpTypeFloat 32
-    %v2float = OpTypeVector %float 2
-   %S_std140 = OpTypeStruct %int %v2float %v2float %int
-       %uint = OpTypeInt 32 0
-     %uint_4 = OpConstant %uint 4
-%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
-%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
-%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
-          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
-%mat2v2float = OpTypeMatrix %v2float 2
-         %11 = OpTypeFunction %mat2v2float
-     %uint_0 = OpConstant %uint 0
-     %uint_2 = OpConstant %uint 2
-%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
-     %uint_1 = OpConstant %uint 1
-%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
-       %void = OpTypeVoid
-         %29 = OpTypeFunction %void
-         %37 = OpConstantNull %uint
-%load_u_inner_2_m = OpFunction %mat2v2float None %11
-         %14 = OpLabel
-         %19 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
-         %23 = OpAccessChain %_ptr_Uniform_v2float %19 %uint_1
-         %24 = OpLoad %v2float %23
-         %26 = OpAccessChain %_ptr_Uniform_v2float %19 %uint_2
-         %27 = OpLoad %v2float %26
-         %28 = OpCompositeConstruct %mat2v2float %24 %27
-               OpReturnValue %28
-               OpFunctionEnd
-          %f = OpFunction %void None %29
-         %32 = OpLabel
-         %34 = OpFunctionCall %mat2v2float %load_u_inner_2_m
-         %33 = OpTranspose %mat2v2float %34
-         %38 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %37 %uint_2
-         %39 = OpLoad %v2float %38
-         %40 = OpVectorShuffle %v2float %39 %39 1 0
-         %35 = OpExtInst %float %36 Length %40
-         %42 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %37 %uint_2
-         %43 = OpLoad %v2float %42
-         %44 = OpVectorShuffle %v2float %43 %43 1 0
-         %45 = OpCompositeExtract %float %44 0
-         %41 = OpExtInst %float %36 FAbs %45
-               OpReturn
-               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2/to_builtin.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x2/to_builtin.wgsl.expected.wgsl
deleted file mode 100644
index 2e27c88..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat2x2/to_builtin.wgsl.expected.wgsl
+++ /dev/null

@@ -1,14 +0,0 @@
-struct S {
-  before : i32,
-  m : mat2x2<f32>,
-  after : i32,
-}
-
-@group(0) @binding(0) var<uniform> u : array<S, 4>;
-
-@compute @workgroup_size(1)
-fn f() {
-  let t = transpose(u[2].m);
-  let l = length(u[0].m[1].yx);
-  let a = abs(u[0].m[1].yx.x);
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2/to_fn.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x2/to_fn.wgsl
deleted file mode 100644
index 3ec4eda..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat2x2/to_fn.wgsl
+++ /dev/null

@@ -1,22 +0,0 @@
-struct S {
-  before : i32,
-  m : mat2x2<f32>,
-  after : i32,
-}
-
-@group(0) @binding(0) var<uniform> u : array<S, 4>;
-
-fn a(a : array<S, 4>) {}
-fn b(s : S) {}
-fn c(m : mat2x2<f32>) {}
-fn d(v : vec2<f32>) {}
-fn e(f : f32) {}
-
-@compute @workgroup_size(1)
-fn f() {
-    a(u);
-    b(u[2]);
-    c(u[2].m);
-    d(u[0].m[1].yx);
-    e(u[0].m[1].yx.x);
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2/to_fn.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x2/to_fn.wgsl.expected.dxc.hlsl
deleted file mode 100644
index 9170ce8..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat2x2/to_fn.wgsl.expected.dxc.hlsl
+++ /dev/null

@@ -1,60 +0,0 @@
-struct S {
-  int before;
-  float2x2 m;
-  int after;
-};
-
-cbuffer cbuffer_u : register(b0, space0) {
-  uint4 u[8];
-};
-
-void a(S a_1[4]) {
-}
-
-void b(S s) {
-}
-
-void c(float2x2 m) {
-}
-
-void d(float2 v) {
-}
-
-void e(float f_1) {
-}
-
-float2x2 tint_symbol_3(uint4 buffer[8], uint offset) {
-  const uint scalar_offset = ((offset + 0u)) / 4;
-  uint4 ubo_load = buffer[scalar_offset / 4];
-  const uint scalar_offset_1 = ((offset + 8u)) / 4;
-  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
-  return float2x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)));
-}
-
-S tint_symbol_1(uint4 buffer[8], uint offset) {
-  const uint scalar_offset_2 = ((offset + 0u)) / 4;
-  const uint scalar_offset_3 = ((offset + 24u)) / 4;
-  const S tint_symbol_5 = {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4]), tint_symbol_3(buffer, (offset + 8u)), asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])};
-  return tint_symbol_5;
-}
-
-typedef S tint_symbol_ret[4];
-tint_symbol_ret tint_symbol(uint4 buffer[8], uint offset) {
-  S arr[4] = (S[4])0;
-  {
-    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
-      arr[i] = tint_symbol_1(buffer, (offset + (i * 32u)));
-    }
-  }
-  return arr;
-}
-
-[numthreads(1, 1, 1)]
-void f() {
-  a(tint_symbol(u, 0u));
-  b(tint_symbol_1(u, 64u));
-  c(tint_symbol_3(u, 72u));
-  d(asfloat(u[1].xy).yx);
-  e(asfloat(u[1].xy).yx.x);
-  return;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2/to_fn.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x2/to_fn.wgsl.expected.fxc.hlsl
deleted file mode 100644
index 9170ce8..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat2x2/to_fn.wgsl.expected.fxc.hlsl
+++ /dev/null

@@ -1,60 +0,0 @@
-struct S {
-  int before;
-  float2x2 m;
-  int after;
-};
-
-cbuffer cbuffer_u : register(b0, space0) {
-  uint4 u[8];
-};
-
-void a(S a_1[4]) {
-}
-
-void b(S s) {
-}
-
-void c(float2x2 m) {
-}
-
-void d(float2 v) {
-}
-
-void e(float f_1) {
-}
-
-float2x2 tint_symbol_3(uint4 buffer[8], uint offset) {
-  const uint scalar_offset = ((offset + 0u)) / 4;
-  uint4 ubo_load = buffer[scalar_offset / 4];
-  const uint scalar_offset_1 = ((offset + 8u)) / 4;
-  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
-  return float2x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)));
-}
-
-S tint_symbol_1(uint4 buffer[8], uint offset) {
-  const uint scalar_offset_2 = ((offset + 0u)) / 4;
-  const uint scalar_offset_3 = ((offset + 24u)) / 4;
-  const S tint_symbol_5 = {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4]), tint_symbol_3(buffer, (offset + 8u)), asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])};
-  return tint_symbol_5;
-}
-
-typedef S tint_symbol_ret[4];
-tint_symbol_ret tint_symbol(uint4 buffer[8], uint offset) {
-  S arr[4] = (S[4])0;
-  {
-    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
-      arr[i] = tint_symbol_1(buffer, (offset + (i * 32u)));
-    }
-  }
-  return arr;
-}
-
-[numthreads(1, 1, 1)]
-void f() {
-  a(tint_symbol(u, 0u));
-  b(tint_symbol_1(u, 64u));
-  c(tint_symbol_3(u, 72u));
-  d(asfloat(u[1].xy).yx);
-  e(asfloat(u[1].xy).yx.x);
-  return;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2/to_fn.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat2x2/to_fn.wgsl.expected.glsl
deleted file mode 100644
index daaf731..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat2x2/to_fn.wgsl.expected.glsl
+++ /dev/null

@@ -1,69 +0,0 @@
-#version 310 es
-
-struct S {
-  int before;
-  uint pad;
-  mat2 m;
-  int after;
-  uint pad_1;
-};
-
-struct S_std140 {
-  int before;
-  uint pad;
-  vec2 m_0;
-  vec2 m_1;
-  int after;
-  uint pad_1;
-};
-
-layout(binding = 0, std140) uniform u_block_std140_ubo {
-  S_std140 inner[4];
-} u;
-
-void a(S a_1[4]) {
-}
-
-void b(S s) {
-}
-
-void c(mat2 m) {
-}
-
-void d(vec2 v) {
-}
-
-void e(float f_1) {
-}
-
-S conv_S(S_std140 val) {
-  return S(val.before, val.pad, mat2(val.m_0, val.m_1), val.after, val.pad_1);
-}
-
-S[4] conv_arr4_S(S_std140 val[4]) {
-  S arr[4] = S[4](S(0, 0u, mat2(0.0f, 0.0f, 0.0f, 0.0f), 0, 0u), S(0, 0u, mat2(0.0f, 0.0f, 0.0f, 0.0f), 0, 0u), S(0, 0u, mat2(0.0f, 0.0f, 0.0f, 0.0f), 0, 0u), S(0, 0u, mat2(0.0f, 0.0f, 0.0f, 0.0f), 0, 0u));
-  {
-    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
-      arr[i] = conv_S(val[i]);
-    }
-  }
-  return arr;
-}
-
-mat2 load_u_inner_2_m() {
-  return mat2(u.inner[2u].m_0, u.inner[2u].m_1);
-}
-
-void f() {
-  a(conv_arr4_S(u.inner));
-  b(conv_S(u.inner[2u]));
-  c(load_u_inner_2_m());
-  d(u.inner[0u].m_1.yx);
-  e(u.inner[0u].m_1.yx[0u]);
-}
-
-layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
-void main() {
-  f();
-  return;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2/to_fn.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat2x2/to_fn.wgsl.expected.msl
deleted file mode 100644
index eda83c5..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat2x2/to_fn.wgsl.expected.msl
+++ /dev/null

@@ -1,48 +0,0 @@
-#include <metal_stdlib>
-
-using namespace metal;
-
-template<typename T, size_t N>
-struct tint_array {
-    const constant T& operator[](size_t i) const constant { return elements[i]; }
-    device T& operator[](size_t i) device { return elements[i]; }
-    const device T& operator[](size_t i) const device { return elements[i]; }
-    thread T& operator[](size_t i) thread { return elements[i]; }
-    const thread T& operator[](size_t i) const thread { return elements[i]; }
-    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
-    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
-    T elements[N];
-};
-
-struct S {
-  /* 0x0000 */ int before;
-  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
-  /* 0x0008 */ float2x2 m;
-  /* 0x0018 */ int after;
-  /* 0x001c */ tint_array<int8_t, 4> tint_pad_1;
-};
-
-void a(tint_array<S, 4> a_1) {
-}
-
-void b(S s) {
-}
-
-void c(float2x2 m) {
-}
-
-void d(float2 v) {
-}
-
-void e(float f_1) {
-}
-
-kernel void f(const constant tint_array<S, 4>* tint_symbol [[buffer(0)]]) {
-  a(*(tint_symbol));
-  b((*(tint_symbol))[2]);
-  c((*(tint_symbol))[2].m);
-  d(float2((*(tint_symbol))[0].m[1]).yx);
-  e(float2((*(tint_symbol))[0].m[1]).yx[0]);
-  return;
-}
-

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2/to_fn.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat2x2/to_fn.wgsl.expected.spvasm
deleted file mode 100644
index facd977..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat2x2/to_fn.wgsl.expected.spvasm
+++ /dev/null

@@ -1,200 +0,0 @@
-; SPIR-V
-; Version: 1.3
-; Generator: Google Tint Compiler; 0
-; Bound: 119
-; Schema: 0
-               OpCapability Shader
-               OpMemoryModel Logical GLSL450
-               OpEntryPoint GLCompute %f "f"
-               OpExecutionMode %f LocalSize 1 1 1
-               OpName %u_block_std140 "u_block_std140"
-               OpMemberName %u_block_std140 0 "inner"
-               OpName %S_std140 "S_std140"
-               OpMemberName %S_std140 0 "before"
-               OpMemberName %S_std140 1 "m_0"
-               OpMemberName %S_std140 2 "m_1"
-               OpMemberName %S_std140 3 "after"
-               OpName %u "u"
-               OpName %S "S"
-               OpMemberName %S 0 "before"
-               OpMemberName %S 1 "m"
-               OpMemberName %S 2 "after"
-               OpName %a "a"
-               OpName %a_1 "a_1"
-               OpName %b "b"
-               OpName %s "s"
-               OpName %c "c"
-               OpName %m "m"
-               OpName %d "d"
-               OpName %v "v"
-               OpName %e "e"
-               OpName %f_1 "f_1"
-               OpName %conv_S "conv_S"
-               OpName %val "val"
-               OpName %conv_arr4_S "conv_arr4_S"
-               OpName %val_0 "val"
-               OpName %arr "arr"
-               OpName %i "i"
-               OpName %var_for_index "var_for_index"
-               OpName %load_u_inner_2_m "load_u_inner_2_m"
-               OpName %f "f"
-               OpDecorate %u_block_std140 Block
-               OpMemberDecorate %u_block_std140 0 Offset 0
-               OpMemberDecorate %S_std140 0 Offset 0
-               OpMemberDecorate %S_std140 1 Offset 8
-               OpMemberDecorate %S_std140 2 Offset 16
-               OpMemberDecorate %S_std140 3 Offset 24
-               OpDecorate %_arr_S_std140_uint_4 ArrayStride 32
-               OpDecorate %u NonWritable
-               OpDecorate %u DescriptorSet 0
-               OpDecorate %u Binding 0
-               OpMemberDecorate %S 0 Offset 0
-               OpMemberDecorate %S 1 Offset 8
-               OpMemberDecorate %S 1 ColMajor
-               OpMemberDecorate %S 1 MatrixStride 8
-               OpMemberDecorate %S 2 Offset 24
-               OpDecorate %_arr_S_uint_4 ArrayStride 32
-        %int = OpTypeInt 32 1
-      %float = OpTypeFloat 32
-    %v2float = OpTypeVector %float 2
-   %S_std140 = OpTypeStruct %int %v2float %v2float %int
-       %uint = OpTypeInt 32 0
-     %uint_4 = OpConstant %uint 4
-%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
-%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
-%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
-          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
-       %void = OpTypeVoid
-%mat2v2float = OpTypeMatrix %v2float 2
-          %S = OpTypeStruct %int %mat2v2float %int
-%_arr_S_uint_4 = OpTypeArray %S %uint_4
-         %11 = OpTypeFunction %void %_arr_S_uint_4
-         %19 = OpTypeFunction %void %S
-         %23 = OpTypeFunction %void %mat2v2float
-         %27 = OpTypeFunction %void %v2float
-         %31 = OpTypeFunction %void %float
-         %35 = OpTypeFunction %S %S_std140
-         %45 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
-%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
-         %51 = OpConstantNull %_arr_S_uint_4
-%_ptr_Function_uint = OpTypePointer Function %uint
-         %54 = OpConstantNull %uint
-       %bool = OpTypeBool
-%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
-         %67 = OpConstantNull %_arr_S_std140_uint_4
-%_ptr_Function_S = OpTypePointer Function %S
-%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
-     %uint_1 = OpConstant %uint 1
-         %80 = OpTypeFunction %mat2v2float
-     %uint_0 = OpConstant %uint 0
-     %uint_2 = OpConstant %uint 2
-%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
-%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
-         %96 = OpTypeFunction %void
-%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
-          %a = OpFunction %void None %11
-        %a_1 = OpFunctionParameter %_arr_S_uint_4
-         %18 = OpLabel
-               OpReturn
-               OpFunctionEnd
-          %b = OpFunction %void None %19
-          %s = OpFunctionParameter %S
-         %22 = OpLabel
-               OpReturn
-               OpFunctionEnd
-          %c = OpFunction %void None %23
-          %m = OpFunctionParameter %mat2v2float
-         %26 = OpLabel
-               OpReturn
-               OpFunctionEnd
-          %d = OpFunction %void None %27
-          %v = OpFunctionParameter %v2float
-         %30 = OpLabel
-               OpReturn
-               OpFunctionEnd
-          %e = OpFunction %void None %31
-        %f_1 = OpFunctionParameter %float
-         %34 = OpLabel
-               OpReturn
-               OpFunctionEnd
-     %conv_S = OpFunction %S None %35
-        %val = OpFunctionParameter %S_std140
-         %38 = OpLabel
-         %39 = OpCompositeExtract %int %val 0
-         %40 = OpCompositeExtract %v2float %val 1
-         %41 = OpCompositeExtract %v2float %val 2
-         %42 = OpCompositeConstruct %mat2v2float %40 %41
-         %43 = OpCompositeExtract %int %val 3
-         %44 = OpCompositeConstruct %S %39 %42 %43
-               OpReturnValue %44
-               OpFunctionEnd
-%conv_arr4_S = OpFunction %_arr_S_uint_4 None %45
-      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
-         %48 = OpLabel
-        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %51
-          %i = OpVariable %_ptr_Function_uint Function %54
-%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %67
-               OpBranch %55
-         %55 = OpLabel
-               OpLoopMerge %56 %57 None
-               OpBranch %58
-         %58 = OpLabel
-         %60 = OpLoad %uint %i
-         %61 = OpULessThan %bool %60 %uint_4
-         %59 = OpLogicalNot %bool %61
-               OpSelectionMerge %63 None
-               OpBranchConditional %59 %64 %63
-         %64 = OpLabel
-               OpBranch %56
-         %63 = OpLabel
-               OpStore %var_for_index %val_0
-         %68 = OpLoad %uint %i
-         %70 = OpAccessChain %_ptr_Function_S %arr %68
-         %72 = OpLoad %uint %i
-         %74 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %72
-         %75 = OpLoad %S_std140 %74
-         %71 = OpFunctionCall %S %conv_S %75
-               OpStore %70 %71
-               OpBranch %57
-         %57 = OpLabel
-         %76 = OpLoad %uint %i
-         %78 = OpIAdd %uint %76 %uint_1
-               OpStore %i %78
-               OpBranch %55
-         %56 = OpLabel
-         %79 = OpLoad %_arr_S_uint_4 %arr
-               OpReturnValue %79
-               OpFunctionEnd
-%load_u_inner_2_m = OpFunction %mat2v2float None %80
-         %82 = OpLabel
-         %87 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
-         %90 = OpAccessChain %_ptr_Uniform_v2float %87 %uint_1
-         %91 = OpLoad %v2float %90
-         %93 = OpAccessChain %_ptr_Uniform_v2float %87 %uint_2
-         %94 = OpLoad %v2float %93
-         %95 = OpCompositeConstruct %mat2v2float %91 %94
-               OpReturnValue %95
-               OpFunctionEnd
-          %f = OpFunction %void None %96
-         %98 = OpLabel
-        %102 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
-        %103 = OpLoad %_arr_S_std140_uint_4 %102
-        %100 = OpFunctionCall %_arr_S_uint_4 %conv_arr4_S %103
-         %99 = OpFunctionCall %void %a %100
-        %106 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
-        %107 = OpLoad %S_std140 %106
-        %105 = OpFunctionCall %S %conv_S %107
-        %104 = OpFunctionCall %void %b %105
-        %109 = OpFunctionCall %mat2v2float %load_u_inner_2_m
-        %108 = OpFunctionCall %void %c %109
-        %111 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %54 %uint_2
-        %112 = OpLoad %v2float %111
-        %113 = OpVectorShuffle %v2float %112 %112 1 0
-        %110 = OpFunctionCall %void %d %113
-        %115 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %54 %uint_2
-        %116 = OpLoad %v2float %115
-        %117 = OpVectorShuffle %v2float %116 %116 1 0
-        %118 = OpCompositeExtract %float %117 0
-        %114 = OpFunctionCall %void %e %118
-               OpReturn
-               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2/to_fn.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x2/to_fn.wgsl.expected.wgsl
deleted file mode 100644
index f8ed037..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat2x2/to_fn.wgsl.expected.wgsl
+++ /dev/null

@@ -1,31 +0,0 @@
-struct S {
-  before : i32,
-  m : mat2x2<f32>,
-  after : i32,
-}
-
-@group(0) @binding(0) var<uniform> u : array<S, 4>;
-
-fn a(a : array<S, 4>) {
-}
-
-fn b(s : S) {
-}
-
-fn c(m : mat2x2<f32>) {
-}
-
-fn d(v : vec2<f32>) {
-}
-
-fn e(f : f32) {
-}
-
-@compute @workgroup_size(1)
-fn f() {
-  a(u);
-  b(u[2]);
-  c(u[2].m);
-  d(u[0].m[1].yx);
-  e(u[0].m[1].yx.x);
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2/to_private.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x2/to_private.wgsl
deleted file mode 100644
index 4cf6d16..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat2x2/to_private.wgsl
+++ /dev/null

@@ -1,16 +0,0 @@
-struct S {
-  before : i32,
-  m : mat2x2<f32>,
-  after : i32,
-}
-
-@group(0) @binding(0) var<uniform> u : array<S, 4>;
-var<private> p : array<S, 4>;
-
-@compute @workgroup_size(1)
-fn f() {
-    p = u;
-    p[1] = u[2];
-    p[3].m = u[2].m;
-    p[1].m[0] = u[0].m[1].yx;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2/to_private.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x2/to_private.wgsl.expected.dxc.hlsl
deleted file mode 100644
index 6aa0a11..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat2x2/to_private.wgsl.expected.dxc.hlsl
+++ /dev/null

@@ -1,45 +0,0 @@
-struct S {
-  int before;
-  float2x2 m;
-  int after;
-};
-
-cbuffer cbuffer_u : register(b0, space0) {
-  uint4 u[8];
-};
-static S p[4] = (S[4])0;
-
-float2x2 tint_symbol_3(uint4 buffer[8], uint offset) {
-  const uint scalar_offset = ((offset + 0u)) / 4;
-  uint4 ubo_load = buffer[scalar_offset / 4];
-  const uint scalar_offset_1 = ((offset + 8u)) / 4;
-  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
-  return float2x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)));
-}
-
-S tint_symbol_1(uint4 buffer[8], uint offset) {
-  const uint scalar_offset_2 = ((offset + 0u)) / 4;
-  const uint scalar_offset_3 = ((offset + 24u)) / 4;
-  const S tint_symbol_5 = {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4]), tint_symbol_3(buffer, (offset + 8u)), asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])};
-  return tint_symbol_5;
-}
-
-typedef S tint_symbol_ret[4];
-tint_symbol_ret tint_symbol(uint4 buffer[8], uint offset) {
-  S arr[4] = (S[4])0;
-  {
-    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
-      arr[i] = tint_symbol_1(buffer, (offset + (i * 32u)));
-    }
-  }
-  return arr;
-}
-
-[numthreads(1, 1, 1)]
-void f() {
-  p = tint_symbol(u, 0u);
-  p[1] = tint_symbol_1(u, 64u);
-  p[3].m = tint_symbol_3(u, 72u);
-  p[1].m[0] = asfloat(u[1].xy).yx;
-  return;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2/to_private.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x2/to_private.wgsl.expected.fxc.hlsl
deleted file mode 100644
index 6aa0a11..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat2x2/to_private.wgsl.expected.fxc.hlsl
+++ /dev/null

@@ -1,45 +0,0 @@
-struct S {
-  int before;
-  float2x2 m;
-  int after;
-};
-
-cbuffer cbuffer_u : register(b0, space0) {
-  uint4 u[8];
-};
-static S p[4] = (S[4])0;
-
-float2x2 tint_symbol_3(uint4 buffer[8], uint offset) {
-  const uint scalar_offset = ((offset + 0u)) / 4;
-  uint4 ubo_load = buffer[scalar_offset / 4];
-  const uint scalar_offset_1 = ((offset + 8u)) / 4;
-  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
-  return float2x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)));
-}
-
-S tint_symbol_1(uint4 buffer[8], uint offset) {
-  const uint scalar_offset_2 = ((offset + 0u)) / 4;
-  const uint scalar_offset_3 = ((offset + 24u)) / 4;
-  const S tint_symbol_5 = {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4]), tint_symbol_3(buffer, (offset + 8u)), asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])};
-  return tint_symbol_5;
-}
-
-typedef S tint_symbol_ret[4];
-tint_symbol_ret tint_symbol(uint4 buffer[8], uint offset) {
-  S arr[4] = (S[4])0;
-  {
-    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
-      arr[i] = tint_symbol_1(buffer, (offset + (i * 32u)));
-    }
-  }
-  return arr;
-}
-
-[numthreads(1, 1, 1)]
-void f() {
-  p = tint_symbol(u, 0u);
-  p[1] = tint_symbol_1(u, 64u);
-  p[3].m = tint_symbol_3(u, 72u);
-  p[1].m[0] = asfloat(u[1].xy).yx;
-  return;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2/to_private.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat2x2/to_private.wgsl.expected.glsl
deleted file mode 100644
index 9634531..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat2x2/to_private.wgsl.expected.glsl
+++ /dev/null

@@ -1,54 +0,0 @@
-#version 310 es
-
-struct S {
-  int before;
-  uint pad;
-  mat2 m;
-  int after;
-  uint pad_1;
-};
-
-struct S_std140 {
-  int before;
-  uint pad;
-  vec2 m_0;
-  vec2 m_1;
-  int after;
-  uint pad_1;
-};
-
-layout(binding = 0, std140) uniform u_block_std140_ubo {
-  S_std140 inner[4];
-} u;
-
-S p[4] = S[4](S(0, 0u, mat2(0.0f, 0.0f, 0.0f, 0.0f), 0, 0u), S(0, 0u, mat2(0.0f, 0.0f, 0.0f, 0.0f), 0, 0u), S(0, 0u, mat2(0.0f, 0.0f, 0.0f, 0.0f), 0, 0u), S(0, 0u, mat2(0.0f, 0.0f, 0.0f, 0.0f), 0, 0u));
-S conv_S(S_std140 val) {
-  return S(val.before, val.pad, mat2(val.m_0, val.m_1), val.after, val.pad_1);
-}
-
-S[4] conv_arr4_S(S_std140 val[4]) {
-  S arr[4] = S[4](S(0, 0u, mat2(0.0f, 0.0f, 0.0f, 0.0f), 0, 0u), S(0, 0u, mat2(0.0f, 0.0f, 0.0f, 0.0f), 0, 0u), S(0, 0u, mat2(0.0f, 0.0f, 0.0f, 0.0f), 0, 0u), S(0, 0u, mat2(0.0f, 0.0f, 0.0f, 0.0f), 0, 0u));
-  {
-    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
-      arr[i] = conv_S(val[i]);
-    }
-  }
-  return arr;
-}
-
-mat2 load_u_inner_2_m() {
-  return mat2(u.inner[2u].m_0, u.inner[2u].m_1);
-}
-
-void f() {
-  p = conv_arr4_S(u.inner);
-  p[1] = conv_S(u.inner[2u]);
-  p[3].m = load_u_inner_2_m();
-  p[1].m[0] = u.inner[0u].m_1.yx;
-}
-
-layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
-void main() {
-  f();
-  return;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2/to_private.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat2x2/to_private.wgsl.expected.msl
deleted file mode 100644
index 87743fb..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat2x2/to_private.wgsl.expected.msl
+++ /dev/null

@@ -1,33 +0,0 @@
-#include <metal_stdlib>
-
-using namespace metal;
-
-template<typename T, size_t N>
-struct tint_array {
-    const constant T& operator[](size_t i) const constant { return elements[i]; }
-    device T& operator[](size_t i) device { return elements[i]; }
-    const device T& operator[](size_t i) const device { return elements[i]; }
-    thread T& operator[](size_t i) thread { return elements[i]; }
-    const thread T& operator[](size_t i) const thread { return elements[i]; }
-    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
-    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
-    T elements[N];
-};
-
-struct S {
-  /* 0x0000 */ int before;
-  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
-  /* 0x0008 */ float2x2 m;
-  /* 0x0018 */ int after;
-  /* 0x001c */ tint_array<int8_t, 4> tint_pad_1;
-};
-
-kernel void f(const constant tint_array<S, 4>* tint_symbol_1 [[buffer(0)]]) {
-  thread tint_array<S, 4> tint_symbol = {};
-  tint_symbol = *(tint_symbol_1);
-  tint_symbol[1] = (*(tint_symbol_1))[2];
-  tint_symbol[3].m = (*(tint_symbol_1))[2].m;
-  tint_symbol[1].m[0] = float2((*(tint_symbol_1))[0].m[1]).yx;
-  return;
-}
-

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2/to_private.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat2x2/to_private.wgsl.expected.spvasm
deleted file mode 100644
index 57765b4..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat2x2/to_private.wgsl.expected.spvasm
+++ /dev/null

@@ -1,167 +0,0 @@
-; SPIR-V
-; Version: 1.3
-; Generator: Google Tint Compiler; 0
-; Bound: 101
-; Schema: 0
-               OpCapability Shader
-               OpMemoryModel Logical GLSL450
-               OpEntryPoint GLCompute %f "f"
-               OpExecutionMode %f LocalSize 1 1 1
-               OpName %u_block_std140 "u_block_std140"
-               OpMemberName %u_block_std140 0 "inner"
-               OpName %S_std140 "S_std140"
-               OpMemberName %S_std140 0 "before"
-               OpMemberName %S_std140 1 "m_0"
-               OpMemberName %S_std140 2 "m_1"
-               OpMemberName %S_std140 3 "after"
-               OpName %u "u"
-               OpName %S "S"
-               OpMemberName %S 0 "before"
-               OpMemberName %S 1 "m"
-               OpMemberName %S 2 "after"
-               OpName %p "p"
-               OpName %conv_S "conv_S"
-               OpName %val "val"
-               OpName %conv_arr4_S "conv_arr4_S"
-               OpName %val_0 "val"
-               OpName %arr "arr"
-               OpName %i "i"
-               OpName %var_for_index "var_for_index"
-               OpName %load_u_inner_2_m "load_u_inner_2_m"
-               OpName %f "f"
-               OpDecorate %u_block_std140 Block
-               OpMemberDecorate %u_block_std140 0 Offset 0
-               OpMemberDecorate %S_std140 0 Offset 0
-               OpMemberDecorate %S_std140 1 Offset 8
-               OpMemberDecorate %S_std140 2 Offset 16
-               OpMemberDecorate %S_std140 3 Offset 24
-               OpDecorate %_arr_S_std140_uint_4 ArrayStride 32
-               OpDecorate %u NonWritable
-               OpDecorate %u DescriptorSet 0
-               OpDecorate %u Binding 0
-               OpMemberDecorate %S 0 Offset 0
-               OpMemberDecorate %S 1 Offset 8
-               OpMemberDecorate %S 1 ColMajor
-               OpMemberDecorate %S 1 MatrixStride 8
-               OpMemberDecorate %S 2 Offset 24
-               OpDecorate %_arr_S_uint_4 ArrayStride 32
-        %int = OpTypeInt 32 1
-      %float = OpTypeFloat 32
-    %v2float = OpTypeVector %float 2
-   %S_std140 = OpTypeStruct %int %v2float %v2float %int
-       %uint = OpTypeInt 32 0
-     %uint_4 = OpConstant %uint 4
-%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
-%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
-%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
-          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
-%mat2v2float = OpTypeMatrix %v2float 2
-          %S = OpTypeStruct %int %mat2v2float %int
-%_arr_S_uint_4 = OpTypeArray %S %uint_4
-%_ptr_Private__arr_S_uint_4 = OpTypePointer Private %_arr_S_uint_4
-         %16 = OpConstantNull %_arr_S_uint_4
-          %p = OpVariable %_ptr_Private__arr_S_uint_4 Private %16
-         %17 = OpTypeFunction %S %S_std140
-         %27 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
-%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
-%_ptr_Function_uint = OpTypePointer Function %uint
-         %35 = OpConstantNull %uint
-       %bool = OpTypeBool
-%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
-         %48 = OpConstantNull %_arr_S_std140_uint_4
-%_ptr_Function_S = OpTypePointer Function %S
-%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
-     %uint_1 = OpConstant %uint 1
-         %61 = OpTypeFunction %mat2v2float
-     %uint_0 = OpConstant %uint 0
-     %uint_2 = OpConstant %uint 2
-%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
-%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
-       %void = OpTypeVoid
-         %77 = OpTypeFunction %void
-%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
-      %int_1 = OpConstant %int 1
-%_ptr_Private_S = OpTypePointer Private %S
-      %int_3 = OpConstant %int 3
-%_ptr_Private_mat2v2float = OpTypePointer Private %mat2v2float
-         %95 = OpConstantNull %int
-%_ptr_Private_v2float = OpTypePointer Private %v2float
-     %conv_S = OpFunction %S None %17
-        %val = OpFunctionParameter %S_std140
-         %20 = OpLabel
-         %21 = OpCompositeExtract %int %val 0
-         %22 = OpCompositeExtract %v2float %val 1
-         %23 = OpCompositeExtract %v2float %val 2
-         %24 = OpCompositeConstruct %mat2v2float %22 %23
-         %25 = OpCompositeExtract %int %val 3
-         %26 = OpCompositeConstruct %S %21 %24 %25
-               OpReturnValue %26
-               OpFunctionEnd
-%conv_arr4_S = OpFunction %_arr_S_uint_4 None %27
-      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
-         %30 = OpLabel
-        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %16
-          %i = OpVariable %_ptr_Function_uint Function %35
-%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %48
-               OpBranch %36
-         %36 = OpLabel
-               OpLoopMerge %37 %38 None
-               OpBranch %39
-         %39 = OpLabel
-         %41 = OpLoad %uint %i
-         %42 = OpULessThan %bool %41 %uint_4
-         %40 = OpLogicalNot %bool %42
-               OpSelectionMerge %44 None
-               OpBranchConditional %40 %45 %44
-         %45 = OpLabel
-               OpBranch %37
-         %44 = OpLabel
-               OpStore %var_for_index %val_0
-         %49 = OpLoad %uint %i
-         %51 = OpAccessChain %_ptr_Function_S %arr %49
-         %53 = OpLoad %uint %i
-         %55 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %53
-         %56 = OpLoad %S_std140 %55
-         %52 = OpFunctionCall %S %conv_S %56
-               OpStore %51 %52
-               OpBranch %38
-         %38 = OpLabel
-         %57 = OpLoad %uint %i
-         %59 = OpIAdd %uint %57 %uint_1
-               OpStore %i %59
-               OpBranch %36
-         %37 = OpLabel
-         %60 = OpLoad %_arr_S_uint_4 %arr
-               OpReturnValue %60
-               OpFunctionEnd
-%load_u_inner_2_m = OpFunction %mat2v2float None %61
-         %63 = OpLabel
-         %68 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
-         %71 = OpAccessChain %_ptr_Uniform_v2float %68 %uint_1
-         %72 = OpLoad %v2float %71
-         %74 = OpAccessChain %_ptr_Uniform_v2float %68 %uint_2
-         %75 = OpLoad %v2float %74
-         %76 = OpCompositeConstruct %mat2v2float %72 %75
-               OpReturnValue %76
-               OpFunctionEnd
-          %f = OpFunction %void None %77
-         %80 = OpLabel
-         %83 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
-         %84 = OpLoad %_arr_S_std140_uint_4 %83
-         %81 = OpFunctionCall %_arr_S_uint_4 %conv_arr4_S %84
-               OpStore %p %81
-         %87 = OpAccessChain %_ptr_Private_S %p %int_1
-         %89 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
-         %90 = OpLoad %S_std140 %89
-         %88 = OpFunctionCall %S %conv_S %90
-               OpStore %87 %88
-         %93 = OpAccessChain %_ptr_Private_mat2v2float %p %int_3 %uint_1
-         %94 = OpFunctionCall %mat2v2float %load_u_inner_2_m
-               OpStore %93 %94
-         %97 = OpAccessChain %_ptr_Private_v2float %p %int_1 %uint_1 %95
-         %98 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %35 %uint_2
-         %99 = OpLoad %v2float %98
-        %100 = OpVectorShuffle %v2float %99 %99 1 0
-               OpStore %97 %100
-               OpReturn
-               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2/to_private.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x2/to_private.wgsl.expected.wgsl
deleted file mode 100644
index 9d4358a..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat2x2/to_private.wgsl.expected.wgsl
+++ /dev/null

@@ -1,17 +0,0 @@
-struct S {
-  before : i32,
-  m : mat2x2<f32>,
-  after : i32,
-}
-
-@group(0) @binding(0) var<uniform> u : array<S, 4>;
-
-var<private> p : array<S, 4>;
-
-@compute @workgroup_size(1)
-fn f() {
-  p = u;
-  p[1] = u[2];
-  p[3].m = u[2].m;
-  p[1].m[0] = u[0].m[1].yx;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2/to_storage.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x2/to_storage.wgsl
deleted file mode 100644
index 8d2687b..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat2x2/to_storage.wgsl
+++ /dev/null

@@ -1,16 +0,0 @@
-struct S {
-  before : i32,
-  m : mat2x2<f32>,
-  after : i32,
-}
-
-@group(0) @binding(0) var<uniform> u : array<S, 4>;
-@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
-
-@compute @workgroup_size(1)
-fn f() {
-    s = u;
-    s[1] = u[2];
-    s[3].m = u[2].m;
-    s[1].m[0] = u[0].m[1].yx;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2/to_storage.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x2/to_storage.wgsl.expected.dxc.hlsl
deleted file mode 100644
index c0ccad6..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat2x2/to_storage.wgsl.expected.dxc.hlsl
+++ /dev/null

@@ -1,65 +0,0 @@
-struct S {
-  int before;
-  float2x2 m;
-  int after;
-};
-
-cbuffer cbuffer_u : register(b0, space0) {
-  uint4 u[8];
-};
-RWByteAddressBuffer s : register(u1, space0);
-
-void tint_symbol_3(RWByteAddressBuffer buffer, uint offset, float2x2 value) {
-  buffer.Store2((offset + 0u), asuint(value[0u]));
-  buffer.Store2((offset + 8u), asuint(value[1u]));
-}
-
-void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, S value) {
-  buffer.Store((offset + 0u), asuint(value.before));
-  tint_symbol_3(buffer, (offset + 8u), value.m);
-  buffer.Store((offset + 24u), asuint(value.after));
-}
-
-void tint_symbol(RWByteAddressBuffer buffer, uint offset, S value[4]) {
-  S array[4] = value;
-  {
-    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
-      tint_symbol_1(buffer, (offset + (i * 32u)), array[i]);
-    }
-  }
-}
-
-float2x2 tint_symbol_8(uint4 buffer[8], uint offset) {
-  const uint scalar_offset = ((offset + 0u)) / 4;
-  uint4 ubo_load = buffer[scalar_offset / 4];
-  const uint scalar_offset_1 = ((offset + 8u)) / 4;
-  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
-  return float2x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)));
-}
-
-S tint_symbol_6(uint4 buffer[8], uint offset) {
-  const uint scalar_offset_2 = ((offset + 0u)) / 4;
-  const uint scalar_offset_3 = ((offset + 24u)) / 4;
-  const S tint_symbol_10 = {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4]), tint_symbol_8(buffer, (offset + 8u)), asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])};
-  return tint_symbol_10;
-}
-
-typedef S tint_symbol_5_ret[4];
-tint_symbol_5_ret tint_symbol_5(uint4 buffer[8], uint offset) {
-  S arr[4] = (S[4])0;
-  {
-    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
-      arr[i_1] = tint_symbol_6(buffer, (offset + (i_1 * 32u)));
-    }
-  }
-  return arr;
-}
-
-[numthreads(1, 1, 1)]
-void f() {
-  tint_symbol(s, 0u, tint_symbol_5(u, 0u));
-  tint_symbol_1(s, 32u, tint_symbol_6(u, 64u));
-  tint_symbol_3(s, 104u, tint_symbol_8(u, 72u));
-  s.Store2(40u, asuint(asfloat(u[1].xy).yx));
-  return;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2/to_storage.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x2/to_storage.wgsl.expected.fxc.hlsl
deleted file mode 100644
index c0ccad6..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat2x2/to_storage.wgsl.expected.fxc.hlsl
+++ /dev/null

@@ -1,65 +0,0 @@
-struct S {
-  int before;
-  float2x2 m;
-  int after;
-};
-
-cbuffer cbuffer_u : register(b0, space0) {
-  uint4 u[8];
-};
-RWByteAddressBuffer s : register(u1, space0);
-
-void tint_symbol_3(RWByteAddressBuffer buffer, uint offset, float2x2 value) {
-  buffer.Store2((offset + 0u), asuint(value[0u]));
-  buffer.Store2((offset + 8u), asuint(value[1u]));
-}
-
-void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, S value) {
-  buffer.Store((offset + 0u), asuint(value.before));
-  tint_symbol_3(buffer, (offset + 8u), value.m);
-  buffer.Store((offset + 24u), asuint(value.after));
-}
-
-void tint_symbol(RWByteAddressBuffer buffer, uint offset, S value[4]) {
-  S array[4] = value;
-  {
-    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
-      tint_symbol_1(buffer, (offset + (i * 32u)), array[i]);
-    }
-  }
-}
-
-float2x2 tint_symbol_8(uint4 buffer[8], uint offset) {
-  const uint scalar_offset = ((offset + 0u)) / 4;
-  uint4 ubo_load = buffer[scalar_offset / 4];
-  const uint scalar_offset_1 = ((offset + 8u)) / 4;
-  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
-  return float2x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)));
-}
-
-S tint_symbol_6(uint4 buffer[8], uint offset) {
-  const uint scalar_offset_2 = ((offset + 0u)) / 4;
-  const uint scalar_offset_3 = ((offset + 24u)) / 4;
-  const S tint_symbol_10 = {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4]), tint_symbol_8(buffer, (offset + 8u)), asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])};
-  return tint_symbol_10;
-}
-
-typedef S tint_symbol_5_ret[4];
-tint_symbol_5_ret tint_symbol_5(uint4 buffer[8], uint offset) {
-  S arr[4] = (S[4])0;
-  {
-    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
-      arr[i_1] = tint_symbol_6(buffer, (offset + (i_1 * 32u)));
-    }
-  }
-  return arr;
-}
-
-[numthreads(1, 1, 1)]
-void f() {
-  tint_symbol(s, 0u, tint_symbol_5(u, 0u));
-  tint_symbol_1(s, 32u, tint_symbol_6(u, 64u));
-  tint_symbol_3(s, 104u, tint_symbol_8(u, 72u));
-  s.Store2(40u, asuint(asfloat(u[1].xy).yx));
-  return;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2/to_storage.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat2x2/to_storage.wgsl.expected.glsl
deleted file mode 100644
index ec19f64..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat2x2/to_storage.wgsl.expected.glsl
+++ /dev/null

@@ -1,57 +0,0 @@
-#version 310 es
-
-struct S {
-  int before;
-  uint pad;
-  mat2 m;
-  int after;
-  uint pad_1;
-};
-
-struct S_std140 {
-  int before;
-  uint pad;
-  vec2 m_0;
-  vec2 m_1;
-  int after;
-  uint pad_1;
-};
-
-layout(binding = 0, std140) uniform u_block_std140_ubo {
-  S_std140 inner[4];
-} u;
-
-layout(binding = 1, std430) buffer u_block_ssbo {
-  S inner[4];
-} s;
-
-S conv_S(S_std140 val) {
-  return S(val.before, val.pad, mat2(val.m_0, val.m_1), val.after, val.pad_1);
-}
-
-S[4] conv_arr4_S(S_std140 val[4]) {
-  S arr[4] = S[4](S(0, 0u, mat2(0.0f, 0.0f, 0.0f, 0.0f), 0, 0u), S(0, 0u, mat2(0.0f, 0.0f, 0.0f, 0.0f), 0, 0u), S(0, 0u, mat2(0.0f, 0.0f, 0.0f, 0.0f), 0, 0u), S(0, 0u, mat2(0.0f, 0.0f, 0.0f, 0.0f), 0, 0u));
-  {
-    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
-      arr[i] = conv_S(val[i]);
-    }
-  }
-  return arr;
-}
-
-mat2 load_u_inner_2_m() {
-  return mat2(u.inner[2u].m_0, u.inner[2u].m_1);
-}
-
-void f() {
-  s.inner = conv_arr4_S(u.inner);
-  s.inner[1] = conv_S(u.inner[2u]);
-  s.inner[3].m = load_u_inner_2_m();
-  s.inner[1].m[0] = u.inner[0u].m_1.yx;
-}
-
-layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
-void main() {
-  f();
-  return;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2/to_storage.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat2x2/to_storage.wgsl.expected.msl
deleted file mode 100644
index b3b4048..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat2x2/to_storage.wgsl.expected.msl
+++ /dev/null

@@ -1,32 +0,0 @@
-#include <metal_stdlib>
-
-using namespace metal;
-
-template<typename T, size_t N>
-struct tint_array {
-    const constant T& operator[](size_t i) const constant { return elements[i]; }
-    device T& operator[](size_t i) device { return elements[i]; }
-    const device T& operator[](size_t i) const device { return elements[i]; }
-    thread T& operator[](size_t i) thread { return elements[i]; }
-    const thread T& operator[](size_t i) const thread { return elements[i]; }
-    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
-    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
-    T elements[N];
-};
-
-struct S {
-  /* 0x0000 */ int before;
-  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
-  /* 0x0008 */ float2x2 m;
-  /* 0x0018 */ int after;
-  /* 0x001c */ tint_array<int8_t, 4> tint_pad_1;
-};
-
-kernel void f(device tint_array<S, 4>* tint_symbol [[buffer(1)]], const constant tint_array<S, 4>* tint_symbol_1 [[buffer(0)]]) {
-  *(tint_symbol) = *(tint_symbol_1);
-  (*(tint_symbol))[1] = (*(tint_symbol_1))[2];
-  (*(tint_symbol))[3].m = (*(tint_symbol_1))[2].m;
-  (*(tint_symbol))[1].m[0] = float2((*(tint_symbol_1))[0].m[1]).yx;
-  return;
-}
-

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2/to_storage.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat2x2/to_storage.wgsl.expected.spvasm
deleted file mode 100644
index 6f393c7..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat2x2/to_storage.wgsl.expected.spvasm
+++ /dev/null

@@ -1,176 +0,0 @@
-; SPIR-V
-; Version: 1.3
-; Generator: Google Tint Compiler; 0
-; Bound: 104
-; Schema: 0
-               OpCapability Shader
-               OpMemoryModel Logical GLSL450
-               OpEntryPoint GLCompute %f "f"
-               OpExecutionMode %f LocalSize 1 1 1
-               OpName %u_block_std140 "u_block_std140"
-               OpMemberName %u_block_std140 0 "inner"
-               OpName %S_std140 "S_std140"
-               OpMemberName %S_std140 0 "before"
-               OpMemberName %S_std140 1 "m_0"
-               OpMemberName %S_std140 2 "m_1"
-               OpMemberName %S_std140 3 "after"
-               OpName %u "u"
-               OpName %u_block "u_block"
-               OpMemberName %u_block 0 "inner"
-               OpName %S "S"
-               OpMemberName %S 0 "before"
-               OpMemberName %S 1 "m"
-               OpMemberName %S 2 "after"
-               OpName %s "s"
-               OpName %conv_S "conv_S"
-               OpName %val "val"
-               OpName %conv_arr4_S "conv_arr4_S"
-               OpName %val_0 "val"
-               OpName %arr "arr"
-               OpName %i "i"
-               OpName %var_for_index "var_for_index"
-               OpName %load_u_inner_2_m "load_u_inner_2_m"
-               OpName %f "f"
-               OpDecorate %u_block_std140 Block
-               OpMemberDecorate %u_block_std140 0 Offset 0
-               OpMemberDecorate %S_std140 0 Offset 0
-               OpMemberDecorate %S_std140 1 Offset 8
-               OpMemberDecorate %S_std140 2 Offset 16
-               OpMemberDecorate %S_std140 3 Offset 24
-               OpDecorate %_arr_S_std140_uint_4 ArrayStride 32
-               OpDecorate %u NonWritable
-               OpDecorate %u DescriptorSet 0
-               OpDecorate %u Binding 0
-               OpDecorate %u_block Block
-               OpMemberDecorate %u_block 0 Offset 0
-               OpMemberDecorate %S 0 Offset 0
-               OpMemberDecorate %S 1 Offset 8
-               OpMemberDecorate %S 1 ColMajor
-               OpMemberDecorate %S 1 MatrixStride 8
-               OpMemberDecorate %S 2 Offset 24
-               OpDecorate %_arr_S_uint_4 ArrayStride 32
-               OpDecorate %s DescriptorSet 0
-               OpDecorate %s Binding 1
-        %int = OpTypeInt 32 1
-      %float = OpTypeFloat 32
-    %v2float = OpTypeVector %float 2
-   %S_std140 = OpTypeStruct %int %v2float %v2float %int
-       %uint = OpTypeInt 32 0
-     %uint_4 = OpConstant %uint 4
-%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
-%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
-%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
-          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
-%mat2v2float = OpTypeMatrix %v2float 2
-          %S = OpTypeStruct %int %mat2v2float %int
-%_arr_S_uint_4 = OpTypeArray %S %uint_4
-    %u_block = OpTypeStruct %_arr_S_uint_4
-%_ptr_StorageBuffer_u_block = OpTypePointer StorageBuffer %u_block
-          %s = OpVariable %_ptr_StorageBuffer_u_block StorageBuffer
-         %17 = OpTypeFunction %S %S_std140
-         %27 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
-%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
-         %33 = OpConstantNull %_arr_S_uint_4
-%_ptr_Function_uint = OpTypePointer Function %uint
-         %36 = OpConstantNull %uint
-       %bool = OpTypeBool
-%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
-         %49 = OpConstantNull %_arr_S_std140_uint_4
-%_ptr_Function_S = OpTypePointer Function %S
-%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
-     %uint_1 = OpConstant %uint 1
-         %62 = OpTypeFunction %mat2v2float
-     %uint_0 = OpConstant %uint 0
-     %uint_2 = OpConstant %uint 2
-%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
-%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
-       %void = OpTypeVoid
-         %78 = OpTypeFunction %void
-%_ptr_StorageBuffer__arr_S_uint_4 = OpTypePointer StorageBuffer %_arr_S_uint_4
-%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
-      %int_1 = OpConstant %int 1
-%_ptr_StorageBuffer_S = OpTypePointer StorageBuffer %S
-      %int_3 = OpConstant %int 3
-%_ptr_StorageBuffer_mat2v2float = OpTypePointer StorageBuffer %mat2v2float
-         %98 = OpConstantNull %int
-%_ptr_StorageBuffer_v2float = OpTypePointer StorageBuffer %v2float
-     %conv_S = OpFunction %S None %17
-        %val = OpFunctionParameter %S_std140
-         %20 = OpLabel
-         %21 = OpCompositeExtract %int %val 0
-         %22 = OpCompositeExtract %v2float %val 1
-         %23 = OpCompositeExtract %v2float %val 2
-         %24 = OpCompositeConstruct %mat2v2float %22 %23
-         %25 = OpCompositeExtract %int %val 3
-         %26 = OpCompositeConstruct %S %21 %24 %25
-               OpReturnValue %26
-               OpFunctionEnd
-%conv_arr4_S = OpFunction %_arr_S_uint_4 None %27
-      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
-         %30 = OpLabel
-        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %33
-          %i = OpVariable %_ptr_Function_uint Function %36
-%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %49
-               OpBranch %37
-         %37 = OpLabel
-               OpLoopMerge %38 %39 None
-               OpBranch %40
-         %40 = OpLabel
-         %42 = OpLoad %uint %i
-         %43 = OpULessThan %bool %42 %uint_4
-         %41 = OpLogicalNot %bool %43
-               OpSelectionMerge %45 None
-               OpBranchConditional %41 %46 %45
-         %46 = OpLabel
-               OpBranch %38
-         %45 = OpLabel
-               OpStore %var_for_index %val_0
-         %50 = OpLoad %uint %i
-         %52 = OpAccessChain %_ptr_Function_S %arr %50
-         %54 = OpLoad %uint %i
-         %56 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %54
-         %57 = OpLoad %S_std140 %56
-         %53 = OpFunctionCall %S %conv_S %57
-               OpStore %52 %53
-               OpBranch %39
-         %39 = OpLabel
-         %58 = OpLoad %uint %i
-         %60 = OpIAdd %uint %58 %uint_1
-               OpStore %i %60
-               OpBranch %37
-         %38 = OpLabel
-         %61 = OpLoad %_arr_S_uint_4 %arr
-               OpReturnValue %61
-               OpFunctionEnd
-%load_u_inner_2_m = OpFunction %mat2v2float None %62
-         %64 = OpLabel
-         %69 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
-         %72 = OpAccessChain %_ptr_Uniform_v2float %69 %uint_1
-         %73 = OpLoad %v2float %72
-         %75 = OpAccessChain %_ptr_Uniform_v2float %69 %uint_2
-         %76 = OpLoad %v2float %75
-         %77 = OpCompositeConstruct %mat2v2float %73 %76
-               OpReturnValue %77
-               OpFunctionEnd
-          %f = OpFunction %void None %78
-         %81 = OpLabel
-         %83 = OpAccessChain %_ptr_StorageBuffer__arr_S_uint_4 %s %uint_0
-         %86 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
-         %87 = OpLoad %_arr_S_std140_uint_4 %86
-         %84 = OpFunctionCall %_arr_S_uint_4 %conv_arr4_S %87
-               OpStore %83 %84
-         %90 = OpAccessChain %_ptr_StorageBuffer_S %s %uint_0 %int_1
-         %92 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
-         %93 = OpLoad %S_std140 %92
-         %91 = OpFunctionCall %S %conv_S %93
-               OpStore %90 %91
-         %96 = OpAccessChain %_ptr_StorageBuffer_mat2v2float %s %uint_0 %int_3 %uint_1
-         %97 = OpFunctionCall %mat2v2float %load_u_inner_2_m
-               OpStore %96 %97
-        %100 = OpAccessChain %_ptr_StorageBuffer_v2float %s %uint_0 %int_1 %uint_1 %98
-        %101 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %36 %uint_2
-        %102 = OpLoad %v2float %101
-        %103 = OpVectorShuffle %v2float %102 %102 1 0
-               OpStore %100 %103
-               OpReturn
-               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2/to_storage.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x2/to_storage.wgsl.expected.wgsl
deleted file mode 100644
index e9ed5d0..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat2x2/to_storage.wgsl.expected.wgsl
+++ /dev/null

@@ -1,17 +0,0 @@
-struct S {
-  before : i32,
-  m : mat2x2<f32>,
-  after : i32,
-}
-
-@group(0) @binding(0) var<uniform> u : array<S, 4>;
-
-@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
-
-@compute @workgroup_size(1)
-fn f() {
-  s = u;
-  s[1] = u[2];
-  s[3].m = u[2].m;
-  s[1].m[0] = u[0].m[1].yx;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2/to_workgroup.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x2/to_workgroup.wgsl
deleted file mode 100644
index 714b643..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat2x2/to_workgroup.wgsl
+++ /dev/null

@@ -1,16 +0,0 @@
-struct S {
-  before : i32,
-  m : mat2x2<f32>,
-  after : i32,
-}
-
-@group(0) @binding(0) var<uniform> u : array<S, 4>;
-var<workgroup> w : array<S, 4>;
-
-@compute @workgroup_size(1)
-fn f() {
-    w = u;
-    w[1] = u[2];
-    w[3].m = u[2].m;
-    w[1].m[0] = u[0].m[1].yx;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2/to_workgroup.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x2/to_workgroup.wgsl.expected.dxc.hlsl
deleted file mode 100644
index 1310e9e..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat2x2/to_workgroup.wgsl.expected.dxc.hlsl
+++ /dev/null

@@ -1,61 +0,0 @@
-struct S {
-  int before;
-  float2x2 m;
-  int after;
-};
-
-cbuffer cbuffer_u : register(b0, space0) {
-  uint4 u[8];
-};
-groupshared S w[4];
-
-struct tint_symbol_1 {
-  uint local_invocation_index : SV_GroupIndex;
-};
-
-float2x2 tint_symbol_5(uint4 buffer[8], uint offset) {
-  const uint scalar_offset = ((offset + 0u)) / 4;
-  uint4 ubo_load = buffer[scalar_offset / 4];
-  const uint scalar_offset_1 = ((offset + 8u)) / 4;
-  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
-  return float2x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)));
-}
-
-S tint_symbol_3(uint4 buffer[8], uint offset) {
-  const uint scalar_offset_2 = ((offset + 0u)) / 4;
-  const uint scalar_offset_3 = ((offset + 24u)) / 4;
-  const S tint_symbol_8 = {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4]), tint_symbol_5(buffer, (offset + 8u)), asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])};
-  return tint_symbol_8;
-}
-
-typedef S tint_symbol_2_ret[4];
-tint_symbol_2_ret tint_symbol_2(uint4 buffer[8], uint offset) {
-  S arr[4] = (S[4])0;
-  {
-    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
-      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 32u)));
-    }
-  }
-  return arr;
-}
-
-void f_inner(uint local_invocation_index) {
-  {
-    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
-      const uint i = idx;
-      const S tint_symbol_7 = (S)0;
-      w[i] = tint_symbol_7;
-    }
-  }
-  GroupMemoryBarrierWithGroupSync();
-  w = tint_symbol_2(u, 0u);
-  w[1] = tint_symbol_3(u, 64u);
-  w[3].m = tint_symbol_5(u, 72u);
-  w[1].m[0] = asfloat(u[1].xy).yx;
-}
-
-[numthreads(1, 1, 1)]
-void f(tint_symbol_1 tint_symbol) {
-  f_inner(tint_symbol.local_invocation_index);
-  return;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2/to_workgroup.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x2/to_workgroup.wgsl.expected.fxc.hlsl
deleted file mode 100644
index 1310e9e..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat2x2/to_workgroup.wgsl.expected.fxc.hlsl
+++ /dev/null

@@ -1,61 +0,0 @@
-struct S {
-  int before;
-  float2x2 m;
-  int after;
-};
-
-cbuffer cbuffer_u : register(b0, space0) {
-  uint4 u[8];
-};
-groupshared S w[4];
-
-struct tint_symbol_1 {
-  uint local_invocation_index : SV_GroupIndex;
-};
-
-float2x2 tint_symbol_5(uint4 buffer[8], uint offset) {
-  const uint scalar_offset = ((offset + 0u)) / 4;
-  uint4 ubo_load = buffer[scalar_offset / 4];
-  const uint scalar_offset_1 = ((offset + 8u)) / 4;
-  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
-  return float2x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)));
-}
-
-S tint_symbol_3(uint4 buffer[8], uint offset) {
-  const uint scalar_offset_2 = ((offset + 0u)) / 4;
-  const uint scalar_offset_3 = ((offset + 24u)) / 4;
-  const S tint_symbol_8 = {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4]), tint_symbol_5(buffer, (offset + 8u)), asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])};
-  return tint_symbol_8;
-}
-
-typedef S tint_symbol_2_ret[4];
-tint_symbol_2_ret tint_symbol_2(uint4 buffer[8], uint offset) {
-  S arr[4] = (S[4])0;
-  {
-    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
-      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 32u)));
-    }
-  }
-  return arr;
-}
-
-void f_inner(uint local_invocation_index) {
-  {
-    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
-      const uint i = idx;
-      const S tint_symbol_7 = (S)0;
-      w[i] = tint_symbol_7;
-    }
-  }
-  GroupMemoryBarrierWithGroupSync();
-  w = tint_symbol_2(u, 0u);
-  w[1] = tint_symbol_3(u, 64u);
-  w[3].m = tint_symbol_5(u, 72u);
-  w[1].m[0] = asfloat(u[1].xy).yx;
-}
-
-[numthreads(1, 1, 1)]
-void f(tint_symbol_1 tint_symbol) {
-  f_inner(tint_symbol.local_invocation_index);
-  return;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2/to_workgroup.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat2x2/to_workgroup.wgsl.expected.glsl
deleted file mode 100644
index cdc0450..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat2x2/to_workgroup.wgsl.expected.glsl
+++ /dev/null

@@ -1,62 +0,0 @@
-#version 310 es
-
-struct S {
-  int before;
-  uint pad;
-  mat2 m;
-  int after;
-  uint pad_1;
-};
-
-struct S_std140 {
-  int before;
-  uint pad;
-  vec2 m_0;
-  vec2 m_1;
-  int after;
-  uint pad_1;
-};
-
-layout(binding = 0, std140) uniform u_block_std140_ubo {
-  S_std140 inner[4];
-} u;
-
-shared S w[4];
-S conv_S(S_std140 val) {
-  return S(val.before, val.pad, mat2(val.m_0, val.m_1), val.after, val.pad_1);
-}
-
-S[4] conv_arr4_S(S_std140 val[4]) {
-  S arr[4] = S[4](S(0, 0u, mat2(0.0f, 0.0f, 0.0f, 0.0f), 0, 0u), S(0, 0u, mat2(0.0f, 0.0f, 0.0f, 0.0f), 0, 0u), S(0, 0u, mat2(0.0f, 0.0f, 0.0f, 0.0f), 0, 0u), S(0, 0u, mat2(0.0f, 0.0f, 0.0f, 0.0f), 0, 0u));
-  {
-    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
-      arr[i] = conv_S(val[i]);
-    }
-  }
-  return arr;
-}
-
-mat2 load_u_inner_2_m() {
-  return mat2(u.inner[2u].m_0, u.inner[2u].m_1);
-}
-
-void f(uint local_invocation_index) {
-  {
-    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
-      uint i = idx;
-      S tint_symbol = S(0, 0u, mat2(vec2(0.0f), vec2(0.0f)), 0, 0u);
-      w[i] = tint_symbol;
-    }
-  }
-  barrier();
-  w = conv_arr4_S(u.inner);
-  w[1] = conv_S(u.inner[2u]);
-  w[3].m = load_u_inner_2_m();
-  w[1].m[0] = u.inner[0u].m_1.yx;
-}
-
-layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
-void main() {
-  f(gl_LocalInvocationIndex);
-  return;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat2x2/to_workgroup.wgsl.expected.msl
deleted file mode 100644
index 85861c7..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat2x2/to_workgroup.wgsl.expected.msl
+++ /dev/null

@@ -1,47 +0,0 @@
-#include <metal_stdlib>
-
-using namespace metal;
-
-template<typename T, size_t N>
-struct tint_array {
-    const constant T& operator[](size_t i) const constant { return elements[i]; }
-    device T& operator[](size_t i) device { return elements[i]; }
-    const device T& operator[](size_t i) const device { return elements[i]; }
-    thread T& operator[](size_t i) thread { return elements[i]; }
-    const thread T& operator[](size_t i) const thread { return elements[i]; }
-    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
-    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
-    T elements[N];
-};
-
-struct S {
-  /* 0x0000 */ int before;
-  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
-  /* 0x0008 */ float2x2 m;
-  /* 0x0018 */ int after;
-  /* 0x001c */ tint_array<int8_t, 4> tint_pad_1;
-};
-
-struct tint_symbol_6 {
-  tint_array<S, 4> w;
-};
-
-void f_inner(uint local_invocation_index, threadgroup tint_array<S, 4>* const tint_symbol_1, const constant tint_array<S, 4>* const tint_symbol_2) {
-  for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
-    uint const i = idx;
-    S const tint_symbol = S{};
-    (*(tint_symbol_1))[i] = tint_symbol;
-  }
-  threadgroup_barrier(mem_flags::mem_threadgroup);
-  *(tint_symbol_1) = *(tint_symbol_2);
-  (*(tint_symbol_1))[1] = (*(tint_symbol_2))[2];
-  (*(tint_symbol_1))[3].m = (*(tint_symbol_2))[2].m;
-  (*(tint_symbol_1))[1].m[0] = float2((*(tint_symbol_2))[0].m[1]).yx;
-}
-
-kernel void f(const constant tint_array<S, 4>* tint_symbol_5 [[buffer(0)]], threadgroup tint_symbol_6* tint_symbol_4 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
-  threadgroup tint_array<S, 4>* const tint_symbol_3 = &((*(tint_symbol_4)).w);
-  f_inner(local_invocation_index, tint_symbol_3, tint_symbol_5);
-  return;
-}
-

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2/to_workgroup.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat2x2/to_workgroup.wgsl.expected.spvasm
deleted file mode 100644
index a6922dc..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat2x2/to_workgroup.wgsl.expected.spvasm
+++ /dev/null

@@ -1,210 +0,0 @@
-; SPIR-V
-; Version: 1.3
-; Generator: Google Tint Compiler; 0
-; Bound: 126
-; Schema: 0
-               OpCapability Shader
-               OpMemoryModel Logical GLSL450
-               OpEntryPoint GLCompute %f "f" %local_invocation_index_1
-               OpExecutionMode %f LocalSize 1 1 1
-               OpName %local_invocation_index_1 "local_invocation_index_1"
-               OpName %u_block_std140 "u_block_std140"
-               OpMemberName %u_block_std140 0 "inner"
-               OpName %S_std140 "S_std140"
-               OpMemberName %S_std140 0 "before"
-               OpMemberName %S_std140 1 "m_0"
-               OpMemberName %S_std140 2 "m_1"
-               OpMemberName %S_std140 3 "after"
-               OpName %u "u"
-               OpName %S "S"
-               OpMemberName %S 0 "before"
-               OpMemberName %S 1 "m"
-               OpMemberName %S 2 "after"
-               OpName %w "w"
-               OpName %conv_S "conv_S"
-               OpName %val "val"
-               OpName %conv_arr4_S "conv_arr4_S"
-               OpName %val_0 "val"
-               OpName %arr "arr"
-               OpName %i "i"
-               OpName %var_for_index "var_for_index"
-               OpName %load_u_inner_2_m "load_u_inner_2_m"
-               OpName %f_inner "f_inner"
-               OpName %local_invocation_index "local_invocation_index"
-               OpName %idx "idx"
-               OpName %f "f"
-               OpDecorate %local_invocation_index_1 BuiltIn LocalInvocationIndex
-               OpDecorate %u_block_std140 Block
-               OpMemberDecorate %u_block_std140 0 Offset 0
-               OpMemberDecorate %S_std140 0 Offset 0
-               OpMemberDecorate %S_std140 1 Offset 8
-               OpMemberDecorate %S_std140 2 Offset 16
-               OpMemberDecorate %S_std140 3 Offset 24
-               OpDecorate %_arr_S_std140_uint_4 ArrayStride 32
-               OpDecorate %u NonWritable
-               OpDecorate %u DescriptorSet 0
-               OpDecorate %u Binding 0
-               OpMemberDecorate %S 0 Offset 0
-               OpMemberDecorate %S 1 Offset 8
-               OpMemberDecorate %S 1 ColMajor
-               OpMemberDecorate %S 1 MatrixStride 8
-               OpMemberDecorate %S 2 Offset 24
-               OpDecorate %_arr_S_uint_4 ArrayStride 32
-       %uint = OpTypeInt 32 0
-%_ptr_Input_uint = OpTypePointer Input %uint
-%local_invocation_index_1 = OpVariable %_ptr_Input_uint Input
-        %int = OpTypeInt 32 1
-      %float = OpTypeFloat 32
-    %v2float = OpTypeVector %float 2
-   %S_std140 = OpTypeStruct %int %v2float %v2float %int
-     %uint_4 = OpConstant %uint 4
-%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
-%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
-%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
-          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
-%mat2v2float = OpTypeMatrix %v2float 2
-          %S = OpTypeStruct %int %mat2v2float %int
-%_arr_S_uint_4 = OpTypeArray %S %uint_4
-%_ptr_Workgroup__arr_S_uint_4 = OpTypePointer Workgroup %_arr_S_uint_4
-          %w = OpVariable %_ptr_Workgroup__arr_S_uint_4 Workgroup
-         %18 = OpTypeFunction %S %S_std140
-         %28 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
-%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
-         %34 = OpConstantNull %_arr_S_uint_4
-%_ptr_Function_uint = OpTypePointer Function %uint
-         %37 = OpConstantNull %uint
-       %bool = OpTypeBool
-%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
-         %50 = OpConstantNull %_arr_S_std140_uint_4
-%_ptr_Function_S = OpTypePointer Function %S
-%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
-     %uint_1 = OpConstant %uint 1
-         %63 = OpTypeFunction %mat2v2float
-     %uint_0 = OpConstant %uint 0
-     %uint_2 = OpConstant %uint 2
-%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
-%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
-       %void = OpTypeVoid
-         %79 = OpTypeFunction %void %uint
-%_ptr_Workgroup_S = OpTypePointer Workgroup %S
-         %97 = OpConstantNull %S
-   %uint_264 = OpConstant %uint 264
-%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
-      %int_1 = OpConstant %int 1
-      %int_3 = OpConstant %int 3
-%_ptr_Workgroup_mat2v2float = OpTypePointer Workgroup %mat2v2float
-        %115 = OpConstantNull %int
-%_ptr_Workgroup_v2float = OpTypePointer Workgroup %v2float
-        %121 = OpTypeFunction %void
-     %conv_S = OpFunction %S None %18
-        %val = OpFunctionParameter %S_std140
-         %21 = OpLabel
-         %22 = OpCompositeExtract %int %val 0
-         %23 = OpCompositeExtract %v2float %val 1
-         %24 = OpCompositeExtract %v2float %val 2
-         %25 = OpCompositeConstruct %mat2v2float %23 %24
-         %26 = OpCompositeExtract %int %val 3
-         %27 = OpCompositeConstruct %S %22 %25 %26
-               OpReturnValue %27
-               OpFunctionEnd
-%conv_arr4_S = OpFunction %_arr_S_uint_4 None %28
-      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
-         %31 = OpLabel
-        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %34
-          %i = OpVariable %_ptr_Function_uint Function %37
-%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %50
-               OpBranch %38
-         %38 = OpLabel
-               OpLoopMerge %39 %40 None
-               OpBranch %41
-         %41 = OpLabel
-         %43 = OpLoad %uint %i
-         %44 = OpULessThan %bool %43 %uint_4
-         %42 = OpLogicalNot %bool %44
-               OpSelectionMerge %46 None
-               OpBranchConditional %42 %47 %46
-         %47 = OpLabel
-               OpBranch %39
-         %46 = OpLabel
-               OpStore %var_for_index %val_0
-         %51 = OpLoad %uint %i
-         %53 = OpAccessChain %_ptr_Function_S %arr %51
-         %55 = OpLoad %uint %i
-         %57 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %55
-         %58 = OpLoad %S_std140 %57
-         %54 = OpFunctionCall %S %conv_S %58
-               OpStore %53 %54
-               OpBranch %40
-         %40 = OpLabel
-         %59 = OpLoad %uint %i
-         %61 = OpIAdd %uint %59 %uint_1
-               OpStore %i %61
-               OpBranch %38
-         %39 = OpLabel
-         %62 = OpLoad %_arr_S_uint_4 %arr
-               OpReturnValue %62
-               OpFunctionEnd
-%load_u_inner_2_m = OpFunction %mat2v2float None %63
-         %65 = OpLabel
-         %70 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
-         %73 = OpAccessChain %_ptr_Uniform_v2float %70 %uint_1
-         %74 = OpLoad %v2float %73
-         %76 = OpAccessChain %_ptr_Uniform_v2float %70 %uint_2
-         %77 = OpLoad %v2float %76
-         %78 = OpCompositeConstruct %mat2v2float %74 %77
-               OpReturnValue %78
-               OpFunctionEnd
-    %f_inner = OpFunction %void None %79
-%local_invocation_index = OpFunctionParameter %uint
-         %83 = OpLabel
-        %idx = OpVariable %_ptr_Function_uint Function %37
-               OpStore %idx %local_invocation_index
-               OpBranch %85
-         %85 = OpLabel
-               OpLoopMerge %86 %87 None
-               OpBranch %88
-         %88 = OpLabel
-         %90 = OpLoad %uint %idx
-         %91 = OpULessThan %bool %90 %uint_4
-         %89 = OpLogicalNot %bool %91
-               OpSelectionMerge %92 None
-               OpBranchConditional %89 %93 %92
-         %93 = OpLabel
-               OpBranch %86
-         %92 = OpLabel
-         %94 = OpLoad %uint %idx
-         %96 = OpAccessChain %_ptr_Workgroup_S %w %94
-               OpStore %96 %97
-               OpBranch %87
-         %87 = OpLabel
-         %98 = OpLoad %uint %idx
-         %99 = OpIAdd %uint %98 %uint_1
-               OpStore %idx %99
-               OpBranch %85
-         %86 = OpLabel
-               OpControlBarrier %uint_2 %uint_2 %uint_264
-        %104 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
-        %105 = OpLoad %_arr_S_std140_uint_4 %104
-        %102 = OpFunctionCall %_arr_S_uint_4 %conv_arr4_S %105
-               OpStore %w %102
-        %107 = OpAccessChain %_ptr_Workgroup_S %w %int_1
-        %109 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
-        %110 = OpLoad %S_std140 %109
-        %108 = OpFunctionCall %S %conv_S %110
-               OpStore %107 %108
-        %113 = OpAccessChain %_ptr_Workgroup_mat2v2float %w %int_3 %uint_1
-        %114 = OpFunctionCall %mat2v2float %load_u_inner_2_m
-               OpStore %113 %114
-        %117 = OpAccessChain %_ptr_Workgroup_v2float %w %int_1 %uint_1 %115
-        %118 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %37 %uint_2
-        %119 = OpLoad %v2float %118
-        %120 = OpVectorShuffle %v2float %119 %119 1 0
-               OpStore %117 %120
-               OpReturn
-               OpFunctionEnd
-          %f = OpFunction %void None %121
-        %123 = OpLabel
-        %125 = OpLoad %uint %local_invocation_index_1
-        %124 = OpFunctionCall %void %f_inner %125
-               OpReturn
-               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2/to_workgroup.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x2/to_workgroup.wgsl.expected.wgsl
deleted file mode 100644
index 14ff361..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat2x2/to_workgroup.wgsl.expected.wgsl
+++ /dev/null

@@ -1,17 +0,0 @@
-struct S {
-  before : i32,
-  m : mat2x2<f32>,
-  after : i32,
-}
-
-@group(0) @binding(0) var<uniform> u : array<S, 4>;
-
-var<workgroup> w : array<S, 4>;
-
-@compute @workgroup_size(1)
-fn f() {
-  w = u;
-  w[1] = u[2];
-  w[3].m = u[2].m;
-  w[1].m[0] = u[0].m[1].yx;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f16/dynamic_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/dynamic_index_via_ptr.wgsl
new file mode 100644
index 0000000..5a08d75
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/dynamic_index_via_ptr.wgsl

@@ -0,0 +1,34 @@
+enable f16;
+
+struct Inner {
+  @size(64)
+  m : mat2x2<f16>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+var<private> counter = 0;
+fn i() -> i32 { counter++; return counter; }
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a           = &a;
+  let p_a_i         = &((*p_a)[i()]);
+  let p_a_i_a       = &((*p_a_i).a);
+  let p_a_i_a_i     = &((*p_a_i_a)[i()]);
+  let p_a_i_a_i_m   = &((*p_a_i_a_i).m);
+  let p_a_i_a_i_m_i = &((*p_a_i_a_i_m)[i()]);
+
+
+  let l_a             : array<Outer, 4> =  *p_a;
+  let l_a_i           : Outer           =  *p_a_i;
+  let l_a_i_a         : array<Inner, 4> =  *p_a_i_a;
+  let l_a_i_a_i       : Inner           =  *p_a_i_a_i;
+  let l_a_i_a_i_m     : mat2x2<f16>     =  *p_a_i_a_i_m;
+  let l_a_i_a_i_m_i   : vec2<f16>       =  *p_a_i_a_i_m_i;
+  let l_a_i_a_i_m_i_i : f16             = (*p_a_i_a_i_m_i)[i()];
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..6fdfa27
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,79 @@
+struct Inner {
+  matrix<float16_t, 2, 2> m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+matrix<float16_t, 2, 2> tint_symbol_8(uint4 buffer[64], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  return matrix<float16_t, 2, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))));
+}
+
+Inner tint_symbol_7(uint4 buffer[64], uint offset) {
+  const Inner tint_symbol_11 = {tint_symbol_8(buffer, (offset + 0u))};
+  return tint_symbol_11;
+}
+
+typedef Inner tint_symbol_6_ret[4];
+tint_symbol_6_ret tint_symbol_6(uint4 buffer[64], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_7(buffer, (offset + (i_1 * 64u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_5(uint4 buffer[64], uint offset) {
+  const Outer tint_symbol_12 = {tint_symbol_6(buffer, (offset + 0u))};
+  return tint_symbol_12;
+}
+
+typedef Outer tint_symbol_4_ret[4];
+tint_symbol_4_ret tint_symbol_4(uint4 buffer[64], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    for(uint i_2 = 0u; (i_2 < 4u); i_2 = (i_2 + 1u)) {
+      arr_1[i_2] = tint_symbol_5(buffer, (offset + (i_2 * 256u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_a_i_save = i();
+  const int p_a_i_a_i_save = i();
+  const int p_a_i_a_i_m_i_save = i();
+  const Outer l_a[4] = tint_symbol_4(a, 0u);
+  const Outer l_a_i = tint_symbol_5(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a[4] = tint_symbol_6(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a_i = tint_symbol_7(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const matrix<float16_t, 2, 2> l_a_i_a_i_m = tint_symbol_8(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const uint scalar_offset_2 = ((((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))) + (4u * uint(p_a_i_a_i_m_i_save)))) / 4;
+  uint ubo_load_2 = a[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  const vector<float16_t, 2> l_a_i_a_i_m_i = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16)));
+  const int tint_symbol = p_a_i_save;
+  const int tint_symbol_1 = p_a_i_a_i_save;
+  const int tint_symbol_2 = p_a_i_a_i_m_i_save;
+  const int tint_symbol_3 = i();
+  const uint scalar_offset_bytes = (((((256u * uint(tint_symbol)) + (64u * uint(tint_symbol_1))) + (4u * uint(tint_symbol_2))) + (2u * uint(tint_symbol_3))));
+  const uint scalar_offset_index = scalar_offset_bytes / 4;
+  const float16_t l_a_i_a_i_m_i_i = float16_t(f16tof32(((a[scalar_offset_index / 4][scalar_offset_index % 4] >> (scalar_offset_bytes % 4 == 0 ? 0 : 16)) & 0xFFFF)));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..7ca22da
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,84 @@
+SKIP: FAILED
+
+struct Inner {
+  matrix<float16_t, 2, 2> m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+matrix<float16_t, 2, 2> tint_symbol_8(uint4 buffer[64], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  return matrix<float16_t, 2, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))));
+}
+
+Inner tint_symbol_7(uint4 buffer[64], uint offset) {
+  const Inner tint_symbol_11 = {tint_symbol_8(buffer, (offset + 0u))};
+  return tint_symbol_11;
+}
+
+typedef Inner tint_symbol_6_ret[4];
+tint_symbol_6_ret tint_symbol_6(uint4 buffer[64], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_7(buffer, (offset + (i_1 * 64u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_5(uint4 buffer[64], uint offset) {
+  const Outer tint_symbol_12 = {tint_symbol_6(buffer, (offset + 0u))};
+  return tint_symbol_12;
+}
+
+typedef Outer tint_symbol_4_ret[4];
+tint_symbol_4_ret tint_symbol_4(uint4 buffer[64], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    for(uint i_2 = 0u; (i_2 < 4u); i_2 = (i_2 + 1u)) {
+      arr_1[i_2] = tint_symbol_5(buffer, (offset + (i_2 * 256u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_a_i_save = i();
+  const int p_a_i_a_i_save = i();
+  const int p_a_i_a_i_m_i_save = i();
+  const Outer l_a[4] = tint_symbol_4(a, 0u);
+  const Outer l_a_i = tint_symbol_5(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a[4] = tint_symbol_6(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a_i = tint_symbol_7(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const matrix<float16_t, 2, 2> l_a_i_a_i_m = tint_symbol_8(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const uint scalar_offset_2 = ((((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))) + (4u * uint(p_a_i_a_i_m_i_save)))) / 4;
+  uint ubo_load_2 = a[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  const vector<float16_t, 2> l_a_i_a_i_m_i = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16)));
+  const int tint_symbol = p_a_i_save;
+  const int tint_symbol_1 = p_a_i_a_i_save;
+  const int tint_symbol_2 = p_a_i_a_i_m_i_save;
+  const int tint_symbol_3 = i();
+  const uint scalar_offset_bytes = (((((256u * uint(tint_symbol)) + (64u * uint(tint_symbol_1))) + (4u * uint(tint_symbol_2))) + (2u * uint(tint_symbol_3))));
+  const uint scalar_offset_index = scalar_offset_bytes / 4;
+  const float16_t l_a_i_a_i_m_i_i = float16_t(f16tof32(((a[scalar_offset_index / 4][scalar_offset_index % 4] >> (scalar_offset_bytes % 4 == 0 ? 0 : 16)) & 0xFFFF)));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x00000270DFBEAD70(2,10-18): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f16/dynamic_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/dynamic_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..b75bad2
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/dynamic_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,151 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct Inner {
+  f16mat2 m;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+};
+
+struct Inner_std140 {
+  f16vec2 m_0;
+  f16vec2 m_1;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+};
+
+struct Outer {
+  Inner a[4];
+};
+
+struct Outer_std140 {
+  Inner_std140 a[4];
+};
+
+layout(binding = 0, std140) uniform a_block_std140_ubo {
+  Outer_std140 inner[4];
+} a;
+
+int counter = 0;
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+Inner conv_Inner(Inner_std140 val) {
+  return Inner(f16mat2(val.m_0, val.m_1), val.pad, val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.pad_7, val.pad_8, val.pad_9, val.pad_10, val.pad_11, val.pad_12, val.pad_13);
+}
+
+Inner[4] conv_arr4_Inner(Inner_std140 val[4]) {
+  Inner arr[4] = Inner[4](Inner(f16mat2(0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2(0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2(0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2(0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_Inner(val[i]);
+    }
+  }
+  return arr;
+}
+
+Outer conv_Outer(Outer_std140 val) {
+  return Outer(conv_arr4_Inner(val.a));
+}
+
+Outer[4] conv_arr4_Outer(Outer_std140 val[4]) {
+  Outer arr[4] = Outer[4](Outer(Inner[4](Inner(f16mat2(0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2(0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2(0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2(0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))), Outer(Inner[4](Inner(f16mat2(0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2(0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2(0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2(0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))), Outer(Inner[4](Inner(f16mat2(0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2(0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2(0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2(0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))), Outer(Inner[4](Inner(f16mat2(0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2(0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2(0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2(0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_Outer(val[i]);
+    }
+  }
+  return arr;
+}
+
+f16mat2 load_a_inner_p0_a_p1_m(uint p0, uint p1) {
+  uint s_save = p0;
+  uint s_save_1 = p1;
+  return f16mat2(a.inner[s_save].a[s_save_1].m_0, a.inner[s_save].a[s_save_1].m_1);
+}
+
+f16vec2 load_a_inner_p0_a_p1_m_p2(uint p0, uint p1, uint p2) {
+  switch(p2) {
+    case 0u: {
+      return a.inner[p0].a[p1].m_0;
+      break;
+    }
+    case 1u: {
+      return a.inner[p0].a[p1].m_1;
+      break;
+    }
+    default: {
+      return f16vec2(0.0hf);
+      break;
+    }
+  }
+}
+
+float16_t load_a_inner_p0_a_p1_m_p2_p3(uint p0, uint p1, uint p2, uint p3) {
+  switch(p2) {
+    case 0u: {
+      return a.inner[p0].a[p1].m_0[p3];
+      break;
+    }
+    case 1u: {
+      return a.inner[p0].a[p1].m_1[p3];
+      break;
+    }
+    default: {
+      return 0.0hf;
+      break;
+    }
+  }
+}
+
+void f() {
+  Outer p_a[4] = conv_arr4_Outer(a.inner);
+  int tint_symbol = i();
+  Outer p_a_i = conv_Outer(a.inner[tint_symbol]);
+  Inner p_a_i_a[4] = conv_arr4_Inner(a.inner[tint_symbol].a);
+  int tint_symbol_1 = i();
+  Inner p_a_i_a_i = conv_Inner(a.inner[tint_symbol].a[tint_symbol_1]);
+  f16mat2 p_a_i_a_i_m = load_a_inner_p0_a_p1_m(uint(tint_symbol), uint(tint_symbol_1));
+  int tint_symbol_2 = i();
+  f16vec2 p_a_i_a_i_m_i = load_a_inner_p0_a_p1_m_p2(uint(tint_symbol), uint(tint_symbol_1), uint(tint_symbol_2));
+  Outer l_a[4] = conv_arr4_Outer(a.inner);
+  Outer l_a_i = conv_Outer(a.inner[tint_symbol]);
+  Inner l_a_i_a[4] = conv_arr4_Inner(a.inner[tint_symbol].a);
+  Inner l_a_i_a_i = conv_Inner(a.inner[tint_symbol].a[tint_symbol_1]);
+  f16mat2 l_a_i_a_i_m = load_a_inner_p0_a_p1_m(uint(tint_symbol), uint(tint_symbol_1));
+  f16vec2 l_a_i_a_i_m_i = load_a_inner_p0_a_p1_m_p2(uint(tint_symbol), uint(tint_symbol_1), uint(tint_symbol_2));
+  int tint_symbol_3 = i();
+  float16_t l_a_i_a_i_m_i_i = load_a_inner_p0_a_p1_m_p2_p3(uint(tint_symbol), uint(tint_symbol_1), uint(tint_symbol_2), uint(tint_symbol_3));
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f16/dynamic_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/dynamic_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..3c067f3
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/dynamic_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,49 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct Inner {
+  /* 0x0000 */ half2x2 m;
+  /* 0x0008 */ tint_array<int8_t, 56> tint_pad;
+};
+
+struct Outer {
+  /* 0x0000 */ tint_array<Inner, 4> a;
+};
+
+int i() {
+  thread int tint_symbol_4 = 0;
+  tint_symbol_4 = as_type<int>((as_type<uint>(tint_symbol_4) + as_type<uint>(1)));
+  return tint_symbol_4;
+}
+
+kernel void f(const constant tint_array<Outer, 4>* tint_symbol_5 [[buffer(0)]]) {
+  int const tint_symbol = i();
+  int const p_a_i_save = tint_symbol;
+  int const tint_symbol_1 = i();
+  int const p_a_i_a_i_save = tint_symbol_1;
+  int const tint_symbol_2 = i();
+  int const p_a_i_a_i_m_i_save = tint_symbol_2;
+  tint_array<Outer, 4> const l_a = *(tint_symbol_5);
+  Outer const l_a_i = (*(tint_symbol_5))[p_a_i_save];
+  tint_array<Inner, 4> const l_a_i_a = (*(tint_symbol_5))[p_a_i_save].a;
+  Inner const l_a_i_a_i = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save];
+  half2x2 const l_a_i_a_i_m = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save].m;
+  half2 const l_a_i_a_i_m_i = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save].m[p_a_i_a_i_m_i_save];
+  int const tint_symbol_3 = i();
+  half const l_a_i_a_i_m_i_i = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save].m[p_a_i_a_i_m_i_save][tint_symbol_3];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f16/dynamic_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/dynamic_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..93f999c
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/dynamic_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,310 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 193
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %a_block_std140 "a_block_std140"
+               OpMemberName %a_block_std140 0 "inner"
+               OpName %Outer_std140 "Outer_std140"
+               OpMemberName %Outer_std140 0 "a"
+               OpName %Inner_std140 "Inner_std140"
+               OpMemberName %Inner_std140 0 "m_0"
+               OpMemberName %Inner_std140 1 "m_1"
+               OpName %a "a"
+               OpName %counter "counter"
+               OpName %i "i"
+               OpName %Inner "Inner"
+               OpMemberName %Inner 0 "m"
+               OpName %conv_Inner "conv_Inner"
+               OpName %val "val"
+               OpName %conv_arr4_Inner "conv_arr4_Inner"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i_0 "i"
+               OpName %var_for_index_1 "var_for_index_1"
+               OpName %Outer "Outer"
+               OpMemberName %Outer 0 "a"
+               OpName %conv_Outer "conv_Outer"
+               OpName %val_1 "val"
+               OpName %conv_arr4_Outer "conv_arr4_Outer"
+               OpName %val_2 "val"
+               OpName %arr_0 "arr"
+               OpName %i_1 "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_a_inner_p0_a_p1_m "load_a_inner_p0_a_p1_m"
+               OpName %p0 "p0"
+               OpName %p1 "p1"
+               OpName %load_a_inner_p0_a_p1_m_p2 "load_a_inner_p0_a_p1_m_p2"
+               OpName %p0_0 "p0"
+               OpName %p1_0 "p1"
+               OpName %p2 "p2"
+               OpName %load_a_inner_p0_a_p1_m_p2_p3 "load_a_inner_p0_a_p1_m_p2_p3"
+               OpName %p0_1 "p0"
+               OpName %p1_1 "p1"
+               OpName %p2_0 "p2"
+               OpName %p3 "p3"
+               OpName %f "f"
+               OpDecorate %a_block_std140 Block
+               OpMemberDecorate %a_block_std140 0 Offset 0
+               OpMemberDecorate %Outer_std140 0 Offset 0
+               OpMemberDecorate %Inner_std140 0 Offset 0
+               OpMemberDecorate %Inner_std140 1 Offset 4
+               OpDecorate %_arr_Inner_std140_uint_4 ArrayStride 64
+               OpDecorate %_arr_Outer_std140_uint_4 ArrayStride 256
+               OpDecorate %a NonWritable
+               OpDecorate %a DescriptorSet 0
+               OpDecorate %a Binding 0
+               OpMemberDecorate %Inner 0 Offset 0
+               OpMemberDecorate %Inner 0 ColMajor
+               OpMemberDecorate %Inner 0 MatrixStride 4
+               OpDecorate %_arr_Inner_uint_4 ArrayStride 64
+               OpMemberDecorate %Outer 0 Offset 0
+               OpDecorate %_arr_Outer_uint_4 ArrayStride 256
+       %half = OpTypeFloat 16
+     %v2half = OpTypeVector %half 2
+%Inner_std140 = OpTypeStruct %v2half %v2half
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_Inner_std140_uint_4 = OpTypeArray %Inner_std140 %uint_4
+%Outer_std140 = OpTypeStruct %_arr_Inner_std140_uint_4
+%_arr_Outer_std140_uint_4 = OpTypeArray %Outer_std140 %uint_4
+%a_block_std140 = OpTypeStruct %_arr_Outer_std140_uint_4
+%_ptr_Uniform_a_block_std140 = OpTypePointer Uniform %a_block_std140
+          %a = OpVariable %_ptr_Uniform_a_block_std140 Uniform
+        %int = OpTypeInt 32 1
+         %13 = OpConstantNull %int
+%_ptr_Private_int = OpTypePointer Private %int
+    %counter = OpVariable %_ptr_Private_int Private %13
+         %16 = OpTypeFunction %int
+      %int_1 = OpConstant %int 1
+ %mat2v2half = OpTypeMatrix %v2half 2
+      %Inner = OpTypeStruct %mat2v2half
+         %23 = OpTypeFunction %Inner %Inner_std140
+%_arr_Inner_uint_4 = OpTypeArray %Inner %uint_4
+         %33 = OpTypeFunction %_arr_Inner_uint_4 %_arr_Inner_std140_uint_4
+%_ptr_Function__arr_Inner_uint_4 = OpTypePointer Function %_arr_Inner_uint_4
+         %40 = OpConstantNull %_arr_Inner_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %43 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_Inner_std140_uint_4 = OpTypePointer Function %_arr_Inner_std140_uint_4
+         %56 = OpConstantNull %_arr_Inner_std140_uint_4
+%_ptr_Function_Inner = OpTypePointer Function %Inner
+%_ptr_Function_Inner_std140 = OpTypePointer Function %Inner_std140
+     %uint_1 = OpConstant %uint 1
+      %Outer = OpTypeStruct %_arr_Inner_uint_4
+         %69 = OpTypeFunction %Outer %Outer_std140
+%_arr_Outer_uint_4 = OpTypeArray %Outer %uint_4
+         %77 = OpTypeFunction %_arr_Outer_uint_4 %_arr_Outer_std140_uint_4
+%_ptr_Function__arr_Outer_uint_4 = OpTypePointer Function %_arr_Outer_uint_4
+         %84 = OpConstantNull %_arr_Outer_uint_4
+%_ptr_Function__arr_Outer_std140_uint_4 = OpTypePointer Function %_arr_Outer_std140_uint_4
+         %97 = OpConstantNull %_arr_Outer_std140_uint_4
+%_ptr_Function_Outer = OpTypePointer Function %Outer
+%_ptr_Function_Outer_std140 = OpTypePointer Function %Outer_std140
+        %109 = OpTypeFunction %mat2v2half %uint %uint
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_Inner_std140 = OpTypePointer Uniform %Inner_std140
+%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half
+        %126 = OpTypeFunction %v2half %uint %uint %uint
+        %140 = OpConstantNull %v2half
+        %141 = OpTypeFunction %half %uint %uint %uint %uint
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+        %157 = OpConstantNull %half
+       %void = OpTypeVoid
+        %158 = OpTypeFunction %void
+%_ptr_Uniform__arr_Outer_std140_uint_4 = OpTypePointer Uniform %_arr_Outer_std140_uint_4
+%_ptr_Uniform_Outer_std140 = OpTypePointer Uniform %Outer_std140
+%_ptr_Uniform__arr_Inner_std140_uint_4 = OpTypePointer Uniform %_arr_Inner_std140_uint_4
+          %i = OpFunction %int None %16
+         %18 = OpLabel
+         %19 = OpLoad %int %counter
+         %21 = OpIAdd %int %19 %int_1
+               OpStore %counter %21
+         %22 = OpLoad %int %counter
+               OpReturnValue %22
+               OpFunctionEnd
+ %conv_Inner = OpFunction %Inner None %23
+        %val = OpFunctionParameter %Inner_std140
+         %28 = OpLabel
+         %29 = OpCompositeExtract %v2half %val 0
+         %30 = OpCompositeExtract %v2half %val 1
+         %31 = OpCompositeConstruct %mat2v2half %29 %30
+         %32 = OpCompositeConstruct %Inner %31
+               OpReturnValue %32
+               OpFunctionEnd
+%conv_arr4_Inner = OpFunction %_arr_Inner_uint_4 None %33
+      %val_0 = OpFunctionParameter %_arr_Inner_std140_uint_4
+         %37 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_Inner_uint_4 Function %40
+        %i_0 = OpVariable %_ptr_Function_uint Function %43
+%var_for_index_1 = OpVariable %_ptr_Function__arr_Inner_std140_uint_4 Function %56
+               OpBranch %44
+         %44 = OpLabel
+               OpLoopMerge %45 %46 None
+               OpBranch %47
+         %47 = OpLabel
+         %49 = OpLoad %uint %i_0
+         %50 = OpULessThan %bool %49 %uint_4
+         %48 = OpLogicalNot %bool %50
+               OpSelectionMerge %52 None
+               OpBranchConditional %48 %53 %52
+         %53 = OpLabel
+               OpBranch %45
+         %52 = OpLabel
+               OpStore %var_for_index_1 %val_0
+         %57 = OpLoad %uint %i_0
+         %59 = OpAccessChain %_ptr_Function_Inner %arr %57
+         %61 = OpLoad %uint %i_0
+         %63 = OpAccessChain %_ptr_Function_Inner_std140 %var_for_index_1 %61
+         %64 = OpLoad %Inner_std140 %63
+         %60 = OpFunctionCall %Inner %conv_Inner %64
+               OpStore %59 %60
+               OpBranch %46
+         %46 = OpLabel
+         %65 = OpLoad %uint %i_0
+         %67 = OpIAdd %uint %65 %uint_1
+               OpStore %i_0 %67
+               OpBranch %44
+         %45 = OpLabel
+         %68 = OpLoad %_arr_Inner_uint_4 %arr
+               OpReturnValue %68
+               OpFunctionEnd
+ %conv_Outer = OpFunction %Outer None %69
+      %val_1 = OpFunctionParameter %Outer_std140
+         %73 = OpLabel
+         %75 = OpCompositeExtract %_arr_Inner_std140_uint_4 %val_1 0
+         %74 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr4_Inner %75
+         %76 = OpCompositeConstruct %Outer %74
+               OpReturnValue %76
+               OpFunctionEnd
+%conv_arr4_Outer = OpFunction %_arr_Outer_uint_4 None %77
+      %val_2 = OpFunctionParameter %_arr_Outer_std140_uint_4
+         %81 = OpLabel
+      %arr_0 = OpVariable %_ptr_Function__arr_Outer_uint_4 Function %84
+        %i_1 = OpVariable %_ptr_Function_uint Function %43
+%var_for_index = OpVariable %_ptr_Function__arr_Outer_std140_uint_4 Function %97
+               OpBranch %86
+         %86 = OpLabel
+               OpLoopMerge %87 %88 None
+               OpBranch %89
+         %89 = OpLabel
+         %91 = OpLoad %uint %i_1
+         %92 = OpULessThan %bool %91 %uint_4
+         %90 = OpLogicalNot %bool %92
+               OpSelectionMerge %93 None
+               OpBranchConditional %90 %94 %93
+         %94 = OpLabel
+               OpBranch %87
+         %93 = OpLabel
+               OpStore %var_for_index %val_2
+         %98 = OpLoad %uint %i_1
+        %100 = OpAccessChain %_ptr_Function_Outer %arr_0 %98
+        %102 = OpLoad %uint %i_1
+        %104 = OpAccessChain %_ptr_Function_Outer_std140 %var_for_index %102
+        %105 = OpLoad %Outer_std140 %104
+        %101 = OpFunctionCall %Outer %conv_Outer %105
+               OpStore %100 %101
+               OpBranch %88
+         %88 = OpLabel
+        %106 = OpLoad %uint %i_1
+        %107 = OpIAdd %uint %106 %uint_1
+               OpStore %i_1 %107
+               OpBranch %86
+         %87 = OpLabel
+        %108 = OpLoad %_arr_Outer_uint_4 %arr_0
+               OpReturnValue %108
+               OpFunctionEnd
+%load_a_inner_p0_a_p1_m = OpFunction %mat2v2half None %109
+         %p0 = OpFunctionParameter %uint
+         %p1 = OpFunctionParameter %uint
+        %113 = OpLabel
+        %117 = OpAccessChain %_ptr_Uniform_Inner_std140 %a %uint_0 %p0 %uint_0 %p1
+        %120 = OpAccessChain %_ptr_Uniform_v2half %117 %uint_0
+        %121 = OpLoad %v2half %120
+        %123 = OpAccessChain %_ptr_Uniform_v2half %117 %uint_1
+        %124 = OpLoad %v2half %123
+        %125 = OpCompositeConstruct %mat2v2half %121 %124
+               OpReturnValue %125
+               OpFunctionEnd
+%load_a_inner_p0_a_p1_m_p2 = OpFunction %v2half None %126
+       %p0_0 = OpFunctionParameter %uint
+       %p1_0 = OpFunctionParameter %uint
+         %p2 = OpFunctionParameter %uint
+        %131 = OpLabel
+               OpSelectionMerge %132 None
+               OpSwitch %p2 %133 0 %134 1 %135
+        %134 = OpLabel
+        %136 = OpAccessChain %_ptr_Uniform_v2half %a %uint_0 %p0_0 %uint_0 %p1_0 %uint_0
+        %137 = OpLoad %v2half %136
+               OpReturnValue %137
+        %135 = OpLabel
+        %138 = OpAccessChain %_ptr_Uniform_v2half %a %uint_0 %p0_0 %uint_0 %p1_0 %uint_1
+        %139 = OpLoad %v2half %138
+               OpReturnValue %139
+        %133 = OpLabel
+               OpReturnValue %140
+        %132 = OpLabel
+               OpReturnValue %140
+               OpFunctionEnd
+%load_a_inner_p0_a_p1_m_p2_p3 = OpFunction %half None %141
+       %p0_1 = OpFunctionParameter %uint
+       %p1_1 = OpFunctionParameter %uint
+       %p2_0 = OpFunctionParameter %uint
+         %p3 = OpFunctionParameter %uint
+        %147 = OpLabel
+               OpSelectionMerge %148 None
+               OpSwitch %p2_0 %149 0 %150 1 %151
+        %150 = OpLabel
+        %153 = OpAccessChain %_ptr_Uniform_half %a %uint_0 %p0_1 %uint_0 %p1_1 %uint_0 %p3
+        %154 = OpLoad %half %153
+               OpReturnValue %154
+        %151 = OpLabel
+        %155 = OpAccessChain %_ptr_Uniform_half %a %uint_0 %p0_1 %uint_0 %p1_1 %uint_1 %p3
+        %156 = OpLoad %half %155
+               OpReturnValue %156
+        %149 = OpLabel
+               OpReturnValue %157
+        %148 = OpLabel
+               OpReturnValue %157
+               OpFunctionEnd
+          %f = OpFunction %void None %158
+        %161 = OpLabel
+        %162 = OpFunctionCall %int %i
+        %163 = OpFunctionCall %int %i
+        %164 = OpFunctionCall %int %i
+        %167 = OpAccessChain %_ptr_Uniform__arr_Outer_std140_uint_4 %a %uint_0
+        %168 = OpLoad %_arr_Outer_std140_uint_4 %167
+        %165 = OpFunctionCall %_arr_Outer_uint_4 %conv_arr4_Outer %168
+        %171 = OpAccessChain %_ptr_Uniform_Outer_std140 %a %uint_0 %162
+        %172 = OpLoad %Outer_std140 %171
+        %169 = OpFunctionCall %Outer %conv_Outer %172
+        %175 = OpAccessChain %_ptr_Uniform__arr_Inner_std140_uint_4 %a %uint_0 %162 %uint_0
+        %176 = OpLoad %_arr_Inner_std140_uint_4 %175
+        %173 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr4_Inner %176
+        %178 = OpAccessChain %_ptr_Uniform_Inner_std140 %a %uint_0 %162 %uint_0 %163
+        %179 = OpLoad %Inner_std140 %178
+        %177 = OpFunctionCall %Inner %conv_Inner %179
+        %181 = OpBitcast %uint %162
+        %182 = OpBitcast %uint %163
+        %180 = OpFunctionCall %mat2v2half %load_a_inner_p0_a_p1_m %181 %182
+        %184 = OpBitcast %uint %162
+        %185 = OpBitcast %uint %163
+        %186 = OpBitcast %uint %164
+        %183 = OpFunctionCall %v2half %load_a_inner_p0_a_p1_m_p2 %184 %185 %186
+        %187 = OpFunctionCall %int %i
+        %189 = OpBitcast %uint %162
+        %190 = OpBitcast %uint %163
+        %191 = OpBitcast %uint %164
+        %192 = OpBitcast %uint %187
+        %188 = OpFunctionCall %half %load_a_inner_p0_a_p1_m_p2_p3 %189 %190 %191 %192
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f16/dynamic_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/dynamic_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..59f69e1
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/dynamic_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,36 @@
+enable f16;
+
+struct Inner {
+  @size(64)
+  m : mat2x2<f16>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+var<private> counter = 0;
+
+fn i() -> i32 {
+  counter++;
+  return counter;
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &(a);
+  let p_a_i = &((*(p_a))[i()]);
+  let p_a_i_a = &((*(p_a_i)).a);
+  let p_a_i_a_i = &((*(p_a_i_a))[i()]);
+  let p_a_i_a_i_m = &((*(p_a_i_a_i)).m);
+  let p_a_i_a_i_m_i = &((*(p_a_i_a_i_m))[i()]);
+  let l_a : array<Outer, 4> = *(p_a);
+  let l_a_i : Outer = *(p_a_i);
+  let l_a_i_a : array<Inner, 4> = *(p_a_i_a);
+  let l_a_i_a_i : Inner = *(p_a_i_a_i);
+  let l_a_i_a_i_m : mat2x2<f16> = *(p_a_i_a_i_m);
+  let l_a_i_a_i_m_i : vec2<f16> = *(p_a_i_a_i_m_i);
+  let l_a_i_a_i_m_i_i : f16 = (*(p_a_i_a_i_m_i))[i()];
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f16/static_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/static_index_via_ptr.wgsl
new file mode 100644
index 0000000..3e2c36f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/static_index_via_ptr.wgsl

@@ -0,0 +1,31 @@
+enable f16;
+
+struct Inner {
+  @size(64)
+  m : mat2x2<f16>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &a;
+  let p_a_3 = &((*p_a)[3]);
+  let p_a_3_a = &((*p_a_3).a);
+  let p_a_3_a_2 = &((*p_a_3_a)[2]);
+  let p_a_3_a_2_m = &((*p_a_3_a_2).m);
+  let p_a_3_a_2_m_1 = &((*p_a_3_a_2_m)[1]);
+
+
+  let l_a             : array<Outer, 4> = *p_a;
+  let l_a_3           : Outer           = *p_a_3;
+  let l_a_3_a         : array<Inner, 4> = *p_a_3_a;
+  let l_a_3_a_2       : Inner           = *p_a_3_a_2;
+  let l_a_3_a_2_m     : mat2x2<f16>     = *p_a_3_a_2_m;
+  let l_a_3_a_2_m_1   : vec2<f16>       = *p_a_3_a_2_m_1;
+  let l_a_3_a_2_m_1_0 : f16             = (*p_a_3_a_2_m_1)[0];
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..fa6940d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,63 @@
+struct Inner {
+  matrix<float16_t, 2, 2> m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];
+};
+
+matrix<float16_t, 2, 2> tint_symbol_4(uint4 buffer[64], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  return matrix<float16_t, 2, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))));
+}
+
+Inner tint_symbol_3(uint4 buffer[64], uint offset) {
+  const Inner tint_symbol_7 = {tint_symbol_4(buffer, (offset + 0u))};
+  return tint_symbol_7;
+}
+
+typedef Inner tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[64], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_3(buffer, (offset + (i * 64u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_1(uint4 buffer[64], uint offset) {
+  const Outer tint_symbol_8 = {tint_symbol_2(buffer, (offset + 0u))};
+  return tint_symbol_8;
+}
+
+typedef Outer tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[64], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr_1[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 256u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const Outer l_a[4] = tint_symbol(a, 0u);
+  const Outer l_a_3 = tint_symbol_1(a, 768u);
+  const Inner l_a_3_a[4] = tint_symbol_2(a, 768u);
+  const Inner l_a_3_a_2 = tint_symbol_3(a, 896u);
+  const matrix<float16_t, 2, 2> l_a_3_a_2_m = tint_symbol_4(a, 896u);
+  uint ubo_load_2 = a[56].y;
+  const vector<float16_t, 2> l_a_3_a_2_m_1 = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16)));
+  const float16_t l_a_3_a_2_m_1_0 = float16_t(f16tof32(((a[56].y) & 0xFFFF)));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..64dbf1e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,68 @@
+SKIP: FAILED
+
+struct Inner {
+  matrix<float16_t, 2, 2> m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];
+};
+
+matrix<float16_t, 2, 2> tint_symbol_4(uint4 buffer[64], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  return matrix<float16_t, 2, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))));
+}
+
+Inner tint_symbol_3(uint4 buffer[64], uint offset) {
+  const Inner tint_symbol_7 = {tint_symbol_4(buffer, (offset + 0u))};
+  return tint_symbol_7;
+}
+
+typedef Inner tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[64], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_3(buffer, (offset + (i * 64u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_1(uint4 buffer[64], uint offset) {
+  const Outer tint_symbol_8 = {tint_symbol_2(buffer, (offset + 0u))};
+  return tint_symbol_8;
+}
+
+typedef Outer tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[64], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr_1[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 256u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const Outer l_a[4] = tint_symbol(a, 0u);
+  const Outer l_a_3 = tint_symbol_1(a, 768u);
+  const Inner l_a_3_a[4] = tint_symbol_2(a, 768u);
+  const Inner l_a_3_a_2 = tint_symbol_3(a, 896u);
+  const matrix<float16_t, 2, 2> l_a_3_a_2_m = tint_symbol_4(a, 896u);
+  uint ubo_load_2 = a[56].y;
+  const vector<float16_t, 2> l_a_3_a_2_m_1 = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16)));
+  const float16_t l_a_3_a_2_m_1_0 = float16_t(f16tof32(((a[56].y) & 0xFFFF)));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000025C8106DF60(2,10-18): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f16/static_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/static_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..c29755c
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/static_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,105 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct Inner {
+  f16mat2 m;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+};
+
+struct Inner_std140 {
+  f16vec2 m_0;
+  f16vec2 m_1;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+};
+
+struct Outer {
+  Inner a[4];
+};
+
+struct Outer_std140 {
+  Inner_std140 a[4];
+};
+
+layout(binding = 0, std140) uniform a_block_std140_ubo {
+  Outer_std140 inner[4];
+} a;
+
+Inner conv_Inner(Inner_std140 val) {
+  return Inner(f16mat2(val.m_0, val.m_1), val.pad, val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.pad_7, val.pad_8, val.pad_9, val.pad_10, val.pad_11, val.pad_12, val.pad_13);
+}
+
+Inner[4] conv_arr4_Inner(Inner_std140 val[4]) {
+  Inner arr[4] = Inner[4](Inner(f16mat2(0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2(0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2(0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2(0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_Inner(val[i]);
+    }
+  }
+  return arr;
+}
+
+Outer conv_Outer(Outer_std140 val) {
+  return Outer(conv_arr4_Inner(val.a));
+}
+
+Outer[4] conv_arr4_Outer(Outer_std140 val[4]) {
+  Outer arr[4] = Outer[4](Outer(Inner[4](Inner(f16mat2(0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2(0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2(0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2(0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))), Outer(Inner[4](Inner(f16mat2(0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2(0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2(0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2(0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))), Outer(Inner[4](Inner(f16mat2(0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2(0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2(0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2(0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))), Outer(Inner[4](Inner(f16mat2(0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2(0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2(0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2(0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_Outer(val[i]);
+    }
+  }
+  return arr;
+}
+
+f16mat2 load_a_inner_3_a_2_m() {
+  return f16mat2(a.inner[3u].a[2u].m_0, a.inner[3u].a[2u].m_1);
+}
+
+void f() {
+  Outer p_a[4] = conv_arr4_Outer(a.inner);
+  Outer p_a_3 = conv_Outer(a.inner[3u]);
+  Inner p_a_3_a[4] = conv_arr4_Inner(a.inner[3u].a);
+  Inner p_a_3_a_2 = conv_Inner(a.inner[3u].a[2u]);
+  f16mat2 p_a_3_a_2_m = load_a_inner_3_a_2_m();
+  f16vec2 p_a_3_a_2_m_1 = a.inner[3u].a[2u].m_1;
+  Outer l_a[4] = conv_arr4_Outer(a.inner);
+  Outer l_a_3 = conv_Outer(a.inner[3u]);
+  Inner l_a_3_a[4] = conv_arr4_Inner(a.inner[3u].a);
+  Inner l_a_3_a_2 = conv_Inner(a.inner[3u].a[2u]);
+  f16mat2 l_a_3_a_2_m = load_a_inner_3_a_2_m();
+  f16vec2 l_a_3_a_2_m_1 = a.inner[3u].a[2u].m_1;
+  float16_t l_a_3_a_2_m_1_0 = a.inner[3u].a[2u].m_1[0u];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f16/static_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/static_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..b44cfed
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/static_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,36 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct Inner {
+  /* 0x0000 */ half2x2 m;
+  /* 0x0008 */ tint_array<int8_t, 56> tint_pad;
+};
+
+struct Outer {
+  /* 0x0000 */ tint_array<Inner, 4> a;
+};
+
+kernel void f(const constant tint_array<Outer, 4>* tint_symbol [[buffer(0)]]) {
+  tint_array<Outer, 4> const l_a = *(tint_symbol);
+  Outer const l_a_3 = (*(tint_symbol))[3];
+  tint_array<Inner, 4> const l_a_3_a = (*(tint_symbol))[3].a;
+  Inner const l_a_3_a_2 = (*(tint_symbol))[3].a[2];
+  half2x2 const l_a_3_a_2_m = (*(tint_symbol))[3].a[2].m;
+  half2 const l_a_3_a_2_m_1 = (*(tint_symbol))[3].a[2].m[1];
+  half const l_a_3_a_2_m_1_0 = (*(tint_symbol))[3].a[2].m[1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f16/static_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/static_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..762929a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/static_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,227 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 140
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %a_block_std140 "a_block_std140"
+               OpMemberName %a_block_std140 0 "inner"
+               OpName %Outer_std140 "Outer_std140"
+               OpMemberName %Outer_std140 0 "a"
+               OpName %Inner_std140 "Inner_std140"
+               OpMemberName %Inner_std140 0 "m_0"
+               OpMemberName %Inner_std140 1 "m_1"
+               OpName %a "a"
+               OpName %Inner "Inner"
+               OpMemberName %Inner 0 "m"
+               OpName %conv_Inner "conv_Inner"
+               OpName %val "val"
+               OpName %conv_arr4_Inner "conv_arr4_Inner"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index_1 "var_for_index_1"
+               OpName %Outer "Outer"
+               OpMemberName %Outer 0 "a"
+               OpName %conv_Outer "conv_Outer"
+               OpName %val_1 "val"
+               OpName %conv_arr4_Outer "conv_arr4_Outer"
+               OpName %val_2 "val"
+               OpName %arr_0 "arr"
+               OpName %i_0 "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_a_inner_3_a_2_m "load_a_inner_3_a_2_m"
+               OpName %f "f"
+               OpDecorate %a_block_std140 Block
+               OpMemberDecorate %a_block_std140 0 Offset 0
+               OpMemberDecorate %Outer_std140 0 Offset 0
+               OpMemberDecorate %Inner_std140 0 Offset 0
+               OpMemberDecorate %Inner_std140 1 Offset 4
+               OpDecorate %_arr_Inner_std140_uint_4 ArrayStride 64
+               OpDecorate %_arr_Outer_std140_uint_4 ArrayStride 256
+               OpDecorate %a NonWritable
+               OpDecorate %a DescriptorSet 0
+               OpDecorate %a Binding 0
+               OpMemberDecorate %Inner 0 Offset 0
+               OpMemberDecorate %Inner 0 ColMajor
+               OpMemberDecorate %Inner 0 MatrixStride 4
+               OpDecorate %_arr_Inner_uint_4 ArrayStride 64
+               OpMemberDecorate %Outer 0 Offset 0
+               OpDecorate %_arr_Outer_uint_4 ArrayStride 256
+       %half = OpTypeFloat 16
+     %v2half = OpTypeVector %half 2
+%Inner_std140 = OpTypeStruct %v2half %v2half
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_Inner_std140_uint_4 = OpTypeArray %Inner_std140 %uint_4
+%Outer_std140 = OpTypeStruct %_arr_Inner_std140_uint_4
+%_arr_Outer_std140_uint_4 = OpTypeArray %Outer_std140 %uint_4
+%a_block_std140 = OpTypeStruct %_arr_Outer_std140_uint_4
+%_ptr_Uniform_a_block_std140 = OpTypePointer Uniform %a_block_std140
+          %a = OpVariable %_ptr_Uniform_a_block_std140 Uniform
+ %mat2v2half = OpTypeMatrix %v2half 2
+      %Inner = OpTypeStruct %mat2v2half
+         %12 = OpTypeFunction %Inner %Inner_std140
+%_arr_Inner_uint_4 = OpTypeArray %Inner %uint_4
+         %22 = OpTypeFunction %_arr_Inner_uint_4 %_arr_Inner_std140_uint_4
+%_ptr_Function__arr_Inner_uint_4 = OpTypePointer Function %_arr_Inner_uint_4
+         %29 = OpConstantNull %_arr_Inner_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %32 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_Inner_std140_uint_4 = OpTypePointer Function %_arr_Inner_std140_uint_4
+         %45 = OpConstantNull %_arr_Inner_std140_uint_4
+%_ptr_Function_Inner = OpTypePointer Function %Inner
+%_ptr_Function_Inner_std140 = OpTypePointer Function %Inner_std140
+     %uint_1 = OpConstant %uint 1
+      %Outer = OpTypeStruct %_arr_Inner_uint_4
+         %58 = OpTypeFunction %Outer %Outer_std140
+%_arr_Outer_uint_4 = OpTypeArray %Outer %uint_4
+         %66 = OpTypeFunction %_arr_Outer_uint_4 %_arr_Outer_std140_uint_4
+%_ptr_Function__arr_Outer_uint_4 = OpTypePointer Function %_arr_Outer_uint_4
+         %73 = OpConstantNull %_arr_Outer_uint_4
+%_ptr_Function__arr_Outer_std140_uint_4 = OpTypePointer Function %_arr_Outer_std140_uint_4
+         %86 = OpConstantNull %_arr_Outer_std140_uint_4
+%_ptr_Function_Outer = OpTypePointer Function %Outer
+%_ptr_Function_Outer_std140 = OpTypePointer Function %Outer_std140
+         %98 = OpTypeFunction %mat2v2half
+     %uint_0 = OpConstant %uint 0
+     %uint_3 = OpConstant %uint 3
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_Inner_std140 = OpTypePointer Uniform %Inner_std140
+%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half
+       %void = OpTypeVoid
+        %115 = OpTypeFunction %void
+%_ptr_Uniform__arr_Outer_std140_uint_4 = OpTypePointer Uniform %_arr_Outer_std140_uint_4
+%_ptr_Uniform_Outer_std140 = OpTypePointer Uniform %Outer_std140
+%_ptr_Uniform__arr_Inner_std140_uint_4 = OpTypePointer Uniform %_arr_Inner_std140_uint_4
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+ %conv_Inner = OpFunction %Inner None %12
+        %val = OpFunctionParameter %Inner_std140
+         %17 = OpLabel
+         %18 = OpCompositeExtract %v2half %val 0
+         %19 = OpCompositeExtract %v2half %val 1
+         %20 = OpCompositeConstruct %mat2v2half %18 %19
+         %21 = OpCompositeConstruct %Inner %20
+               OpReturnValue %21
+               OpFunctionEnd
+%conv_arr4_Inner = OpFunction %_arr_Inner_uint_4 None %22
+      %val_0 = OpFunctionParameter %_arr_Inner_std140_uint_4
+         %26 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_Inner_uint_4 Function %29
+          %i = OpVariable %_ptr_Function_uint Function %32
+%var_for_index_1 = OpVariable %_ptr_Function__arr_Inner_std140_uint_4 Function %45
+               OpBranch %33
+         %33 = OpLabel
+               OpLoopMerge %34 %35 None
+               OpBranch %36
+         %36 = OpLabel
+         %38 = OpLoad %uint %i
+         %39 = OpULessThan %bool %38 %uint_4
+         %37 = OpLogicalNot %bool %39
+               OpSelectionMerge %41 None
+               OpBranchConditional %37 %42 %41
+         %42 = OpLabel
+               OpBranch %34
+         %41 = OpLabel
+               OpStore %var_for_index_1 %val_0
+         %46 = OpLoad %uint %i
+         %48 = OpAccessChain %_ptr_Function_Inner %arr %46
+         %50 = OpLoad %uint %i
+         %52 = OpAccessChain %_ptr_Function_Inner_std140 %var_for_index_1 %50
+         %53 = OpLoad %Inner_std140 %52
+         %49 = OpFunctionCall %Inner %conv_Inner %53
+               OpStore %48 %49
+               OpBranch %35
+         %35 = OpLabel
+         %54 = OpLoad %uint %i
+         %56 = OpIAdd %uint %54 %uint_1
+               OpStore %i %56
+               OpBranch %33
+         %34 = OpLabel
+         %57 = OpLoad %_arr_Inner_uint_4 %arr
+               OpReturnValue %57
+               OpFunctionEnd
+ %conv_Outer = OpFunction %Outer None %58
+      %val_1 = OpFunctionParameter %Outer_std140
+         %62 = OpLabel
+         %64 = OpCompositeExtract %_arr_Inner_std140_uint_4 %val_1 0
+         %63 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr4_Inner %64
+         %65 = OpCompositeConstruct %Outer %63
+               OpReturnValue %65
+               OpFunctionEnd
+%conv_arr4_Outer = OpFunction %_arr_Outer_uint_4 None %66
+      %val_2 = OpFunctionParameter %_arr_Outer_std140_uint_4
+         %70 = OpLabel
+      %arr_0 = OpVariable %_ptr_Function__arr_Outer_uint_4 Function %73
+        %i_0 = OpVariable %_ptr_Function_uint Function %32
+%var_for_index = OpVariable %_ptr_Function__arr_Outer_std140_uint_4 Function %86
+               OpBranch %75
+         %75 = OpLabel
+               OpLoopMerge %76 %77 None
+               OpBranch %78
+         %78 = OpLabel
+         %80 = OpLoad %uint %i_0
+         %81 = OpULessThan %bool %80 %uint_4
+         %79 = OpLogicalNot %bool %81
+               OpSelectionMerge %82 None
+               OpBranchConditional %79 %83 %82
+         %83 = OpLabel
+               OpBranch %76
+         %82 = OpLabel
+               OpStore %var_for_index %val_2
+         %87 = OpLoad %uint %i_0
+         %89 = OpAccessChain %_ptr_Function_Outer %arr_0 %87
+         %91 = OpLoad %uint %i_0
+         %93 = OpAccessChain %_ptr_Function_Outer_std140 %var_for_index %91
+         %94 = OpLoad %Outer_std140 %93
+         %90 = OpFunctionCall %Outer %conv_Outer %94
+               OpStore %89 %90
+               OpBranch %77
+         %77 = OpLabel
+         %95 = OpLoad %uint %i_0
+         %96 = OpIAdd %uint %95 %uint_1
+               OpStore %i_0 %96
+               OpBranch %75
+         %76 = OpLabel
+         %97 = OpLoad %_arr_Outer_uint_4 %arr_0
+               OpReturnValue %97
+               OpFunctionEnd
+%load_a_inner_3_a_2_m = OpFunction %mat2v2half None %98
+        %100 = OpLabel
+        %106 = OpAccessChain %_ptr_Uniform_Inner_std140 %a %uint_0 %uint_3 %uint_0 %uint_2
+        %109 = OpAccessChain %_ptr_Uniform_v2half %106 %uint_0
+        %110 = OpLoad %v2half %109
+        %112 = OpAccessChain %_ptr_Uniform_v2half %106 %uint_1
+        %113 = OpLoad %v2half %112
+        %114 = OpCompositeConstruct %mat2v2half %110 %113
+               OpReturnValue %114
+               OpFunctionEnd
+          %f = OpFunction %void None %115
+        %118 = OpLabel
+        %121 = OpAccessChain %_ptr_Uniform__arr_Outer_std140_uint_4 %a %uint_0
+        %122 = OpLoad %_arr_Outer_std140_uint_4 %121
+        %119 = OpFunctionCall %_arr_Outer_uint_4 %conv_arr4_Outer %122
+        %125 = OpAccessChain %_ptr_Uniform_Outer_std140 %a %uint_0 %uint_3
+        %126 = OpLoad %Outer_std140 %125
+        %123 = OpFunctionCall %Outer %conv_Outer %126
+        %129 = OpAccessChain %_ptr_Uniform__arr_Inner_std140_uint_4 %a %uint_0 %uint_3 %uint_0
+        %130 = OpLoad %_arr_Inner_std140_uint_4 %129
+        %127 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr4_Inner %130
+        %132 = OpAccessChain %_ptr_Uniform_Inner_std140 %a %uint_0 %uint_3 %uint_0 %uint_2
+        %133 = OpLoad %Inner_std140 %132
+        %131 = OpFunctionCall %Inner %conv_Inner %133
+        %134 = OpFunctionCall %mat2v2half %load_a_inner_3_a_2_m
+        %135 = OpAccessChain %_ptr_Uniform_v2half %a %uint_0 %uint_3 %uint_0 %uint_2 %uint_1
+        %136 = OpLoad %v2half %135
+        %138 = OpAccessChain %_ptr_Uniform_half %a %uint_0 %uint_3 %uint_0 %uint_2 %uint_1 %32
+        %139 = OpLoad %half %138
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f16/static_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/static_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..d662d25
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/static_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,29 @@
+enable f16;
+
+struct Inner {
+  @size(64)
+  m : mat2x2<f16>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &(a);
+  let p_a_3 = &((*(p_a))[3]);
+  let p_a_3_a = &((*(p_a_3)).a);
+  let p_a_3_a_2 = &((*(p_a_3_a))[2]);
+  let p_a_3_a_2_m = &((*(p_a_3_a_2)).m);
+  let p_a_3_a_2_m_1 = &((*(p_a_3_a_2_m))[1]);
+  let l_a : array<Outer, 4> = *(p_a);
+  let l_a_3 : Outer = *(p_a_3);
+  let l_a_3_a : array<Inner, 4> = *(p_a_3_a);
+  let l_a_3_a_2 : Inner = *(p_a_3_a_2);
+  let l_a_3_a_2_m : mat2x2<f16> = *(p_a_3_a_2_m);
+  let l_a_3_a_2_m_1 : vec2<f16> = *(p_a_3_a_2_m_1);
+  let l_a_3_a_2_m_1_0 : f16 = (*(p_a_3_a_2_m_1))[0];
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_builtin.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_builtin.wgsl
new file mode 100644
index 0000000..5297789
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_builtin.wgsl

@@ -0,0 +1,17 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat2x2<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    let t = transpose(u[2].m);
+    let l = length(u[0].m[1].yx);
+    let a = abs(u[0].m[1].yx.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_builtin.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_builtin.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..bb82c5d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_builtin.wgsl.expected.dxc.hlsl

@@ -0,0 +1,21 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+
+matrix<float16_t, 2, 2> tint_symbol(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  return matrix<float16_t, 2, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 2, 2> t = transpose(tint_symbol(u, 260u));
+  uint ubo_load_2 = u[0].z;
+  const float16_t l = length(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))).yx);
+  uint ubo_load_3 = u[0].z;
+  const float16_t a = abs(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))).yx.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_builtin.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_builtin.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..f83ab60
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_builtin.wgsl.expected.fxc.hlsl

@@ -0,0 +1,26 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+
+matrix<float16_t, 2, 2> tint_symbol(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  return matrix<float16_t, 2, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 2, 2> t = transpose(tint_symbol(u, 260u));
+  uint ubo_load_2 = u[0].z;
+  const float16_t l = length(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))).yx);
+  uint ubo_load_3 = u[0].z;
+  const float16_t a = abs(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))).yx.x);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000002973821CED0(5,8-16): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_builtin.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_builtin.wgsl.expected.glsl
new file mode 100644
index 0000000..392c9d0
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_builtin.wgsl.expected.glsl

@@ -0,0 +1,91 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct S {
+  int before;
+  f16mat2 m;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  int after;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+  uint pad_26;
+  uint pad_27;
+};
+
+struct S_std140 {
+  int before;
+  f16vec2 m_0;
+  f16vec2 m_1;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  int after;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+  uint pad_26;
+  uint pad_27;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  S_std140 inner[4];
+} u;
+
+f16mat2 load_u_inner_2_m() {
+  return f16mat2(u.inner[2u].m_0, u.inner[2u].m_1);
+}
+
+void f() {
+  f16mat2 t = transpose(load_u_inner_2_m());
+  float16_t l = length(u.inner[0u].m_1.yx);
+  float16_t a = abs(u.inner[0u].m_1.yx[0u]);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_builtin.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_builtin.wgsl.expected.msl
new file mode 100644
index 0000000..f2a8adb
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_builtin.wgsl.expected.msl

@@ -0,0 +1,31 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ half2x2 m;
+  /* 0x000c */ tint_array<int8_t, 52> tint_pad;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_1;
+};
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol [[buffer(0)]]) {
+  half2x2 const t = transpose((*(tint_symbol))[2].m);
+  half const l = length(half2((*(tint_symbol))[0].m[1]).yx);
+  half const a = fabs(half2((*(tint_symbol))[0].m[1]).yx[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_builtin.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_builtin.wgsl.expected.spvasm
new file mode 100644
index 0000000..3bdf3d7
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_builtin.wgsl.expected.spvasm

@@ -0,0 +1,79 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 46
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+         %36 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "after"
+               OpName %u "u"
+               OpName %load_u_inner_2_m "load_u_inner_2_m"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 4
+               OpMemberDecorate %S_std140 2 Offset 8
+               OpMemberDecorate %S_std140 3 Offset 64
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+        %int = OpTypeInt 32 1
+       %half = OpTypeFloat 16
+     %v2half = OpTypeVector %half 2
+   %S_std140 = OpTypeStruct %int %v2half %v2half %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat2v2half = OpTypeMatrix %v2half 2
+         %11 = OpTypeFunction %mat2v2half
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+     %uint_1 = OpConstant %uint 1
+%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half
+       %void = OpTypeVoid
+         %29 = OpTypeFunction %void
+         %37 = OpConstantNull %uint
+%load_u_inner_2_m = OpFunction %mat2v2half None %11
+         %14 = OpLabel
+         %19 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %23 = OpAccessChain %_ptr_Uniform_v2half %19 %uint_1
+         %24 = OpLoad %v2half %23
+         %26 = OpAccessChain %_ptr_Uniform_v2half %19 %uint_2
+         %27 = OpLoad %v2half %26
+         %28 = OpCompositeConstruct %mat2v2half %24 %27
+               OpReturnValue %28
+               OpFunctionEnd
+          %f = OpFunction %void None %29
+         %32 = OpLabel
+         %34 = OpFunctionCall %mat2v2half %load_u_inner_2_m
+         %33 = OpTranspose %mat2v2half %34
+         %38 = OpAccessChain %_ptr_Uniform_v2half %u %uint_0 %37 %uint_2
+         %39 = OpLoad %v2half %38
+         %40 = OpVectorShuffle %v2half %39 %39 1 0
+         %35 = OpExtInst %half %36 Length %40
+         %42 = OpAccessChain %_ptr_Uniform_v2half %u %uint_0 %37 %uint_2
+         %43 = OpLoad %v2half %42
+         %44 = OpVectorShuffle %v2half %43 %43 1 0
+         %45 = OpCompositeExtract %half %44 0
+         %41 = OpExtInst %half %36 FAbs %45
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_builtin.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_builtin.wgsl.expected.wgsl
new file mode 100644
index 0000000..ade5de6
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_builtin.wgsl.expected.wgsl

@@ -0,0 +1,17 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat2x2<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let t = transpose(u[2].m);
+  let l = length(u[0].m[1].yx);
+  let a = abs(u[0].m[1].yx.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_fn.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_fn.wgsl
new file mode 100644
index 0000000..1ddc722
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_fn.wgsl

@@ -0,0 +1,25 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat2x2<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+fn a(a : array<S, 4>) {}
+fn b(s : S) {}
+fn c(m : mat2x2<f16>) {}
+fn d(v : vec2<f16>) {}
+fn e(f : f16) {}
+
+@compute @workgroup_size(1)
+fn f() {
+    a(u);
+    b(u[2]);
+    c(u[2].m);
+    d(u[0].m[1].yx);
+    e(u[0].m[1].yx.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_fn.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_fn.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..03670a0
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_fn.wgsl.expected.dxc.hlsl

@@ -0,0 +1,62 @@
+struct S {
+  int before;
+  matrix<float16_t, 2, 2> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+
+void a(S a_1[4]) {
+}
+
+void b(S s) {
+}
+
+void c(matrix<float16_t, 2, 2> m) {
+}
+
+void d(vector<float16_t, 2> v) {
+}
+
+void e(float16_t f_1) {
+}
+
+matrix<float16_t, 2, 2> tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  return matrix<float16_t, 2, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))));
+}
+
+S tint_symbol_1(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_2 = ((offset + 0u)) / 4;
+  const uint scalar_offset_3 = ((offset + 64u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4]), tint_symbol_3(buffer, (offset + 4u)), asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(tint_symbol_1(u, 256u));
+  c(tint_symbol_3(u, 260u));
+  uint ubo_load_2 = u[0].z;
+  d(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))).yx);
+  uint ubo_load_3 = u[0].z;
+  e(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))).yx.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_fn.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_fn.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..ca733ad
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_fn.wgsl.expected.fxc.hlsl

@@ -0,0 +1,67 @@
+SKIP: FAILED
+
+struct S {
+  int before;
+  matrix<float16_t, 2, 2> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+
+void a(S a_1[4]) {
+}
+
+void b(S s) {
+}
+
+void c(matrix<float16_t, 2, 2> m) {
+}
+
+void d(vector<float16_t, 2> v) {
+}
+
+void e(float16_t f_1) {
+}
+
+matrix<float16_t, 2, 2> tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  return matrix<float16_t, 2, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))));
+}
+
+S tint_symbol_1(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_2 = ((offset + 0u)) / 4;
+  const uint scalar_offset_3 = ((offset + 64u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4]), tint_symbol_3(buffer, (offset + 4u)), asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(tint_symbol_1(u, 256u));
+  c(tint_symbol_3(u, 260u));
+  uint ubo_load_2 = u[0].z;
+  d(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))).yx);
+  uint ubo_load_3 = u[0].z;
+  e(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))).yx.x);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000029EA87296E0(3,10-18): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_fn.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_fn.wgsl.expected.glsl
new file mode 100644
index 0000000..5ddb289
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_fn.wgsl.expected.glsl

@@ -0,0 +1,122 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct S {
+  int before;
+  f16mat2 m;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  int after;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+  uint pad_26;
+  uint pad_27;
+};
+
+struct S_std140 {
+  int before;
+  f16vec2 m_0;
+  f16vec2 m_1;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  int after;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+  uint pad_26;
+  uint pad_27;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  S_std140 inner[4];
+} u;
+
+void a(S a_1[4]) {
+}
+
+void b(S s) {
+}
+
+void c(f16mat2 m) {
+}
+
+void d(f16vec2 v) {
+}
+
+void e(float16_t f_1) {
+}
+
+S conv_S(S_std140 val) {
+  return S(val.before, f16mat2(val.m_0, val.m_1), val.pad, val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.pad_7, val.pad_8, val.pad_9, val.pad_10, val.pad_11, val.pad_12, val.after, val.pad_13, val.pad_14, val.pad_15, val.pad_16, val.pad_17, val.pad_18, val.pad_19, val.pad_20, val.pad_21, val.pad_22, val.pad_23, val.pad_24, val.pad_25, val.pad_26, val.pad_27);
+}
+
+S[4] conv_arr4_S(S_std140 val[4]) {
+  S arr[4] = S[4](S(0, f16mat2(0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, f16mat2(0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, f16mat2(0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, f16mat2(0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_S(val[i]);
+    }
+  }
+  return arr;
+}
+
+f16mat2 load_u_inner_2_m() {
+  return f16mat2(u.inner[2u].m_0, u.inner[2u].m_1);
+}
+
+void f() {
+  a(conv_arr4_S(u.inner));
+  b(conv_S(u.inner[2u]));
+  c(load_u_inner_2_m());
+  d(u.inner[0u].m_1.yx);
+  e(u.inner[0u].m_1.yx[0u]);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_fn.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_fn.wgsl.expected.msl
new file mode 100644
index 0000000..377d72a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_fn.wgsl.expected.msl

@@ -0,0 +1,48 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ half2x2 m;
+  /* 0x000c */ tint_array<int8_t, 52> tint_pad;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_1;
+};
+
+void a(tint_array<S, 4> a_1) {
+}
+
+void b(S s) {
+}
+
+void c(half2x2 m) {
+}
+
+void d(half2 v) {
+}
+
+void e(half f_1) {
+}
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol [[buffer(0)]]) {
+  a(*(tint_symbol));
+  b((*(tint_symbol))[2]);
+  c((*(tint_symbol))[2].m);
+  d(half2((*(tint_symbol))[0].m[1]).yx);
+  e(half2((*(tint_symbol))[0].m[1]).yx[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_fn.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_fn.wgsl.expected.spvasm
new file mode 100644
index 0000000..14ba4ae
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_fn.wgsl.expected.spvasm

@@ -0,0 +1,204 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 119
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "after"
+               OpName %u "u"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %a "a"
+               OpName %a_1 "a_1"
+               OpName %b "b"
+               OpName %s "s"
+               OpName %c "c"
+               OpName %m "m"
+               OpName %d "d"
+               OpName %v "v"
+               OpName %e "e"
+               OpName %f_1 "f_1"
+               OpName %conv_S "conv_S"
+               OpName %val "val"
+               OpName %conv_arr4_S "conv_arr4_S"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_u_inner_2_m "load_u_inner_2_m"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 4
+               OpMemberDecorate %S_std140 2 Offset 8
+               OpMemberDecorate %S_std140 3 Offset 64
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 4
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 4
+               OpMemberDecorate %S 2 Offset 64
+               OpDecorate %_arr_S_uint_4 ArrayStride 128
+        %int = OpTypeInt 32 1
+       %half = OpTypeFloat 16
+     %v2half = OpTypeVector %half 2
+   %S_std140 = OpTypeStruct %int %v2half %v2half %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+       %void = OpTypeVoid
+ %mat2v2half = OpTypeMatrix %v2half 2
+          %S = OpTypeStruct %int %mat2v2half %int
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+         %11 = OpTypeFunction %void %_arr_S_uint_4
+         %19 = OpTypeFunction %void %S
+         %23 = OpTypeFunction %void %mat2v2half
+         %27 = OpTypeFunction %void %v2half
+         %31 = OpTypeFunction %void %half
+         %35 = OpTypeFunction %S %S_std140
+         %45 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
+%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
+         %51 = OpConstantNull %_arr_S_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %54 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
+         %67 = OpConstantNull %_arr_S_std140_uint_4
+%_ptr_Function_S = OpTypePointer Function %S
+%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
+     %uint_1 = OpConstant %uint 1
+         %80 = OpTypeFunction %mat2v2half
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half
+         %96 = OpTypeFunction %void
+%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
+          %a = OpFunction %void None %11
+        %a_1 = OpFunctionParameter %_arr_S_uint_4
+         %18 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %b = OpFunction %void None %19
+          %s = OpFunctionParameter %S
+         %22 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %c = OpFunction %void None %23
+          %m = OpFunctionParameter %mat2v2half
+         %26 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %d = OpFunction %void None %27
+          %v = OpFunctionParameter %v2half
+         %30 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %e = OpFunction %void None %31
+        %f_1 = OpFunctionParameter %half
+         %34 = OpLabel
+               OpReturn
+               OpFunctionEnd
+     %conv_S = OpFunction %S None %35
+        %val = OpFunctionParameter %S_std140
+         %38 = OpLabel
+         %39 = OpCompositeExtract %int %val 0
+         %40 = OpCompositeExtract %v2half %val 1
+         %41 = OpCompositeExtract %v2half %val 2
+         %42 = OpCompositeConstruct %mat2v2half %40 %41
+         %43 = OpCompositeExtract %int %val 3
+         %44 = OpCompositeConstruct %S %39 %42 %43
+               OpReturnValue %44
+               OpFunctionEnd
+%conv_arr4_S = OpFunction %_arr_S_uint_4 None %45
+      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
+         %48 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %51
+          %i = OpVariable %_ptr_Function_uint Function %54
+%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %67
+               OpBranch %55
+         %55 = OpLabel
+               OpLoopMerge %56 %57 None
+               OpBranch %58
+         %58 = OpLabel
+         %60 = OpLoad %uint %i
+         %61 = OpULessThan %bool %60 %uint_4
+         %59 = OpLogicalNot %bool %61
+               OpSelectionMerge %63 None
+               OpBranchConditional %59 %64 %63
+         %64 = OpLabel
+               OpBranch %56
+         %63 = OpLabel
+               OpStore %var_for_index %val_0
+         %68 = OpLoad %uint %i
+         %70 = OpAccessChain %_ptr_Function_S %arr %68
+         %72 = OpLoad %uint %i
+         %74 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %72
+         %75 = OpLoad %S_std140 %74
+         %71 = OpFunctionCall %S %conv_S %75
+               OpStore %70 %71
+               OpBranch %57
+         %57 = OpLabel
+         %76 = OpLoad %uint %i
+         %78 = OpIAdd %uint %76 %uint_1
+               OpStore %i %78
+               OpBranch %55
+         %56 = OpLabel
+         %79 = OpLoad %_arr_S_uint_4 %arr
+               OpReturnValue %79
+               OpFunctionEnd
+%load_u_inner_2_m = OpFunction %mat2v2half None %80
+         %82 = OpLabel
+         %87 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %90 = OpAccessChain %_ptr_Uniform_v2half %87 %uint_1
+         %91 = OpLoad %v2half %90
+         %93 = OpAccessChain %_ptr_Uniform_v2half %87 %uint_2
+         %94 = OpLoad %v2half %93
+         %95 = OpCompositeConstruct %mat2v2half %91 %94
+               OpReturnValue %95
+               OpFunctionEnd
+          %f = OpFunction %void None %96
+         %98 = OpLabel
+        %102 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
+        %103 = OpLoad %_arr_S_std140_uint_4 %102
+        %100 = OpFunctionCall %_arr_S_uint_4 %conv_arr4_S %103
+         %99 = OpFunctionCall %void %a %100
+        %106 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+        %107 = OpLoad %S_std140 %106
+        %105 = OpFunctionCall %S %conv_S %107
+        %104 = OpFunctionCall %void %b %105
+        %109 = OpFunctionCall %mat2v2half %load_u_inner_2_m
+        %108 = OpFunctionCall %void %c %109
+        %111 = OpAccessChain %_ptr_Uniform_v2half %u %uint_0 %54 %uint_2
+        %112 = OpLoad %v2half %111
+        %113 = OpVectorShuffle %v2half %112 %112 1 0
+        %110 = OpFunctionCall %void %d %113
+        %115 = OpAccessChain %_ptr_Uniform_v2half %u %uint_0 %54 %uint_2
+        %116 = OpLoad %v2half %115
+        %117 = OpVectorShuffle %v2half %116 %116 1 0
+        %118 = OpCompositeExtract %half %117 0
+        %114 = OpFunctionCall %void %e %118
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_fn.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_fn.wgsl.expected.wgsl
new file mode 100644
index 0000000..3e7d522
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_fn.wgsl.expected.wgsl

@@ -0,0 +1,34 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat2x2<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+fn a(a : array<S, 4>) {
+}
+
+fn b(s : S) {
+}
+
+fn c(m : mat2x2<f16>) {
+}
+
+fn d(v : vec2<f16>) {
+}
+
+fn e(f : f16) {
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  a(u);
+  b(u[2]);
+  c(u[2].m);
+  d(u[0].m[1].yx);
+  e(u[0].m[1].yx.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_private.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_private.wgsl
new file mode 100644
index 0000000..e25cc5b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_private.wgsl

@@ -0,0 +1,19 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat2x2<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+var<private> p : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    p = u;
+    p[1] = u[2];
+    p[3].m = u[2].m;
+    p[1].m[0] = u[0].m[1].yx;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_private.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_private.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..5ff5a75
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_private.wgsl.expected.dxc.hlsl

@@ -0,0 +1,46 @@
+struct S {
+  int before;
+  matrix<float16_t, 2, 2> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+static S p[4] = (S[4])0;
+
+matrix<float16_t, 2, 2> tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  return matrix<float16_t, 2, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))));
+}
+
+S tint_symbol_1(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_2 = ((offset + 0u)) / 4;
+  const uint scalar_offset_3 = ((offset + 64u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4]), tint_symbol_3(buffer, (offset + 4u)), asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = tint_symbol_1(u, 256u);
+  p[3].m = tint_symbol_3(u, 260u);
+  uint ubo_load_2 = u[0].z;
+  p[1].m[0] = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))).yx;
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_private.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_private.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..e1bce07
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_private.wgsl.expected.fxc.hlsl

@@ -0,0 +1,51 @@
+SKIP: FAILED
+
+struct S {
+  int before;
+  matrix<float16_t, 2, 2> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+static S p[4] = (S[4])0;
+
+matrix<float16_t, 2, 2> tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  return matrix<float16_t, 2, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))));
+}
+
+S tint_symbol_1(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_2 = ((offset + 0u)) / 4;
+  const uint scalar_offset_3 = ((offset + 64u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4]), tint_symbol_3(buffer, (offset + 4u)), asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = tint_symbol_1(u, 256u);
+  p[3].m = tint_symbol_3(u, 260u);
+  uint ubo_load_2 = u[0].z;
+  p[1].m[0] = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))).yx;
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x00000172713CA4A0(3,10-18): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_private.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_private.wgsl.expected.glsl
new file mode 100644
index 0000000..cecf067
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_private.wgsl.expected.glsl

@@ -0,0 +1,107 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct S {
+  int before;
+  f16mat2 m;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  int after;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+  uint pad_26;
+  uint pad_27;
+};
+
+struct S_std140 {
+  int before;
+  f16vec2 m_0;
+  f16vec2 m_1;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  int after;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+  uint pad_26;
+  uint pad_27;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  S_std140 inner[4];
+} u;
+
+S p[4] = S[4](S(0, f16mat2(0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, f16mat2(0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, f16mat2(0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, f16mat2(0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+S conv_S(S_std140 val) {
+  return S(val.before, f16mat2(val.m_0, val.m_1), val.pad, val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.pad_7, val.pad_8, val.pad_9, val.pad_10, val.pad_11, val.pad_12, val.after, val.pad_13, val.pad_14, val.pad_15, val.pad_16, val.pad_17, val.pad_18, val.pad_19, val.pad_20, val.pad_21, val.pad_22, val.pad_23, val.pad_24, val.pad_25, val.pad_26, val.pad_27);
+}
+
+S[4] conv_arr4_S(S_std140 val[4]) {
+  S arr[4] = S[4](S(0, f16mat2(0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, f16mat2(0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, f16mat2(0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, f16mat2(0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_S(val[i]);
+    }
+  }
+  return arr;
+}
+
+f16mat2 load_u_inner_2_m() {
+  return f16mat2(u.inner[2u].m_0, u.inner[2u].m_1);
+}
+
+void f() {
+  p = conv_arr4_S(u.inner);
+  p[1] = conv_S(u.inner[2u]);
+  p[3].m = load_u_inner_2_m();
+  p[1].m[0] = u.inner[0u].m_1.yx;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_private.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_private.wgsl.expected.msl
new file mode 100644
index 0000000..a5d8527
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_private.wgsl.expected.msl

@@ -0,0 +1,33 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ half2x2 m;
+  /* 0x000c */ tint_array<int8_t, 52> tint_pad;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_1;
+};
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol_1 [[buffer(0)]]) {
+  thread tint_array<S, 4> tint_symbol = {};
+  tint_symbol = *(tint_symbol_1);
+  tint_symbol[1] = (*(tint_symbol_1))[2];
+  tint_symbol[3].m = (*(tint_symbol_1))[2].m;
+  tint_symbol[1].m[0] = half2((*(tint_symbol_1))[0].m[1]).yx;
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_private.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_private.wgsl.expected.spvasm
new file mode 100644
index 0000000..45b249e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_private.wgsl.expected.spvasm

@@ -0,0 +1,171 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 101
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "after"
+               OpName %u "u"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %p "p"
+               OpName %conv_S "conv_S"
+               OpName %val "val"
+               OpName %conv_arr4_S "conv_arr4_S"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_u_inner_2_m "load_u_inner_2_m"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 4
+               OpMemberDecorate %S_std140 2 Offset 8
+               OpMemberDecorate %S_std140 3 Offset 64
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 4
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 4
+               OpMemberDecorate %S 2 Offset 64
+               OpDecorate %_arr_S_uint_4 ArrayStride 128
+        %int = OpTypeInt 32 1
+       %half = OpTypeFloat 16
+     %v2half = OpTypeVector %half 2
+   %S_std140 = OpTypeStruct %int %v2half %v2half %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat2v2half = OpTypeMatrix %v2half 2
+          %S = OpTypeStruct %int %mat2v2half %int
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+%_ptr_Private__arr_S_uint_4 = OpTypePointer Private %_arr_S_uint_4
+         %16 = OpConstantNull %_arr_S_uint_4
+          %p = OpVariable %_ptr_Private__arr_S_uint_4 Private %16
+         %17 = OpTypeFunction %S %S_std140
+         %27 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
+%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %35 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
+         %48 = OpConstantNull %_arr_S_std140_uint_4
+%_ptr_Function_S = OpTypePointer Function %S
+%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
+     %uint_1 = OpConstant %uint 1
+         %61 = OpTypeFunction %mat2v2half
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half
+       %void = OpTypeVoid
+         %77 = OpTypeFunction %void
+%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
+      %int_1 = OpConstant %int 1
+%_ptr_Private_S = OpTypePointer Private %S
+      %int_3 = OpConstant %int 3
+%_ptr_Private_mat2v2half = OpTypePointer Private %mat2v2half
+         %95 = OpConstantNull %int
+%_ptr_Private_v2half = OpTypePointer Private %v2half
+     %conv_S = OpFunction %S None %17
+        %val = OpFunctionParameter %S_std140
+         %20 = OpLabel
+         %21 = OpCompositeExtract %int %val 0
+         %22 = OpCompositeExtract %v2half %val 1
+         %23 = OpCompositeExtract %v2half %val 2
+         %24 = OpCompositeConstruct %mat2v2half %22 %23
+         %25 = OpCompositeExtract %int %val 3
+         %26 = OpCompositeConstruct %S %21 %24 %25
+               OpReturnValue %26
+               OpFunctionEnd
+%conv_arr4_S = OpFunction %_arr_S_uint_4 None %27
+      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
+         %30 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %16
+          %i = OpVariable %_ptr_Function_uint Function %35
+%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %48
+               OpBranch %36
+         %36 = OpLabel
+               OpLoopMerge %37 %38 None
+               OpBranch %39
+         %39 = OpLabel
+         %41 = OpLoad %uint %i
+         %42 = OpULessThan %bool %41 %uint_4
+         %40 = OpLogicalNot %bool %42
+               OpSelectionMerge %44 None
+               OpBranchConditional %40 %45 %44
+         %45 = OpLabel
+               OpBranch %37
+         %44 = OpLabel
+               OpStore %var_for_index %val_0
+         %49 = OpLoad %uint %i
+         %51 = OpAccessChain %_ptr_Function_S %arr %49
+         %53 = OpLoad %uint %i
+         %55 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %53
+         %56 = OpLoad %S_std140 %55
+         %52 = OpFunctionCall %S %conv_S %56
+               OpStore %51 %52
+               OpBranch %38
+         %38 = OpLabel
+         %57 = OpLoad %uint %i
+         %59 = OpIAdd %uint %57 %uint_1
+               OpStore %i %59
+               OpBranch %36
+         %37 = OpLabel
+         %60 = OpLoad %_arr_S_uint_4 %arr
+               OpReturnValue %60
+               OpFunctionEnd
+%load_u_inner_2_m = OpFunction %mat2v2half None %61
+         %63 = OpLabel
+         %68 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %71 = OpAccessChain %_ptr_Uniform_v2half %68 %uint_1
+         %72 = OpLoad %v2half %71
+         %74 = OpAccessChain %_ptr_Uniform_v2half %68 %uint_2
+         %75 = OpLoad %v2half %74
+         %76 = OpCompositeConstruct %mat2v2half %72 %75
+               OpReturnValue %76
+               OpFunctionEnd
+          %f = OpFunction %void None %77
+         %80 = OpLabel
+         %83 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
+         %84 = OpLoad %_arr_S_std140_uint_4 %83
+         %81 = OpFunctionCall %_arr_S_uint_4 %conv_arr4_S %84
+               OpStore %p %81
+         %87 = OpAccessChain %_ptr_Private_S %p %int_1
+         %89 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %90 = OpLoad %S_std140 %89
+         %88 = OpFunctionCall %S %conv_S %90
+               OpStore %87 %88
+         %93 = OpAccessChain %_ptr_Private_mat2v2half %p %int_3 %uint_1
+         %94 = OpFunctionCall %mat2v2half %load_u_inner_2_m
+               OpStore %93 %94
+         %97 = OpAccessChain %_ptr_Private_v2half %p %int_1 %uint_1 %95
+         %98 = OpAccessChain %_ptr_Uniform_v2half %u %uint_0 %35 %uint_2
+         %99 = OpLoad %v2half %98
+        %100 = OpVectorShuffle %v2half %99 %99 1 0
+               OpStore %97 %100
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_private.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_private.wgsl.expected.wgsl
new file mode 100644
index 0000000..ed6df05
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_private.wgsl.expected.wgsl

@@ -0,0 +1,20 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat2x2<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+var<private> p : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  p = u;
+  p[1] = u[2];
+  p[3].m = u[2].m;
+  p[1].m[0] = u[0].m[1].yx;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_storage.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_storage.wgsl
new file mode 100644
index 0000000..fc72568
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_storage.wgsl

@@ -0,0 +1,19 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat2x2<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    s = u;
+    s[1] = u[2];
+    s[3].m = u[2].m;
+    s[1].m[0] = u[0].m[1].yx;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_storage.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_storage.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..e217232
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_storage.wgsl.expected.dxc.hlsl

@@ -0,0 +1,66 @@
+struct S {
+  int before;
+  matrix<float16_t, 2, 2> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol_3(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 2, 2> value) {
+  buffer.Store<vector<float16_t, 2> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 4u), value[1u]);
+}
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, S value) {
+  buffer.Store((offset + 0u), asuint(value.before));
+  tint_symbol_3(buffer, (offset + 4u), value.m);
+  buffer.Store((offset + 64u), asuint(value.after));
+}
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, S value[4]) {
+  S array[4] = value;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      tint_symbol_1(buffer, (offset + (i * 128u)), array[i]);
+    }
+  }
+}
+
+matrix<float16_t, 2, 2> tint_symbol_8(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  return matrix<float16_t, 2, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))));
+}
+
+S tint_symbol_6(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_2 = ((offset + 0u)) / 4;
+  const uint scalar_offset_3 = ((offset + 64u)) / 4;
+  const S tint_symbol_10 = {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4]), tint_symbol_8(buffer, (offset + 4u)), asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])};
+  return tint_symbol_10;
+}
+
+typedef S tint_symbol_5_ret[4];
+tint_symbol_5_ret tint_symbol_5(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_6(buffer, (offset + (i_1 * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_5(u, 0u));
+  tint_symbol_1(s, 128u, tint_symbol_6(u, 256u));
+  tint_symbol_3(s, 388u, tint_symbol_8(u, 260u));
+  uint ubo_load_2 = u[0].z;
+  s.Store<vector<float16_t, 2> >(132u, vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))).yx);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_storage.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_storage.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..fedbc87
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_storage.wgsl.expected.fxc.hlsl

@@ -0,0 +1,71 @@
+SKIP: FAILED
+
+struct S {
+  int before;
+  matrix<float16_t, 2, 2> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol_3(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 2, 2> value) {
+  buffer.Store<vector<float16_t, 2> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 4u), value[1u]);
+}
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, S value) {
+  buffer.Store((offset + 0u), asuint(value.before));
+  tint_symbol_3(buffer, (offset + 4u), value.m);
+  buffer.Store((offset + 64u), asuint(value.after));
+}
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, S value[4]) {
+  S array[4] = value;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      tint_symbol_1(buffer, (offset + (i * 128u)), array[i]);
+    }
+  }
+}
+
+matrix<float16_t, 2, 2> tint_symbol_8(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  return matrix<float16_t, 2, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))));
+}
+
+S tint_symbol_6(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_2 = ((offset + 0u)) / 4;
+  const uint scalar_offset_3 = ((offset + 64u)) / 4;
+  const S tint_symbol_10 = {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4]), tint_symbol_8(buffer, (offset + 4u)), asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])};
+  return tint_symbol_10;
+}
+
+typedef S tint_symbol_5_ret[4];
+tint_symbol_5_ret tint_symbol_5(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_6(buffer, (offset + (i_1 * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_5(u, 0u));
+  tint_symbol_1(s, 128u, tint_symbol_6(u, 256u));
+  tint_symbol_3(s, 388u, tint_symbol_8(u, 260u));
+  uint ubo_load_2 = u[0].z;
+  s.Store<vector<float16_t, 2> >(132u, vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))).yx);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001A0472DA2E0(3,10-18): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_storage.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_storage.wgsl.expected.glsl
new file mode 100644
index 0000000..7980194
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_storage.wgsl.expected.glsl

@@ -0,0 +1,110 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct S {
+  int before;
+  f16mat2 m;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  int after;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+  uint pad_26;
+  uint pad_27;
+};
+
+struct S_std140 {
+  int before;
+  f16vec2 m_0;
+  f16vec2 m_1;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  int after;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+  uint pad_26;
+  uint pad_27;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  S_std140 inner[4];
+} u;
+
+layout(binding = 1, std430) buffer u_block_ssbo {
+  S inner[4];
+} s;
+
+S conv_S(S_std140 val) {
+  return S(val.before, f16mat2(val.m_0, val.m_1), val.pad, val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.pad_7, val.pad_8, val.pad_9, val.pad_10, val.pad_11, val.pad_12, val.after, val.pad_13, val.pad_14, val.pad_15, val.pad_16, val.pad_17, val.pad_18, val.pad_19, val.pad_20, val.pad_21, val.pad_22, val.pad_23, val.pad_24, val.pad_25, val.pad_26, val.pad_27);
+}
+
+S[4] conv_arr4_S(S_std140 val[4]) {
+  S arr[4] = S[4](S(0, f16mat2(0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, f16mat2(0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, f16mat2(0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, f16mat2(0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_S(val[i]);
+    }
+  }
+  return arr;
+}
+
+f16mat2 load_u_inner_2_m() {
+  return f16mat2(u.inner[2u].m_0, u.inner[2u].m_1);
+}
+
+void f() {
+  s.inner = conv_arr4_S(u.inner);
+  s.inner[1] = conv_S(u.inner[2u]);
+  s.inner[3].m = load_u_inner_2_m();
+  s.inner[1].m[0] = u.inner[0u].m_1.yx;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_storage.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_storage.wgsl.expected.msl
new file mode 100644
index 0000000..9337ac0
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_storage.wgsl.expected.msl

@@ -0,0 +1,32 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ half2x2 m;
+  /* 0x000c */ tint_array<int8_t, 52> tint_pad;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_1;
+};
+
+kernel void f(device tint_array<S, 4>* tint_symbol [[buffer(1)]], const constant tint_array<S, 4>* tint_symbol_1 [[buffer(0)]]) {
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[2];
+  (*(tint_symbol))[3].m = (*(tint_symbol_1))[2].m;
+  (*(tint_symbol))[1].m[0] = half2((*(tint_symbol_1))[0].m[1]).yx;
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_storage.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_storage.wgsl.expected.spvasm
new file mode 100644
index 0000000..b7cbc1b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_storage.wgsl.expected.spvasm

@@ -0,0 +1,180 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 104
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "after"
+               OpName %u "u"
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %s "s"
+               OpName %conv_S "conv_S"
+               OpName %val "val"
+               OpName %conv_arr4_S "conv_arr4_S"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_u_inner_2_m "load_u_inner_2_m"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 4
+               OpMemberDecorate %S_std140 2 Offset 8
+               OpMemberDecorate %S_std140 3 Offset 64
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 4
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 4
+               OpMemberDecorate %S 2 Offset 64
+               OpDecorate %_arr_S_uint_4 ArrayStride 128
+               OpDecorate %s DescriptorSet 0
+               OpDecorate %s Binding 1
+        %int = OpTypeInt 32 1
+       %half = OpTypeFloat 16
+     %v2half = OpTypeVector %half 2
+   %S_std140 = OpTypeStruct %int %v2half %v2half %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat2v2half = OpTypeMatrix %v2half 2
+          %S = OpTypeStruct %int %mat2v2half %int
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+    %u_block = OpTypeStruct %_arr_S_uint_4
+%_ptr_StorageBuffer_u_block = OpTypePointer StorageBuffer %u_block
+          %s = OpVariable %_ptr_StorageBuffer_u_block StorageBuffer
+         %17 = OpTypeFunction %S %S_std140
+         %27 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
+%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
+         %33 = OpConstantNull %_arr_S_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %36 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
+         %49 = OpConstantNull %_arr_S_std140_uint_4
+%_ptr_Function_S = OpTypePointer Function %S
+%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
+     %uint_1 = OpConstant %uint 1
+         %62 = OpTypeFunction %mat2v2half
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half
+       %void = OpTypeVoid
+         %78 = OpTypeFunction %void
+%_ptr_StorageBuffer__arr_S_uint_4 = OpTypePointer StorageBuffer %_arr_S_uint_4
+%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
+      %int_1 = OpConstant %int 1
+%_ptr_StorageBuffer_S = OpTypePointer StorageBuffer %S
+      %int_3 = OpConstant %int 3
+%_ptr_StorageBuffer_mat2v2half = OpTypePointer StorageBuffer %mat2v2half
+         %98 = OpConstantNull %int
+%_ptr_StorageBuffer_v2half = OpTypePointer StorageBuffer %v2half
+     %conv_S = OpFunction %S None %17
+        %val = OpFunctionParameter %S_std140
+         %20 = OpLabel
+         %21 = OpCompositeExtract %int %val 0
+         %22 = OpCompositeExtract %v2half %val 1
+         %23 = OpCompositeExtract %v2half %val 2
+         %24 = OpCompositeConstruct %mat2v2half %22 %23
+         %25 = OpCompositeExtract %int %val 3
+         %26 = OpCompositeConstruct %S %21 %24 %25
+               OpReturnValue %26
+               OpFunctionEnd
+%conv_arr4_S = OpFunction %_arr_S_uint_4 None %27
+      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
+         %30 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %33
+          %i = OpVariable %_ptr_Function_uint Function %36
+%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %49
+               OpBranch %37
+         %37 = OpLabel
+               OpLoopMerge %38 %39 None
+               OpBranch %40
+         %40 = OpLabel
+         %42 = OpLoad %uint %i
+         %43 = OpULessThan %bool %42 %uint_4
+         %41 = OpLogicalNot %bool %43
+               OpSelectionMerge %45 None
+               OpBranchConditional %41 %46 %45
+         %46 = OpLabel
+               OpBranch %38
+         %45 = OpLabel
+               OpStore %var_for_index %val_0
+         %50 = OpLoad %uint %i
+         %52 = OpAccessChain %_ptr_Function_S %arr %50
+         %54 = OpLoad %uint %i
+         %56 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %54
+         %57 = OpLoad %S_std140 %56
+         %53 = OpFunctionCall %S %conv_S %57
+               OpStore %52 %53
+               OpBranch %39
+         %39 = OpLabel
+         %58 = OpLoad %uint %i
+         %60 = OpIAdd %uint %58 %uint_1
+               OpStore %i %60
+               OpBranch %37
+         %38 = OpLabel
+         %61 = OpLoad %_arr_S_uint_4 %arr
+               OpReturnValue %61
+               OpFunctionEnd
+%load_u_inner_2_m = OpFunction %mat2v2half None %62
+         %64 = OpLabel
+         %69 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %72 = OpAccessChain %_ptr_Uniform_v2half %69 %uint_1
+         %73 = OpLoad %v2half %72
+         %75 = OpAccessChain %_ptr_Uniform_v2half %69 %uint_2
+         %76 = OpLoad %v2half %75
+         %77 = OpCompositeConstruct %mat2v2half %73 %76
+               OpReturnValue %77
+               OpFunctionEnd
+          %f = OpFunction %void None %78
+         %81 = OpLabel
+         %83 = OpAccessChain %_ptr_StorageBuffer__arr_S_uint_4 %s %uint_0
+         %86 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
+         %87 = OpLoad %_arr_S_std140_uint_4 %86
+         %84 = OpFunctionCall %_arr_S_uint_4 %conv_arr4_S %87
+               OpStore %83 %84
+         %90 = OpAccessChain %_ptr_StorageBuffer_S %s %uint_0 %int_1
+         %92 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %93 = OpLoad %S_std140 %92
+         %91 = OpFunctionCall %S %conv_S %93
+               OpStore %90 %91
+         %96 = OpAccessChain %_ptr_StorageBuffer_mat2v2half %s %uint_0 %int_3 %uint_1
+         %97 = OpFunctionCall %mat2v2half %load_u_inner_2_m
+               OpStore %96 %97
+        %100 = OpAccessChain %_ptr_StorageBuffer_v2half %s %uint_0 %int_1 %uint_1 %98
+        %101 = OpAccessChain %_ptr_Uniform_v2half %u %uint_0 %36 %uint_2
+        %102 = OpLoad %v2half %101
+        %103 = OpVectorShuffle %v2half %102 %102 1 0
+               OpStore %100 %103
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_storage.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_storage.wgsl.expected.wgsl
new file mode 100644
index 0000000..4f5e53f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_storage.wgsl.expected.wgsl

@@ -0,0 +1,20 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat2x2<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  s = u;
+  s[1] = u[2];
+  s[3].m = u[2].m;
+  s[1].m[0] = u[0].m[1].yx;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_workgroup.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_workgroup.wgsl
new file mode 100644
index 0000000..e296fe2
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_workgroup.wgsl

@@ -0,0 +1,19 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat2x2<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+var<workgroup> w : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    w = u;
+    w[1] = u[2];
+    w[3].m = u[2].m;
+    w[1].m[0] = u[0].m[1].yx;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_workgroup.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_workgroup.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..15c8322
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_workgroup.wgsl.expected.dxc.hlsl

@@ -0,0 +1,62 @@
+struct S {
+  int before;
+  matrix<float16_t, 2, 2> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+groupshared S w[4];
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+matrix<float16_t, 2, 2> tint_symbol_5(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  return matrix<float16_t, 2, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))));
+}
+
+S tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_2 = ((offset + 0u)) / 4;
+  const uint scalar_offset_3 = ((offset + 64u)) / 4;
+  const S tint_symbol_8 = {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4]), tint_symbol_5(buffer, (offset + 4u)), asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])};
+  return tint_symbol_8;
+}
+
+typedef S tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 128u)));
+    }
+  }
+  return arr;
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      const uint i = idx;
+      const S tint_symbol_7 = (S)0;
+      w[i] = tint_symbol_7;
+    }
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = tint_symbol_3(u, 256u);
+  w[3].m = tint_symbol_5(u, 260u);
+  uint ubo_load_2 = u[0].z;
+  w[1].m[0] = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))).yx;
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_workgroup.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_workgroup.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..50372ab
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_workgroup.wgsl.expected.fxc.hlsl

@@ -0,0 +1,67 @@
+SKIP: FAILED
+
+struct S {
+  int before;
+  matrix<float16_t, 2, 2> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+groupshared S w[4];
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+matrix<float16_t, 2, 2> tint_symbol_5(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  return matrix<float16_t, 2, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))));
+}
+
+S tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_2 = ((offset + 0u)) / 4;
+  const uint scalar_offset_3 = ((offset + 64u)) / 4;
+  const S tint_symbol_8 = {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4]), tint_symbol_5(buffer, (offset + 4u)), asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])};
+  return tint_symbol_8;
+}
+
+typedef S tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 128u)));
+    }
+  }
+  return arr;
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      const uint i = idx;
+      const S tint_symbol_7 = (S)0;
+      w[i] = tint_symbol_7;
+    }
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = tint_symbol_3(u, 256u);
+  w[3].m = tint_symbol_5(u, 260u);
+  uint ubo_load_2 = u[0].z;
+  w[1].m[0] = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))).yx;
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001CABCCF0160(3,10-18): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_workgroup.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_workgroup.wgsl.expected.glsl
new file mode 100644
index 0000000..0ea73a9
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_workgroup.wgsl.expected.glsl

@@ -0,0 +1,115 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct S {
+  int before;
+  f16mat2 m;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  int after;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+  uint pad_26;
+  uint pad_27;
+};
+
+struct S_std140 {
+  int before;
+  f16vec2 m_0;
+  f16vec2 m_1;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  int after;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+  uint pad_26;
+  uint pad_27;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  S_std140 inner[4];
+} u;
+
+shared S w[4];
+S conv_S(S_std140 val) {
+  return S(val.before, f16mat2(val.m_0, val.m_1), val.pad, val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.pad_7, val.pad_8, val.pad_9, val.pad_10, val.pad_11, val.pad_12, val.after, val.pad_13, val.pad_14, val.pad_15, val.pad_16, val.pad_17, val.pad_18, val.pad_19, val.pad_20, val.pad_21, val.pad_22, val.pad_23, val.pad_24, val.pad_25, val.pad_26, val.pad_27);
+}
+
+S[4] conv_arr4_S(S_std140 val[4]) {
+  S arr[4] = S[4](S(0, f16mat2(0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, f16mat2(0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, f16mat2(0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, f16mat2(0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_S(val[i]);
+    }
+  }
+  return arr;
+}
+
+f16mat2 load_u_inner_2_m() {
+  return f16mat2(u.inner[2u].m_0, u.inner[2u].m_1);
+}
+
+void f(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      uint i = idx;
+      S tint_symbol = S(0, f16mat2(f16vec2(0.0hf), f16vec2(0.0hf)), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u);
+      w[i] = tint_symbol;
+    }
+  }
+  barrier();
+  w = conv_arr4_S(u.inner);
+  w[1] = conv_S(u.inner[2u]);
+  w[3].m = load_u_inner_2_m();
+  w[1].m[0] = u.inner[0u].m_1.yx;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f(gl_LocalInvocationIndex);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_workgroup.wgsl.expected.msl
new file mode 100644
index 0000000..6411d47
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_workgroup.wgsl.expected.msl

@@ -0,0 +1,47 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ half2x2 m;
+  /* 0x000c */ tint_array<int8_t, 52> tint_pad;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_1;
+};
+
+struct tint_symbol_6 {
+  tint_array<S, 4> w;
+};
+
+void f_inner(uint local_invocation_index, threadgroup tint_array<S, 4>* const tint_symbol_1, const constant tint_array<S, 4>* const tint_symbol_2) {
+  for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+    uint const i = idx;
+    S const tint_symbol = S{};
+    (*(tint_symbol_1))[i] = tint_symbol;
+  }
+  threadgroup_barrier(mem_flags::mem_threadgroup);
+  *(tint_symbol_1) = *(tint_symbol_2);
+  (*(tint_symbol_1))[1] = (*(tint_symbol_2))[2];
+  (*(tint_symbol_1))[3].m = (*(tint_symbol_2))[2].m;
+  (*(tint_symbol_1))[1].m[0] = half2((*(tint_symbol_2))[0].m[1]).yx;
+}
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol_5 [[buffer(0)]], threadgroup tint_symbol_6* tint_symbol_4 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
+  threadgroup tint_array<S, 4>* const tint_symbol_3 = &((*(tint_symbol_4)).w);
+  f_inner(local_invocation_index, tint_symbol_3, tint_symbol_5);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_workgroup.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_workgroup.wgsl.expected.spvasm
new file mode 100644
index 0000000..246193e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_workgroup.wgsl.expected.spvasm

@@ -0,0 +1,214 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 126
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f" %local_invocation_index_1
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %local_invocation_index_1 "local_invocation_index_1"
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "after"
+               OpName %u "u"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %w "w"
+               OpName %conv_S "conv_S"
+               OpName %val "val"
+               OpName %conv_arr4_S "conv_arr4_S"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_u_inner_2_m "load_u_inner_2_m"
+               OpName %f_inner "f_inner"
+               OpName %local_invocation_index "local_invocation_index"
+               OpName %idx "idx"
+               OpName %f "f"
+               OpDecorate %local_invocation_index_1 BuiltIn LocalInvocationIndex
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 4
+               OpMemberDecorate %S_std140 2 Offset 8
+               OpMemberDecorate %S_std140 3 Offset 64
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 4
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 4
+               OpMemberDecorate %S 2 Offset 64
+               OpDecorate %_arr_S_uint_4 ArrayStride 128
+       %uint = OpTypeInt 32 0
+%_ptr_Input_uint = OpTypePointer Input %uint
+%local_invocation_index_1 = OpVariable %_ptr_Input_uint Input
+        %int = OpTypeInt 32 1
+       %half = OpTypeFloat 16
+     %v2half = OpTypeVector %half 2
+   %S_std140 = OpTypeStruct %int %v2half %v2half %int
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat2v2half = OpTypeMatrix %v2half 2
+          %S = OpTypeStruct %int %mat2v2half %int
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+%_ptr_Workgroup__arr_S_uint_4 = OpTypePointer Workgroup %_arr_S_uint_4
+          %w = OpVariable %_ptr_Workgroup__arr_S_uint_4 Workgroup
+         %18 = OpTypeFunction %S %S_std140
+         %28 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
+%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
+         %34 = OpConstantNull %_arr_S_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %37 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
+         %50 = OpConstantNull %_arr_S_std140_uint_4
+%_ptr_Function_S = OpTypePointer Function %S
+%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
+     %uint_1 = OpConstant %uint 1
+         %63 = OpTypeFunction %mat2v2half
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half
+       %void = OpTypeVoid
+         %79 = OpTypeFunction %void %uint
+%_ptr_Workgroup_S = OpTypePointer Workgroup %S
+         %97 = OpConstantNull %S
+   %uint_264 = OpConstant %uint 264
+%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
+      %int_1 = OpConstant %int 1
+      %int_3 = OpConstant %int 3
+%_ptr_Workgroup_mat2v2half = OpTypePointer Workgroup %mat2v2half
+        %115 = OpConstantNull %int
+%_ptr_Workgroup_v2half = OpTypePointer Workgroup %v2half
+        %121 = OpTypeFunction %void
+     %conv_S = OpFunction %S None %18
+        %val = OpFunctionParameter %S_std140
+         %21 = OpLabel
+         %22 = OpCompositeExtract %int %val 0
+         %23 = OpCompositeExtract %v2half %val 1
+         %24 = OpCompositeExtract %v2half %val 2
+         %25 = OpCompositeConstruct %mat2v2half %23 %24
+         %26 = OpCompositeExtract %int %val 3
+         %27 = OpCompositeConstruct %S %22 %25 %26
+               OpReturnValue %27
+               OpFunctionEnd
+%conv_arr4_S = OpFunction %_arr_S_uint_4 None %28
+      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
+         %31 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %34
+          %i = OpVariable %_ptr_Function_uint Function %37
+%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %50
+               OpBranch %38
+         %38 = OpLabel
+               OpLoopMerge %39 %40 None
+               OpBranch %41
+         %41 = OpLabel
+         %43 = OpLoad %uint %i
+         %44 = OpULessThan %bool %43 %uint_4
+         %42 = OpLogicalNot %bool %44
+               OpSelectionMerge %46 None
+               OpBranchConditional %42 %47 %46
+         %47 = OpLabel
+               OpBranch %39
+         %46 = OpLabel
+               OpStore %var_for_index %val_0
+         %51 = OpLoad %uint %i
+         %53 = OpAccessChain %_ptr_Function_S %arr %51
+         %55 = OpLoad %uint %i
+         %57 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %55
+         %58 = OpLoad %S_std140 %57
+         %54 = OpFunctionCall %S %conv_S %58
+               OpStore %53 %54
+               OpBranch %40
+         %40 = OpLabel
+         %59 = OpLoad %uint %i
+         %61 = OpIAdd %uint %59 %uint_1
+               OpStore %i %61
+               OpBranch %38
+         %39 = OpLabel
+         %62 = OpLoad %_arr_S_uint_4 %arr
+               OpReturnValue %62
+               OpFunctionEnd
+%load_u_inner_2_m = OpFunction %mat2v2half None %63
+         %65 = OpLabel
+         %70 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %73 = OpAccessChain %_ptr_Uniform_v2half %70 %uint_1
+         %74 = OpLoad %v2half %73
+         %76 = OpAccessChain %_ptr_Uniform_v2half %70 %uint_2
+         %77 = OpLoad %v2half %76
+         %78 = OpCompositeConstruct %mat2v2half %74 %77
+               OpReturnValue %78
+               OpFunctionEnd
+    %f_inner = OpFunction %void None %79
+%local_invocation_index = OpFunctionParameter %uint
+         %83 = OpLabel
+        %idx = OpVariable %_ptr_Function_uint Function %37
+               OpStore %idx %local_invocation_index
+               OpBranch %85
+         %85 = OpLabel
+               OpLoopMerge %86 %87 None
+               OpBranch %88
+         %88 = OpLabel
+         %90 = OpLoad %uint %idx
+         %91 = OpULessThan %bool %90 %uint_4
+         %89 = OpLogicalNot %bool %91
+               OpSelectionMerge %92 None
+               OpBranchConditional %89 %93 %92
+         %93 = OpLabel
+               OpBranch %86
+         %92 = OpLabel
+         %94 = OpLoad %uint %idx
+         %96 = OpAccessChain %_ptr_Workgroup_S %w %94
+               OpStore %96 %97
+               OpBranch %87
+         %87 = OpLabel
+         %98 = OpLoad %uint %idx
+         %99 = OpIAdd %uint %98 %uint_1
+               OpStore %idx %99
+               OpBranch %85
+         %86 = OpLabel
+               OpControlBarrier %uint_2 %uint_2 %uint_264
+        %104 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
+        %105 = OpLoad %_arr_S_std140_uint_4 %104
+        %102 = OpFunctionCall %_arr_S_uint_4 %conv_arr4_S %105
+               OpStore %w %102
+        %107 = OpAccessChain %_ptr_Workgroup_S %w %int_1
+        %109 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+        %110 = OpLoad %S_std140 %109
+        %108 = OpFunctionCall %S %conv_S %110
+               OpStore %107 %108
+        %113 = OpAccessChain %_ptr_Workgroup_mat2v2half %w %int_3 %uint_1
+        %114 = OpFunctionCall %mat2v2half %load_u_inner_2_m
+               OpStore %113 %114
+        %117 = OpAccessChain %_ptr_Workgroup_v2half %w %int_1 %uint_1 %115
+        %118 = OpAccessChain %_ptr_Uniform_v2half %u %uint_0 %37 %uint_2
+        %119 = OpLoad %v2half %118
+        %120 = OpVectorShuffle %v2half %119 %119 1 0
+               OpStore %117 %120
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %121
+        %123 = OpLabel
+        %125 = OpLoad %uint %local_invocation_index_1
+        %124 = OpFunctionCall %void %f_inner %125
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_workgroup.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_workgroup.wgsl.expected.wgsl
new file mode 100644
index 0000000..572d1fc
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f16/to_workgroup.wgsl.expected.wgsl

@@ -0,0 +1,20 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat2x2<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+var<workgroup> w : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  w = u;
+  w[1] = u[2];
+  w[3].m = u[2].m;
+  w[1].m[0] = u[0].m[1].yx;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f32/dynamic_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/dynamic_index_via_ptr.wgsl
new file mode 100644
index 0000000..51aa7c6
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/dynamic_index_via_ptr.wgsl

@@ -0,0 +1,32 @@
+struct Inner {
+  @size(64)
+  m : mat2x2<f32>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+var<private> counter = 0;
+fn i() -> i32 { counter++; return counter; }
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a           = &a;
+  let p_a_i         = &((*p_a)[i()]);
+  let p_a_i_a       = &((*p_a_i).a);
+  let p_a_i_a_i     = &((*p_a_i_a)[i()]);
+  let p_a_i_a_i_m   = &((*p_a_i_a_i).m);
+  let p_a_i_a_i_m_i = &((*p_a_i_a_i_m)[i()]);
+
+
+  let l_a             : array<Outer, 4> =  *p_a;
+  let l_a_i           : Outer           =  *p_a_i;
+  let l_a_i_a         : array<Inner, 4> =  *p_a_i_a;
+  let l_a_i_a_i       : Inner           =  *p_a_i_a_i;
+  let l_a_i_a_i_m     : mat2x2<f32>     =  *p_a_i_a_i_m;
+  let l_a_i_a_i_m_i   : vec2<f32>       =  *p_a_i_a_i_m_i;
+  let l_a_i_a_i_m_i_i : f32             = (*p_a_i_a_i_m_i)[i()];
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..e08f4d5
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,78 @@
+struct Inner {
+  float2x2 m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+float2x2 tint_symbol_8(uint4 buffer[64], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  return float2x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)));
+}
+
+Inner tint_symbol_7(uint4 buffer[64], uint offset) {
+  const Inner tint_symbol_11 = {tint_symbol_8(buffer, (offset + 0u))};
+  return tint_symbol_11;
+}
+
+typedef Inner tint_symbol_6_ret[4];
+tint_symbol_6_ret tint_symbol_6(uint4 buffer[64], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_7(buffer, (offset + (i_1 * 64u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_5(uint4 buffer[64], uint offset) {
+  const Outer tint_symbol_12 = {tint_symbol_6(buffer, (offset + 0u))};
+  return tint_symbol_12;
+}
+
+typedef Outer tint_symbol_4_ret[4];
+tint_symbol_4_ret tint_symbol_4(uint4 buffer[64], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    for(uint i_2 = 0u; (i_2 < 4u); i_2 = (i_2 + 1u)) {
+      arr_1[i_2] = tint_symbol_5(buffer, (offset + (i_2 * 256u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_a_i_save = i();
+  const int p_a_i_a_i_save = i();
+  const int p_a_i_a_i_m_i_save = i();
+  const Outer l_a[4] = tint_symbol_4(a, 0u);
+  const Outer l_a_i = tint_symbol_5(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a[4] = tint_symbol_6(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a_i = tint_symbol_7(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const float2x2 l_a_i_a_i_m = tint_symbol_8(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const uint scalar_offset_2 = ((((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))) + (8u * uint(p_a_i_a_i_m_i_save)))) / 4;
+  uint4 ubo_load_2 = a[scalar_offset_2 / 4];
+  const float2 l_a_i_a_i_m_i = asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy));
+  const int tint_symbol = p_a_i_save;
+  const int tint_symbol_1 = p_a_i_a_i_save;
+  const int tint_symbol_2 = p_a_i_a_i_m_i_save;
+  const int tint_symbol_3 = i();
+  const uint scalar_offset_3 = (((((256u * uint(tint_symbol)) + (64u * uint(tint_symbol_1))) + (8u * uint(tint_symbol_2))) + (4u * uint(tint_symbol_3)))) / 4;
+  const float l_a_i_a_i_m_i_i = asfloat(a[scalar_offset_3 / 4][scalar_offset_3 % 4]);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..e08f4d5
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,78 @@
+struct Inner {
+  float2x2 m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+float2x2 tint_symbol_8(uint4 buffer[64], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  return float2x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)));
+}
+
+Inner tint_symbol_7(uint4 buffer[64], uint offset) {
+  const Inner tint_symbol_11 = {tint_symbol_8(buffer, (offset + 0u))};
+  return tint_symbol_11;
+}
+
+typedef Inner tint_symbol_6_ret[4];
+tint_symbol_6_ret tint_symbol_6(uint4 buffer[64], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_7(buffer, (offset + (i_1 * 64u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_5(uint4 buffer[64], uint offset) {
+  const Outer tint_symbol_12 = {tint_symbol_6(buffer, (offset + 0u))};
+  return tint_symbol_12;
+}
+
+typedef Outer tint_symbol_4_ret[4];
+tint_symbol_4_ret tint_symbol_4(uint4 buffer[64], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    for(uint i_2 = 0u; (i_2 < 4u); i_2 = (i_2 + 1u)) {
+      arr_1[i_2] = tint_symbol_5(buffer, (offset + (i_2 * 256u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_a_i_save = i();
+  const int p_a_i_a_i_save = i();
+  const int p_a_i_a_i_m_i_save = i();
+  const Outer l_a[4] = tint_symbol_4(a, 0u);
+  const Outer l_a_i = tint_symbol_5(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a[4] = tint_symbol_6(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a_i = tint_symbol_7(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const float2x2 l_a_i_a_i_m = tint_symbol_8(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const uint scalar_offset_2 = ((((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))) + (8u * uint(p_a_i_a_i_m_i_save)))) / 4;
+  uint4 ubo_load_2 = a[scalar_offset_2 / 4];
+  const float2 l_a_i_a_i_m_i = asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy));
+  const int tint_symbol = p_a_i_save;
+  const int tint_symbol_1 = p_a_i_a_i_save;
+  const int tint_symbol_2 = p_a_i_a_i_m_i_save;
+  const int tint_symbol_3 = i();
+  const uint scalar_offset_3 = (((((256u * uint(tint_symbol)) + (64u * uint(tint_symbol_1))) + (8u * uint(tint_symbol_2))) + (4u * uint(tint_symbol_3)))) / 4;
+  const float l_a_i_a_i_m_i_i = asfloat(a[scalar_offset_3 / 4][scalar_offset_3 % 4]);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f32/dynamic_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/dynamic_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..92467b6
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/dynamic_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,146 @@
+#version 310 es
+
+struct Inner {
+  mat2 m;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+};
+
+struct Inner_std140 {
+  vec2 m_0;
+  vec2 m_1;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+};
+
+struct Outer {
+  Inner a[4];
+};
+
+struct Outer_std140 {
+  Inner_std140 a[4];
+};
+
+layout(binding = 0, std140) uniform a_block_std140_ubo {
+  Outer_std140 inner[4];
+} a;
+
+int counter = 0;
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+Inner conv_Inner(Inner_std140 val) {
+  return Inner(mat2(val.m_0, val.m_1), val.pad, val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.pad_7, val.pad_8, val.pad_9, val.pad_10, val.pad_11);
+}
+
+Inner[4] conv_arr4_Inner(Inner_std140 val[4]) {
+  Inner arr[4] = Inner[4](Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_Inner(val[i]);
+    }
+  }
+  return arr;
+}
+
+Outer conv_Outer(Outer_std140 val) {
+  return Outer(conv_arr4_Inner(val.a));
+}
+
+Outer[4] conv_arr4_Outer(Outer_std140 val[4]) {
+  Outer arr[4] = Outer[4](Outer(Inner[4](Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))), Outer(Inner[4](Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))), Outer(Inner[4](Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))), Outer(Inner[4](Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_Outer(val[i]);
+    }
+  }
+  return arr;
+}
+
+mat2 load_a_inner_p0_a_p1_m(uint p0, uint p1) {
+  uint s_save = p0;
+  uint s_save_1 = p1;
+  return mat2(a.inner[s_save].a[s_save_1].m_0, a.inner[s_save].a[s_save_1].m_1);
+}
+
+vec2 load_a_inner_p0_a_p1_m_p2(uint p0, uint p1, uint p2) {
+  switch(p2) {
+    case 0u: {
+      return a.inner[p0].a[p1].m_0;
+      break;
+    }
+    case 1u: {
+      return a.inner[p0].a[p1].m_1;
+      break;
+    }
+    default: {
+      return vec2(0.0f);
+      break;
+    }
+  }
+}
+
+float load_a_inner_p0_a_p1_m_p2_p3(uint p0, uint p1, uint p2, uint p3) {
+  switch(p2) {
+    case 0u: {
+      return a.inner[p0].a[p1].m_0[p3];
+      break;
+    }
+    case 1u: {
+      return a.inner[p0].a[p1].m_1[p3];
+      break;
+    }
+    default: {
+      return 0.0f;
+      break;
+    }
+  }
+}
+
+void f() {
+  Outer p_a[4] = conv_arr4_Outer(a.inner);
+  int tint_symbol = i();
+  Outer p_a_i = conv_Outer(a.inner[tint_symbol]);
+  Inner p_a_i_a[4] = conv_arr4_Inner(a.inner[tint_symbol].a);
+  int tint_symbol_1 = i();
+  Inner p_a_i_a_i = conv_Inner(a.inner[tint_symbol].a[tint_symbol_1]);
+  mat2 p_a_i_a_i_m = load_a_inner_p0_a_p1_m(uint(tint_symbol), uint(tint_symbol_1));
+  int tint_symbol_2 = i();
+  vec2 p_a_i_a_i_m_i = load_a_inner_p0_a_p1_m_p2(uint(tint_symbol), uint(tint_symbol_1), uint(tint_symbol_2));
+  Outer l_a[4] = conv_arr4_Outer(a.inner);
+  Outer l_a_i = conv_Outer(a.inner[tint_symbol]);
+  Inner l_a_i_a[4] = conv_arr4_Inner(a.inner[tint_symbol].a);
+  Inner l_a_i_a_i = conv_Inner(a.inner[tint_symbol].a[tint_symbol_1]);
+  mat2 l_a_i_a_i_m = load_a_inner_p0_a_p1_m(uint(tint_symbol), uint(tint_symbol_1));
+  vec2 l_a_i_a_i_m_i = load_a_inner_p0_a_p1_m_p2(uint(tint_symbol), uint(tint_symbol_1), uint(tint_symbol_2));
+  int tint_symbol_3 = i();
+  float l_a_i_a_i_m_i_i = load_a_inner_p0_a_p1_m_p2_p3(uint(tint_symbol), uint(tint_symbol_1), uint(tint_symbol_2), uint(tint_symbol_3));
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f32/dynamic_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/dynamic_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..56ba8da
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/dynamic_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,49 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct Inner {
+  /* 0x0000 */ float2x2 m;
+  /* 0x0010 */ tint_array<int8_t, 48> tint_pad;
+};
+
+struct Outer {
+  /* 0x0000 */ tint_array<Inner, 4> a;
+};
+
+int i() {
+  thread int tint_symbol_4 = 0;
+  tint_symbol_4 = as_type<int>((as_type<uint>(tint_symbol_4) + as_type<uint>(1)));
+  return tint_symbol_4;
+}
+
+kernel void f(const constant tint_array<Outer, 4>* tint_symbol_5 [[buffer(0)]]) {
+  int const tint_symbol = i();
+  int const p_a_i_save = tint_symbol;
+  int const tint_symbol_1 = i();
+  int const p_a_i_a_i_save = tint_symbol_1;
+  int const tint_symbol_2 = i();
+  int const p_a_i_a_i_m_i_save = tint_symbol_2;
+  tint_array<Outer, 4> const l_a = *(tint_symbol_5);
+  Outer const l_a_i = (*(tint_symbol_5))[p_a_i_save];
+  tint_array<Inner, 4> const l_a_i_a = (*(tint_symbol_5))[p_a_i_save].a;
+  Inner const l_a_i_a_i = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save];
+  float2x2 const l_a_i_a_i_m = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save].m;
+  float2 const l_a_i_a_i_m_i = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save].m[p_a_i_a_i_m_i_save];
+  int const tint_symbol_3 = i();
+  float const l_a_i_a_i_m_i_i = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save].m[p_a_i_a_i_m_i_save][tint_symbol_3];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f32/dynamic_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/dynamic_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..ca17b0c
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/dynamic_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,306 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 193
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %a_block_std140 "a_block_std140"
+               OpMemberName %a_block_std140 0 "inner"
+               OpName %Outer_std140 "Outer_std140"
+               OpMemberName %Outer_std140 0 "a"
+               OpName %Inner_std140 "Inner_std140"
+               OpMemberName %Inner_std140 0 "m_0"
+               OpMemberName %Inner_std140 1 "m_1"
+               OpName %a "a"
+               OpName %counter "counter"
+               OpName %i "i"
+               OpName %Inner "Inner"
+               OpMemberName %Inner 0 "m"
+               OpName %conv_Inner "conv_Inner"
+               OpName %val "val"
+               OpName %conv_arr4_Inner "conv_arr4_Inner"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i_0 "i"
+               OpName %var_for_index_1 "var_for_index_1"
+               OpName %Outer "Outer"
+               OpMemberName %Outer 0 "a"
+               OpName %conv_Outer "conv_Outer"
+               OpName %val_1 "val"
+               OpName %conv_arr4_Outer "conv_arr4_Outer"
+               OpName %val_2 "val"
+               OpName %arr_0 "arr"
+               OpName %i_1 "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_a_inner_p0_a_p1_m "load_a_inner_p0_a_p1_m"
+               OpName %p0 "p0"
+               OpName %p1 "p1"
+               OpName %load_a_inner_p0_a_p1_m_p2 "load_a_inner_p0_a_p1_m_p2"
+               OpName %p0_0 "p0"
+               OpName %p1_0 "p1"
+               OpName %p2 "p2"
+               OpName %load_a_inner_p0_a_p1_m_p2_p3 "load_a_inner_p0_a_p1_m_p2_p3"
+               OpName %p0_1 "p0"
+               OpName %p1_1 "p1"
+               OpName %p2_0 "p2"
+               OpName %p3 "p3"
+               OpName %f "f"
+               OpDecorate %a_block_std140 Block
+               OpMemberDecorate %a_block_std140 0 Offset 0
+               OpMemberDecorate %Outer_std140 0 Offset 0
+               OpMemberDecorate %Inner_std140 0 Offset 0
+               OpMemberDecorate %Inner_std140 1 Offset 8
+               OpDecorate %_arr_Inner_std140_uint_4 ArrayStride 64
+               OpDecorate %_arr_Outer_std140_uint_4 ArrayStride 256
+               OpDecorate %a NonWritable
+               OpDecorate %a DescriptorSet 0
+               OpDecorate %a Binding 0
+               OpMemberDecorate %Inner 0 Offset 0
+               OpMemberDecorate %Inner 0 ColMajor
+               OpMemberDecorate %Inner 0 MatrixStride 8
+               OpDecorate %_arr_Inner_uint_4 ArrayStride 64
+               OpMemberDecorate %Outer 0 Offset 0
+               OpDecorate %_arr_Outer_uint_4 ArrayStride 256
+      %float = OpTypeFloat 32
+    %v2float = OpTypeVector %float 2
+%Inner_std140 = OpTypeStruct %v2float %v2float
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_Inner_std140_uint_4 = OpTypeArray %Inner_std140 %uint_4
+%Outer_std140 = OpTypeStruct %_arr_Inner_std140_uint_4
+%_arr_Outer_std140_uint_4 = OpTypeArray %Outer_std140 %uint_4
+%a_block_std140 = OpTypeStruct %_arr_Outer_std140_uint_4
+%_ptr_Uniform_a_block_std140 = OpTypePointer Uniform %a_block_std140
+          %a = OpVariable %_ptr_Uniform_a_block_std140 Uniform
+        %int = OpTypeInt 32 1
+         %13 = OpConstantNull %int
+%_ptr_Private_int = OpTypePointer Private %int
+    %counter = OpVariable %_ptr_Private_int Private %13
+         %16 = OpTypeFunction %int
+      %int_1 = OpConstant %int 1
+%mat2v2float = OpTypeMatrix %v2float 2
+      %Inner = OpTypeStruct %mat2v2float
+         %23 = OpTypeFunction %Inner %Inner_std140
+%_arr_Inner_uint_4 = OpTypeArray %Inner %uint_4
+         %33 = OpTypeFunction %_arr_Inner_uint_4 %_arr_Inner_std140_uint_4
+%_ptr_Function__arr_Inner_uint_4 = OpTypePointer Function %_arr_Inner_uint_4
+         %40 = OpConstantNull %_arr_Inner_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %43 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_Inner_std140_uint_4 = OpTypePointer Function %_arr_Inner_std140_uint_4
+         %56 = OpConstantNull %_arr_Inner_std140_uint_4
+%_ptr_Function_Inner = OpTypePointer Function %Inner
+%_ptr_Function_Inner_std140 = OpTypePointer Function %Inner_std140
+     %uint_1 = OpConstant %uint 1
+      %Outer = OpTypeStruct %_arr_Inner_uint_4
+         %69 = OpTypeFunction %Outer %Outer_std140
+%_arr_Outer_uint_4 = OpTypeArray %Outer %uint_4
+         %77 = OpTypeFunction %_arr_Outer_uint_4 %_arr_Outer_std140_uint_4
+%_ptr_Function__arr_Outer_uint_4 = OpTypePointer Function %_arr_Outer_uint_4
+         %84 = OpConstantNull %_arr_Outer_uint_4
+%_ptr_Function__arr_Outer_std140_uint_4 = OpTypePointer Function %_arr_Outer_std140_uint_4
+         %97 = OpConstantNull %_arr_Outer_std140_uint_4
+%_ptr_Function_Outer = OpTypePointer Function %Outer
+%_ptr_Function_Outer_std140 = OpTypePointer Function %Outer_std140
+        %109 = OpTypeFunction %mat2v2float %uint %uint
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_Inner_std140 = OpTypePointer Uniform %Inner_std140
+%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
+        %126 = OpTypeFunction %v2float %uint %uint %uint
+        %140 = OpConstantNull %v2float
+        %141 = OpTypeFunction %float %uint %uint %uint %uint
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+        %157 = OpConstantNull %float
+       %void = OpTypeVoid
+        %158 = OpTypeFunction %void
+%_ptr_Uniform__arr_Outer_std140_uint_4 = OpTypePointer Uniform %_arr_Outer_std140_uint_4
+%_ptr_Uniform_Outer_std140 = OpTypePointer Uniform %Outer_std140
+%_ptr_Uniform__arr_Inner_std140_uint_4 = OpTypePointer Uniform %_arr_Inner_std140_uint_4
+          %i = OpFunction %int None %16
+         %18 = OpLabel
+         %19 = OpLoad %int %counter
+         %21 = OpIAdd %int %19 %int_1
+               OpStore %counter %21
+         %22 = OpLoad %int %counter
+               OpReturnValue %22
+               OpFunctionEnd
+ %conv_Inner = OpFunction %Inner None %23
+        %val = OpFunctionParameter %Inner_std140
+         %28 = OpLabel
+         %29 = OpCompositeExtract %v2float %val 0
+         %30 = OpCompositeExtract %v2float %val 1
+         %31 = OpCompositeConstruct %mat2v2float %29 %30
+         %32 = OpCompositeConstruct %Inner %31
+               OpReturnValue %32
+               OpFunctionEnd
+%conv_arr4_Inner = OpFunction %_arr_Inner_uint_4 None %33
+      %val_0 = OpFunctionParameter %_arr_Inner_std140_uint_4
+         %37 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_Inner_uint_4 Function %40
+        %i_0 = OpVariable %_ptr_Function_uint Function %43
+%var_for_index_1 = OpVariable %_ptr_Function__arr_Inner_std140_uint_4 Function %56
+               OpBranch %44
+         %44 = OpLabel
+               OpLoopMerge %45 %46 None
+               OpBranch %47
+         %47 = OpLabel
+         %49 = OpLoad %uint %i_0
+         %50 = OpULessThan %bool %49 %uint_4
+         %48 = OpLogicalNot %bool %50
+               OpSelectionMerge %52 None
+               OpBranchConditional %48 %53 %52
+         %53 = OpLabel
+               OpBranch %45
+         %52 = OpLabel
+               OpStore %var_for_index_1 %val_0
+         %57 = OpLoad %uint %i_0
+         %59 = OpAccessChain %_ptr_Function_Inner %arr %57
+         %61 = OpLoad %uint %i_0
+         %63 = OpAccessChain %_ptr_Function_Inner_std140 %var_for_index_1 %61
+         %64 = OpLoad %Inner_std140 %63
+         %60 = OpFunctionCall %Inner %conv_Inner %64
+               OpStore %59 %60
+               OpBranch %46
+         %46 = OpLabel
+         %65 = OpLoad %uint %i_0
+         %67 = OpIAdd %uint %65 %uint_1
+               OpStore %i_0 %67
+               OpBranch %44
+         %45 = OpLabel
+         %68 = OpLoad %_arr_Inner_uint_4 %arr
+               OpReturnValue %68
+               OpFunctionEnd
+ %conv_Outer = OpFunction %Outer None %69
+      %val_1 = OpFunctionParameter %Outer_std140
+         %73 = OpLabel
+         %75 = OpCompositeExtract %_arr_Inner_std140_uint_4 %val_1 0
+         %74 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr4_Inner %75
+         %76 = OpCompositeConstruct %Outer %74
+               OpReturnValue %76
+               OpFunctionEnd
+%conv_arr4_Outer = OpFunction %_arr_Outer_uint_4 None %77
+      %val_2 = OpFunctionParameter %_arr_Outer_std140_uint_4
+         %81 = OpLabel
+      %arr_0 = OpVariable %_ptr_Function__arr_Outer_uint_4 Function %84
+        %i_1 = OpVariable %_ptr_Function_uint Function %43
+%var_for_index = OpVariable %_ptr_Function__arr_Outer_std140_uint_4 Function %97
+               OpBranch %86
+         %86 = OpLabel
+               OpLoopMerge %87 %88 None
+               OpBranch %89
+         %89 = OpLabel
+         %91 = OpLoad %uint %i_1
+         %92 = OpULessThan %bool %91 %uint_4
+         %90 = OpLogicalNot %bool %92
+               OpSelectionMerge %93 None
+               OpBranchConditional %90 %94 %93
+         %94 = OpLabel
+               OpBranch %87
+         %93 = OpLabel
+               OpStore %var_for_index %val_2
+         %98 = OpLoad %uint %i_1
+        %100 = OpAccessChain %_ptr_Function_Outer %arr_0 %98
+        %102 = OpLoad %uint %i_1
+        %104 = OpAccessChain %_ptr_Function_Outer_std140 %var_for_index %102
+        %105 = OpLoad %Outer_std140 %104
+        %101 = OpFunctionCall %Outer %conv_Outer %105
+               OpStore %100 %101
+               OpBranch %88
+         %88 = OpLabel
+        %106 = OpLoad %uint %i_1
+        %107 = OpIAdd %uint %106 %uint_1
+               OpStore %i_1 %107
+               OpBranch %86
+         %87 = OpLabel
+        %108 = OpLoad %_arr_Outer_uint_4 %arr_0
+               OpReturnValue %108
+               OpFunctionEnd
+%load_a_inner_p0_a_p1_m = OpFunction %mat2v2float None %109
+         %p0 = OpFunctionParameter %uint
+         %p1 = OpFunctionParameter %uint
+        %113 = OpLabel
+        %117 = OpAccessChain %_ptr_Uniform_Inner_std140 %a %uint_0 %p0 %uint_0 %p1
+        %120 = OpAccessChain %_ptr_Uniform_v2float %117 %uint_0
+        %121 = OpLoad %v2float %120
+        %123 = OpAccessChain %_ptr_Uniform_v2float %117 %uint_1
+        %124 = OpLoad %v2float %123
+        %125 = OpCompositeConstruct %mat2v2float %121 %124
+               OpReturnValue %125
+               OpFunctionEnd
+%load_a_inner_p0_a_p1_m_p2 = OpFunction %v2float None %126
+       %p0_0 = OpFunctionParameter %uint
+       %p1_0 = OpFunctionParameter %uint
+         %p2 = OpFunctionParameter %uint
+        %131 = OpLabel
+               OpSelectionMerge %132 None
+               OpSwitch %p2 %133 0 %134 1 %135
+        %134 = OpLabel
+        %136 = OpAccessChain %_ptr_Uniform_v2float %a %uint_0 %p0_0 %uint_0 %p1_0 %uint_0
+        %137 = OpLoad %v2float %136
+               OpReturnValue %137
+        %135 = OpLabel
+        %138 = OpAccessChain %_ptr_Uniform_v2float %a %uint_0 %p0_0 %uint_0 %p1_0 %uint_1
+        %139 = OpLoad %v2float %138
+               OpReturnValue %139
+        %133 = OpLabel
+               OpReturnValue %140
+        %132 = OpLabel
+               OpReturnValue %140
+               OpFunctionEnd
+%load_a_inner_p0_a_p1_m_p2_p3 = OpFunction %float None %141
+       %p0_1 = OpFunctionParameter %uint
+       %p1_1 = OpFunctionParameter %uint
+       %p2_0 = OpFunctionParameter %uint
+         %p3 = OpFunctionParameter %uint
+        %147 = OpLabel
+               OpSelectionMerge %148 None
+               OpSwitch %p2_0 %149 0 %150 1 %151
+        %150 = OpLabel
+        %153 = OpAccessChain %_ptr_Uniform_float %a %uint_0 %p0_1 %uint_0 %p1_1 %uint_0 %p3
+        %154 = OpLoad %float %153
+               OpReturnValue %154
+        %151 = OpLabel
+        %155 = OpAccessChain %_ptr_Uniform_float %a %uint_0 %p0_1 %uint_0 %p1_1 %uint_1 %p3
+        %156 = OpLoad %float %155
+               OpReturnValue %156
+        %149 = OpLabel
+               OpReturnValue %157
+        %148 = OpLabel
+               OpReturnValue %157
+               OpFunctionEnd
+          %f = OpFunction %void None %158
+        %161 = OpLabel
+        %162 = OpFunctionCall %int %i
+        %163 = OpFunctionCall %int %i
+        %164 = OpFunctionCall %int %i
+        %167 = OpAccessChain %_ptr_Uniform__arr_Outer_std140_uint_4 %a %uint_0
+        %168 = OpLoad %_arr_Outer_std140_uint_4 %167
+        %165 = OpFunctionCall %_arr_Outer_uint_4 %conv_arr4_Outer %168
+        %171 = OpAccessChain %_ptr_Uniform_Outer_std140 %a %uint_0 %162
+        %172 = OpLoad %Outer_std140 %171
+        %169 = OpFunctionCall %Outer %conv_Outer %172
+        %175 = OpAccessChain %_ptr_Uniform__arr_Inner_std140_uint_4 %a %uint_0 %162 %uint_0
+        %176 = OpLoad %_arr_Inner_std140_uint_4 %175
+        %173 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr4_Inner %176
+        %178 = OpAccessChain %_ptr_Uniform_Inner_std140 %a %uint_0 %162 %uint_0 %163
+        %179 = OpLoad %Inner_std140 %178
+        %177 = OpFunctionCall %Inner %conv_Inner %179
+        %181 = OpBitcast %uint %162
+        %182 = OpBitcast %uint %163
+        %180 = OpFunctionCall %mat2v2float %load_a_inner_p0_a_p1_m %181 %182
+        %184 = OpBitcast %uint %162
+        %185 = OpBitcast %uint %163
+        %186 = OpBitcast %uint %164
+        %183 = OpFunctionCall %v2float %load_a_inner_p0_a_p1_m_p2 %184 %185 %186
+        %187 = OpFunctionCall %int %i
+        %189 = OpBitcast %uint %162
+        %190 = OpBitcast %uint %163
+        %191 = OpBitcast %uint %164
+        %192 = OpBitcast %uint %187
+        %188 = OpFunctionCall %float %load_a_inner_p0_a_p1_m_p2_p3 %189 %190 %191 %192
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f32/dynamic_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/dynamic_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..ac4c714
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/dynamic_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,34 @@
+struct Inner {
+  @size(64)
+  m : mat2x2<f32>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+var<private> counter = 0;
+
+fn i() -> i32 {
+  counter++;
+  return counter;
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &(a);
+  let p_a_i = &((*(p_a))[i()]);
+  let p_a_i_a = &((*(p_a_i)).a);
+  let p_a_i_a_i = &((*(p_a_i_a))[i()]);
+  let p_a_i_a_i_m = &((*(p_a_i_a_i)).m);
+  let p_a_i_a_i_m_i = &((*(p_a_i_a_i_m))[i()]);
+  let l_a : array<Outer, 4> = *(p_a);
+  let l_a_i : Outer = *(p_a_i);
+  let l_a_i_a : array<Inner, 4> = *(p_a_i_a);
+  let l_a_i_a_i : Inner = *(p_a_i_a_i);
+  let l_a_i_a_i_m : mat2x2<f32> = *(p_a_i_a_i_m);
+  let l_a_i_a_i_m_i : vec2<f32> = *(p_a_i_a_i_m_i);
+  let l_a_i_a_i_m_i_i : f32 = (*(p_a_i_a_i_m_i))[i()];
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f32/static_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/static_index_via_ptr.wgsl
new file mode 100644
index 0000000..98f52eb
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/static_index_via_ptr.wgsl

@@ -0,0 +1,29 @@
+struct Inner {
+  @size(64)
+  m : mat2x2<f32>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &a;
+  let p_a_3 = &((*p_a)[3]);
+  let p_a_3_a = &((*p_a_3).a);
+  let p_a_3_a_2 = &((*p_a_3_a)[2]);
+  let p_a_3_a_2_m = &((*p_a_3_a_2).m);
+  let p_a_3_a_2_m_1 = &((*p_a_3_a_2_m)[1]);
+
+
+  let l_a             : array<Outer, 4> = *p_a;
+  let l_a_3           : Outer           = *p_a_3;
+  let l_a_3_a         : array<Inner, 4> = *p_a_3_a;
+  let l_a_3_a_2       : Inner           = *p_a_3_a_2;
+  let l_a_3_a_2_m     : mat2x2<f32>     = *p_a_3_a_2_m;
+  let l_a_3_a_2_m_1   : vec2<f32>       = *p_a_3_a_2_m_1;
+  let l_a_3_a_2_m_1_0 : f32             = (*p_a_3_a_2_m_1)[0];
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..e732e83
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,62 @@
+struct Inner {
+  float2x2 m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];
+};
+
+float2x2 tint_symbol_4(uint4 buffer[64], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  return float2x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)));
+}
+
+Inner tint_symbol_3(uint4 buffer[64], uint offset) {
+  const Inner tint_symbol_7 = {tint_symbol_4(buffer, (offset + 0u))};
+  return tint_symbol_7;
+}
+
+typedef Inner tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[64], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_3(buffer, (offset + (i * 64u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_1(uint4 buffer[64], uint offset) {
+  const Outer tint_symbol_8 = {tint_symbol_2(buffer, (offset + 0u))};
+  return tint_symbol_8;
+}
+
+typedef Outer tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[64], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr_1[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 256u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const Outer l_a[4] = tint_symbol(a, 0u);
+  const Outer l_a_3 = tint_symbol_1(a, 768u);
+  const Inner l_a_3_a[4] = tint_symbol_2(a, 768u);
+  const Inner l_a_3_a_2 = tint_symbol_3(a, 896u);
+  const float2x2 l_a_3_a_2_m = tint_symbol_4(a, 896u);
+  const float2 l_a_3_a_2_m_1 = asfloat(a[56].zw);
+  const float l_a_3_a_2_m_1_0 = asfloat(a[56].z);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..e732e83
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,62 @@
+struct Inner {
+  float2x2 m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];
+};
+
+float2x2 tint_symbol_4(uint4 buffer[64], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  return float2x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)));
+}
+
+Inner tint_symbol_3(uint4 buffer[64], uint offset) {
+  const Inner tint_symbol_7 = {tint_symbol_4(buffer, (offset + 0u))};
+  return tint_symbol_7;
+}
+
+typedef Inner tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[64], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_3(buffer, (offset + (i * 64u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_1(uint4 buffer[64], uint offset) {
+  const Outer tint_symbol_8 = {tint_symbol_2(buffer, (offset + 0u))};
+  return tint_symbol_8;
+}
+
+typedef Outer tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[64], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr_1[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 256u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const Outer l_a[4] = tint_symbol(a, 0u);
+  const Outer l_a_3 = tint_symbol_1(a, 768u);
+  const Inner l_a_3_a[4] = tint_symbol_2(a, 768u);
+  const Inner l_a_3_a_2 = tint_symbol_3(a, 896u);
+  const float2x2 l_a_3_a_2_m = tint_symbol_4(a, 896u);
+  const float2 l_a_3_a_2_m_1 = asfloat(a[56].zw);
+  const float l_a_3_a_2_m_1_0 = asfloat(a[56].z);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f32/static_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/static_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..41496f7
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/static_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,100 @@
+#version 310 es
+
+struct Inner {
+  mat2 m;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+};
+
+struct Inner_std140 {
+  vec2 m_0;
+  vec2 m_1;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+};
+
+struct Outer {
+  Inner a[4];
+};
+
+struct Outer_std140 {
+  Inner_std140 a[4];
+};
+
+layout(binding = 0, std140) uniform a_block_std140_ubo {
+  Outer_std140 inner[4];
+} a;
+
+Inner conv_Inner(Inner_std140 val) {
+  return Inner(mat2(val.m_0, val.m_1), val.pad, val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.pad_7, val.pad_8, val.pad_9, val.pad_10, val.pad_11);
+}
+
+Inner[4] conv_arr4_Inner(Inner_std140 val[4]) {
+  Inner arr[4] = Inner[4](Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_Inner(val[i]);
+    }
+  }
+  return arr;
+}
+
+Outer conv_Outer(Outer_std140 val) {
+  return Outer(conv_arr4_Inner(val.a));
+}
+
+Outer[4] conv_arr4_Outer(Outer_std140 val[4]) {
+  Outer arr[4] = Outer[4](Outer(Inner[4](Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))), Outer(Inner[4](Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))), Outer(Inner[4](Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))), Outer(Inner[4](Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(mat2(0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_Outer(val[i]);
+    }
+  }
+  return arr;
+}
+
+mat2 load_a_inner_3_a_2_m() {
+  return mat2(a.inner[3u].a[2u].m_0, a.inner[3u].a[2u].m_1);
+}
+
+void f() {
+  Outer p_a[4] = conv_arr4_Outer(a.inner);
+  Outer p_a_3 = conv_Outer(a.inner[3u]);
+  Inner p_a_3_a[4] = conv_arr4_Inner(a.inner[3u].a);
+  Inner p_a_3_a_2 = conv_Inner(a.inner[3u].a[2u]);
+  mat2 p_a_3_a_2_m = load_a_inner_3_a_2_m();
+  vec2 p_a_3_a_2_m_1 = a.inner[3u].a[2u].m_1;
+  Outer l_a[4] = conv_arr4_Outer(a.inner);
+  Outer l_a_3 = conv_Outer(a.inner[3u]);
+  Inner l_a_3_a[4] = conv_arr4_Inner(a.inner[3u].a);
+  Inner l_a_3_a_2 = conv_Inner(a.inner[3u].a[2u]);
+  mat2 l_a_3_a_2_m = load_a_inner_3_a_2_m();
+  vec2 l_a_3_a_2_m_1 = a.inner[3u].a[2u].m_1;
+  float l_a_3_a_2_m_1_0 = a.inner[3u].a[2u].m_1[0u];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f32/static_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/static_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..9b0f4f1
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/static_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,36 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct Inner {
+  /* 0x0000 */ float2x2 m;
+  /* 0x0010 */ tint_array<int8_t, 48> tint_pad;
+};
+
+struct Outer {
+  /* 0x0000 */ tint_array<Inner, 4> a;
+};
+
+kernel void f(const constant tint_array<Outer, 4>* tint_symbol [[buffer(0)]]) {
+  tint_array<Outer, 4> const l_a = *(tint_symbol);
+  Outer const l_a_3 = (*(tint_symbol))[3];
+  tint_array<Inner, 4> const l_a_3_a = (*(tint_symbol))[3].a;
+  Inner const l_a_3_a_2 = (*(tint_symbol))[3].a[2];
+  float2x2 const l_a_3_a_2_m = (*(tint_symbol))[3].a[2].m;
+  float2 const l_a_3_a_2_m_1 = (*(tint_symbol))[3].a[2].m[1];
+  float const l_a_3_a_2_m_1_0 = (*(tint_symbol))[3].a[2].m[1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f32/static_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/static_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..120d1f3
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/static_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,223 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 140
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %a_block_std140 "a_block_std140"
+               OpMemberName %a_block_std140 0 "inner"
+               OpName %Outer_std140 "Outer_std140"
+               OpMemberName %Outer_std140 0 "a"
+               OpName %Inner_std140 "Inner_std140"
+               OpMemberName %Inner_std140 0 "m_0"
+               OpMemberName %Inner_std140 1 "m_1"
+               OpName %a "a"
+               OpName %Inner "Inner"
+               OpMemberName %Inner 0 "m"
+               OpName %conv_Inner "conv_Inner"
+               OpName %val "val"
+               OpName %conv_arr4_Inner "conv_arr4_Inner"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index_1 "var_for_index_1"
+               OpName %Outer "Outer"
+               OpMemberName %Outer 0 "a"
+               OpName %conv_Outer "conv_Outer"
+               OpName %val_1 "val"
+               OpName %conv_arr4_Outer "conv_arr4_Outer"
+               OpName %val_2 "val"
+               OpName %arr_0 "arr"
+               OpName %i_0 "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_a_inner_3_a_2_m "load_a_inner_3_a_2_m"
+               OpName %f "f"
+               OpDecorate %a_block_std140 Block
+               OpMemberDecorate %a_block_std140 0 Offset 0
+               OpMemberDecorate %Outer_std140 0 Offset 0
+               OpMemberDecorate %Inner_std140 0 Offset 0
+               OpMemberDecorate %Inner_std140 1 Offset 8
+               OpDecorate %_arr_Inner_std140_uint_4 ArrayStride 64
+               OpDecorate %_arr_Outer_std140_uint_4 ArrayStride 256
+               OpDecorate %a NonWritable
+               OpDecorate %a DescriptorSet 0
+               OpDecorate %a Binding 0
+               OpMemberDecorate %Inner 0 Offset 0
+               OpMemberDecorate %Inner 0 ColMajor
+               OpMemberDecorate %Inner 0 MatrixStride 8
+               OpDecorate %_arr_Inner_uint_4 ArrayStride 64
+               OpMemberDecorate %Outer 0 Offset 0
+               OpDecorate %_arr_Outer_uint_4 ArrayStride 256
+      %float = OpTypeFloat 32
+    %v2float = OpTypeVector %float 2
+%Inner_std140 = OpTypeStruct %v2float %v2float
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_Inner_std140_uint_4 = OpTypeArray %Inner_std140 %uint_4
+%Outer_std140 = OpTypeStruct %_arr_Inner_std140_uint_4
+%_arr_Outer_std140_uint_4 = OpTypeArray %Outer_std140 %uint_4
+%a_block_std140 = OpTypeStruct %_arr_Outer_std140_uint_4
+%_ptr_Uniform_a_block_std140 = OpTypePointer Uniform %a_block_std140
+          %a = OpVariable %_ptr_Uniform_a_block_std140 Uniform
+%mat2v2float = OpTypeMatrix %v2float 2
+      %Inner = OpTypeStruct %mat2v2float
+         %12 = OpTypeFunction %Inner %Inner_std140
+%_arr_Inner_uint_4 = OpTypeArray %Inner %uint_4
+         %22 = OpTypeFunction %_arr_Inner_uint_4 %_arr_Inner_std140_uint_4
+%_ptr_Function__arr_Inner_uint_4 = OpTypePointer Function %_arr_Inner_uint_4
+         %29 = OpConstantNull %_arr_Inner_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %32 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_Inner_std140_uint_4 = OpTypePointer Function %_arr_Inner_std140_uint_4
+         %45 = OpConstantNull %_arr_Inner_std140_uint_4
+%_ptr_Function_Inner = OpTypePointer Function %Inner
+%_ptr_Function_Inner_std140 = OpTypePointer Function %Inner_std140
+     %uint_1 = OpConstant %uint 1
+      %Outer = OpTypeStruct %_arr_Inner_uint_4
+         %58 = OpTypeFunction %Outer %Outer_std140
+%_arr_Outer_uint_4 = OpTypeArray %Outer %uint_4
+         %66 = OpTypeFunction %_arr_Outer_uint_4 %_arr_Outer_std140_uint_4
+%_ptr_Function__arr_Outer_uint_4 = OpTypePointer Function %_arr_Outer_uint_4
+         %73 = OpConstantNull %_arr_Outer_uint_4
+%_ptr_Function__arr_Outer_std140_uint_4 = OpTypePointer Function %_arr_Outer_std140_uint_4
+         %86 = OpConstantNull %_arr_Outer_std140_uint_4
+%_ptr_Function_Outer = OpTypePointer Function %Outer
+%_ptr_Function_Outer_std140 = OpTypePointer Function %Outer_std140
+         %98 = OpTypeFunction %mat2v2float
+     %uint_0 = OpConstant %uint 0
+     %uint_3 = OpConstant %uint 3
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_Inner_std140 = OpTypePointer Uniform %Inner_std140
+%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
+       %void = OpTypeVoid
+        %115 = OpTypeFunction %void
+%_ptr_Uniform__arr_Outer_std140_uint_4 = OpTypePointer Uniform %_arr_Outer_std140_uint_4
+%_ptr_Uniform_Outer_std140 = OpTypePointer Uniform %Outer_std140
+%_ptr_Uniform__arr_Inner_std140_uint_4 = OpTypePointer Uniform %_arr_Inner_std140_uint_4
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+ %conv_Inner = OpFunction %Inner None %12
+        %val = OpFunctionParameter %Inner_std140
+         %17 = OpLabel
+         %18 = OpCompositeExtract %v2float %val 0
+         %19 = OpCompositeExtract %v2float %val 1
+         %20 = OpCompositeConstruct %mat2v2float %18 %19
+         %21 = OpCompositeConstruct %Inner %20
+               OpReturnValue %21
+               OpFunctionEnd
+%conv_arr4_Inner = OpFunction %_arr_Inner_uint_4 None %22
+      %val_0 = OpFunctionParameter %_arr_Inner_std140_uint_4
+         %26 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_Inner_uint_4 Function %29
+          %i = OpVariable %_ptr_Function_uint Function %32
+%var_for_index_1 = OpVariable %_ptr_Function__arr_Inner_std140_uint_4 Function %45
+               OpBranch %33
+         %33 = OpLabel
+               OpLoopMerge %34 %35 None
+               OpBranch %36
+         %36 = OpLabel
+         %38 = OpLoad %uint %i
+         %39 = OpULessThan %bool %38 %uint_4
+         %37 = OpLogicalNot %bool %39
+               OpSelectionMerge %41 None
+               OpBranchConditional %37 %42 %41
+         %42 = OpLabel
+               OpBranch %34
+         %41 = OpLabel
+               OpStore %var_for_index_1 %val_0
+         %46 = OpLoad %uint %i
+         %48 = OpAccessChain %_ptr_Function_Inner %arr %46
+         %50 = OpLoad %uint %i
+         %52 = OpAccessChain %_ptr_Function_Inner_std140 %var_for_index_1 %50
+         %53 = OpLoad %Inner_std140 %52
+         %49 = OpFunctionCall %Inner %conv_Inner %53
+               OpStore %48 %49
+               OpBranch %35
+         %35 = OpLabel
+         %54 = OpLoad %uint %i
+         %56 = OpIAdd %uint %54 %uint_1
+               OpStore %i %56
+               OpBranch %33
+         %34 = OpLabel
+         %57 = OpLoad %_arr_Inner_uint_4 %arr
+               OpReturnValue %57
+               OpFunctionEnd
+ %conv_Outer = OpFunction %Outer None %58
+      %val_1 = OpFunctionParameter %Outer_std140
+         %62 = OpLabel
+         %64 = OpCompositeExtract %_arr_Inner_std140_uint_4 %val_1 0
+         %63 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr4_Inner %64
+         %65 = OpCompositeConstruct %Outer %63
+               OpReturnValue %65
+               OpFunctionEnd
+%conv_arr4_Outer = OpFunction %_arr_Outer_uint_4 None %66
+      %val_2 = OpFunctionParameter %_arr_Outer_std140_uint_4
+         %70 = OpLabel
+      %arr_0 = OpVariable %_ptr_Function__arr_Outer_uint_4 Function %73
+        %i_0 = OpVariable %_ptr_Function_uint Function %32
+%var_for_index = OpVariable %_ptr_Function__arr_Outer_std140_uint_4 Function %86
+               OpBranch %75
+         %75 = OpLabel
+               OpLoopMerge %76 %77 None
+               OpBranch %78
+         %78 = OpLabel
+         %80 = OpLoad %uint %i_0
+         %81 = OpULessThan %bool %80 %uint_4
+         %79 = OpLogicalNot %bool %81
+               OpSelectionMerge %82 None
+               OpBranchConditional %79 %83 %82
+         %83 = OpLabel
+               OpBranch %76
+         %82 = OpLabel
+               OpStore %var_for_index %val_2
+         %87 = OpLoad %uint %i_0
+         %89 = OpAccessChain %_ptr_Function_Outer %arr_0 %87
+         %91 = OpLoad %uint %i_0
+         %93 = OpAccessChain %_ptr_Function_Outer_std140 %var_for_index %91
+         %94 = OpLoad %Outer_std140 %93
+         %90 = OpFunctionCall %Outer %conv_Outer %94
+               OpStore %89 %90
+               OpBranch %77
+         %77 = OpLabel
+         %95 = OpLoad %uint %i_0
+         %96 = OpIAdd %uint %95 %uint_1
+               OpStore %i_0 %96
+               OpBranch %75
+         %76 = OpLabel
+         %97 = OpLoad %_arr_Outer_uint_4 %arr_0
+               OpReturnValue %97
+               OpFunctionEnd
+%load_a_inner_3_a_2_m = OpFunction %mat2v2float None %98
+        %100 = OpLabel
+        %106 = OpAccessChain %_ptr_Uniform_Inner_std140 %a %uint_0 %uint_3 %uint_0 %uint_2
+        %109 = OpAccessChain %_ptr_Uniform_v2float %106 %uint_0
+        %110 = OpLoad %v2float %109
+        %112 = OpAccessChain %_ptr_Uniform_v2float %106 %uint_1
+        %113 = OpLoad %v2float %112
+        %114 = OpCompositeConstruct %mat2v2float %110 %113
+               OpReturnValue %114
+               OpFunctionEnd
+          %f = OpFunction %void None %115
+        %118 = OpLabel
+        %121 = OpAccessChain %_ptr_Uniform__arr_Outer_std140_uint_4 %a %uint_0
+        %122 = OpLoad %_arr_Outer_std140_uint_4 %121
+        %119 = OpFunctionCall %_arr_Outer_uint_4 %conv_arr4_Outer %122
+        %125 = OpAccessChain %_ptr_Uniform_Outer_std140 %a %uint_0 %uint_3
+        %126 = OpLoad %Outer_std140 %125
+        %123 = OpFunctionCall %Outer %conv_Outer %126
+        %129 = OpAccessChain %_ptr_Uniform__arr_Inner_std140_uint_4 %a %uint_0 %uint_3 %uint_0
+        %130 = OpLoad %_arr_Inner_std140_uint_4 %129
+        %127 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr4_Inner %130
+        %132 = OpAccessChain %_ptr_Uniform_Inner_std140 %a %uint_0 %uint_3 %uint_0 %uint_2
+        %133 = OpLoad %Inner_std140 %132
+        %131 = OpFunctionCall %Inner %conv_Inner %133
+        %134 = OpFunctionCall %mat2v2float %load_a_inner_3_a_2_m
+        %135 = OpAccessChain %_ptr_Uniform_v2float %a %uint_0 %uint_3 %uint_0 %uint_2 %uint_1
+        %136 = OpLoad %v2float %135
+        %138 = OpAccessChain %_ptr_Uniform_float %a %uint_0 %uint_3 %uint_0 %uint_2 %uint_1 %32
+        %139 = OpLoad %float %138
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f32/static_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/static_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..ca6c191
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/static_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,27 @@
+struct Inner {
+  @size(64)
+  m : mat2x2<f32>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &(a);
+  let p_a_3 = &((*(p_a))[3]);
+  let p_a_3_a = &((*(p_a_3)).a);
+  let p_a_3_a_2 = &((*(p_a_3_a))[2]);
+  let p_a_3_a_2_m = &((*(p_a_3_a_2)).m);
+  let p_a_3_a_2_m_1 = &((*(p_a_3_a_2_m))[1]);
+  let l_a : array<Outer, 4> = *(p_a);
+  let l_a_3 : Outer = *(p_a_3);
+  let l_a_3_a : array<Inner, 4> = *(p_a_3_a);
+  let l_a_3_a_2 : Inner = *(p_a_3_a_2);
+  let l_a_3_a_2_m : mat2x2<f32> = *(p_a_3_a_2_m);
+  let l_a_3_a_2_m_1 : vec2<f32> = *(p_a_3_a_2_m_1);
+  let l_a_3_a_2_m_1_0 : f32 = (*(p_a_3_a_2_m_1))[0];
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_builtin.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_builtin.wgsl
new file mode 100644
index 0000000..625dd07
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_builtin.wgsl

@@ -0,0 +1,15 @@
+struct S {
+  before : i32,
+  m : mat2x2<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    let t = transpose(u[2].m);
+    let l = length(u[0].m[1].yx);
+    let a = abs(u[0].m[1].yx.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_builtin.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_builtin.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..a6813b0
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_builtin.wgsl.expected.dxc.hlsl

@@ -0,0 +1,19 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+
+float2x2 tint_symbol(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  return float2x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float2x2 t = transpose(tint_symbol(u, 264u));
+  const float l = length(asfloat(u[1].xy).yx);
+  const float a = abs(asfloat(u[1].xy).yx.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_builtin.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_builtin.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..a6813b0
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_builtin.wgsl.expected.fxc.hlsl

@@ -0,0 +1,19 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+
+float2x2 tint_symbol(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  return float2x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float2x2 t = transpose(tint_symbol(u, 264u));
+  const float l = length(asfloat(u[1].xy).yx);
+  const float a = abs(asfloat(u[1].xy).yx.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_builtin.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_builtin.wgsl.expected.glsl
new file mode 100644
index 0000000..eeb3743
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_builtin.wgsl.expected.glsl

@@ -0,0 +1,86 @@
+#version 310 es
+
+struct S {
+  int before;
+  uint pad;
+  mat2 m;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  int after;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+};
+
+struct S_std140 {
+  int before;
+  uint pad;
+  vec2 m_0;
+  vec2 m_1;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  int after;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  S_std140 inner[4];
+} u;
+
+mat2 load_u_inner_2_m() {
+  return mat2(u.inner[2u].m_0, u.inner[2u].m_1);
+}
+
+void f() {
+  mat2 t = transpose(load_u_inner_2_m());
+  float l = length(u.inner[0u].m_1.yx);
+  float a = abs(u.inner[0u].m_1.yx[0u]);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_builtin.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_builtin.wgsl.expected.msl
new file mode 100644
index 0000000..b2a40b3
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_builtin.wgsl.expected.msl

@@ -0,0 +1,32 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
+  /* 0x0008 */ float2x2 m;
+  /* 0x0018 */ tint_array<int8_t, 40> tint_pad_1;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_2;
+};
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol [[buffer(0)]]) {
+  float2x2 const t = transpose((*(tint_symbol))[2].m);
+  float const l = length(float2((*(tint_symbol))[0].m[1]).yx);
+  float const a = fabs(float2((*(tint_symbol))[0].m[1]).yx[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_builtin.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_builtin.wgsl.expected.spvasm
new file mode 100644
index 0000000..44a6592
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_builtin.wgsl.expected.spvasm

@@ -0,0 +1,75 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 46
+; Schema: 0
+               OpCapability Shader
+         %36 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "after"
+               OpName %u "u"
+               OpName %load_u_inner_2_m "load_u_inner_2_m"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 8
+               OpMemberDecorate %S_std140 2 Offset 16
+               OpMemberDecorate %S_std140 3 Offset 64
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+        %int = OpTypeInt 32 1
+      %float = OpTypeFloat 32
+    %v2float = OpTypeVector %float 2
+   %S_std140 = OpTypeStruct %int %v2float %v2float %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+%mat2v2float = OpTypeMatrix %v2float 2
+         %11 = OpTypeFunction %mat2v2float
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+     %uint_1 = OpConstant %uint 1
+%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
+       %void = OpTypeVoid
+         %29 = OpTypeFunction %void
+         %37 = OpConstantNull %uint
+%load_u_inner_2_m = OpFunction %mat2v2float None %11
+         %14 = OpLabel
+         %19 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %23 = OpAccessChain %_ptr_Uniform_v2float %19 %uint_1
+         %24 = OpLoad %v2float %23
+         %26 = OpAccessChain %_ptr_Uniform_v2float %19 %uint_2
+         %27 = OpLoad %v2float %26
+         %28 = OpCompositeConstruct %mat2v2float %24 %27
+               OpReturnValue %28
+               OpFunctionEnd
+          %f = OpFunction %void None %29
+         %32 = OpLabel
+         %34 = OpFunctionCall %mat2v2float %load_u_inner_2_m
+         %33 = OpTranspose %mat2v2float %34
+         %38 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %37 %uint_2
+         %39 = OpLoad %v2float %38
+         %40 = OpVectorShuffle %v2float %39 %39 1 0
+         %35 = OpExtInst %float %36 Length %40
+         %42 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %37 %uint_2
+         %43 = OpLoad %v2float %42
+         %44 = OpVectorShuffle %v2float %43 %43 1 0
+         %45 = OpCompositeExtract %float %44 0
+         %41 = OpExtInst %float %36 FAbs %45
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_builtin.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_builtin.wgsl.expected.wgsl
new file mode 100644
index 0000000..180955f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_builtin.wgsl.expected.wgsl

@@ -0,0 +1,15 @@
+struct S {
+  before : i32,
+  m : mat2x2<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let t = transpose(u[2].m);
+  let l = length(u[0].m[1].yx);
+  let a = abs(u[0].m[1].yx.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_fn.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_fn.wgsl
new file mode 100644
index 0000000..401e56d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_fn.wgsl

@@ -0,0 +1,23 @@
+struct S {
+  before : i32,
+  m : mat2x2<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+fn a(a : array<S, 4>) {}
+fn b(s : S) {}
+fn c(m : mat2x2<f32>) {}
+fn d(v : vec2<f32>) {}
+fn e(f : f32) {}
+
+@compute @workgroup_size(1)
+fn f() {
+    a(u);
+    b(u[2]);
+    c(u[2].m);
+    d(u[0].m[1].yx);
+    e(u[0].m[1].yx.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_fn.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_fn.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..e8f7c79
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_fn.wgsl.expected.dxc.hlsl

@@ -0,0 +1,60 @@
+struct S {
+  int before;
+  float2x2 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+
+void a(S a_1[4]) {
+}
+
+void b(S s) {
+}
+
+void c(float2x2 m) {
+}
+
+void d(float2 v) {
+}
+
+void e(float f_1) {
+}
+
+float2x2 tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  return float2x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)));
+}
+
+S tint_symbol_1(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_2 = ((offset + 0u)) / 4;
+  const uint scalar_offset_3 = ((offset + 64u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4]), tint_symbol_3(buffer, (offset + 8u)), asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(tint_symbol_1(u, 256u));
+  c(tint_symbol_3(u, 264u));
+  d(asfloat(u[1].xy).yx);
+  e(asfloat(u[1].xy).yx.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_fn.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_fn.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..e8f7c79
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_fn.wgsl.expected.fxc.hlsl

@@ -0,0 +1,60 @@
+struct S {
+  int before;
+  float2x2 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+
+void a(S a_1[4]) {
+}
+
+void b(S s) {
+}
+
+void c(float2x2 m) {
+}
+
+void d(float2 v) {
+}
+
+void e(float f_1) {
+}
+
+float2x2 tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  return float2x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)));
+}
+
+S tint_symbol_1(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_2 = ((offset + 0u)) / 4;
+  const uint scalar_offset_3 = ((offset + 64u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4]), tint_symbol_3(buffer, (offset + 8u)), asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(tint_symbol_1(u, 256u));
+  c(tint_symbol_3(u, 264u));
+  d(asfloat(u[1].xy).yx);
+  e(asfloat(u[1].xy).yx.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_fn.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_fn.wgsl.expected.glsl
new file mode 100644
index 0000000..28796e7
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_fn.wgsl.expected.glsl

@@ -0,0 +1,117 @@
+#version 310 es
+
+struct S {
+  int before;
+  uint pad;
+  mat2 m;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  int after;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+};
+
+struct S_std140 {
+  int before;
+  uint pad;
+  vec2 m_0;
+  vec2 m_1;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  int after;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  S_std140 inner[4];
+} u;
+
+void a(S a_1[4]) {
+}
+
+void b(S s) {
+}
+
+void c(mat2 m) {
+}
+
+void d(vec2 v) {
+}
+
+void e(float f_1) {
+}
+
+S conv_S(S_std140 val) {
+  return S(val.before, val.pad, mat2(val.m_0, val.m_1), val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.pad_7, val.pad_8, val.pad_9, val.pad_10, val.after, val.pad_11, val.pad_12, val.pad_13, val.pad_14, val.pad_15, val.pad_16, val.pad_17, val.pad_18, val.pad_19, val.pad_20, val.pad_21, val.pad_22, val.pad_23, val.pad_24, val.pad_25);
+}
+
+S[4] conv_arr4_S(S_std140 val[4]) {
+  S arr[4] = S[4](S(0, 0u, mat2(0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, mat2(0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, mat2(0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, mat2(0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_S(val[i]);
+    }
+  }
+  return arr;
+}
+
+mat2 load_u_inner_2_m() {
+  return mat2(u.inner[2u].m_0, u.inner[2u].m_1);
+}
+
+void f() {
+  a(conv_arr4_S(u.inner));
+  b(conv_S(u.inner[2u]));
+  c(load_u_inner_2_m());
+  d(u.inner[0u].m_1.yx);
+  e(u.inner[0u].m_1.yx[0u]);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_fn.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_fn.wgsl.expected.msl
new file mode 100644
index 0000000..caa1fb9
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_fn.wgsl.expected.msl

@@ -0,0 +1,49 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
+  /* 0x0008 */ float2x2 m;
+  /* 0x0018 */ tint_array<int8_t, 40> tint_pad_1;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_2;
+};
+
+void a(tint_array<S, 4> a_1) {
+}
+
+void b(S s) {
+}
+
+void c(float2x2 m) {
+}
+
+void d(float2 v) {
+}
+
+void e(float f_1) {
+}
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol [[buffer(0)]]) {
+  a(*(tint_symbol));
+  b((*(tint_symbol))[2]);
+  c((*(tint_symbol))[2].m);
+  d(float2((*(tint_symbol))[0].m[1]).yx);
+  e(float2((*(tint_symbol))[0].m[1]).yx[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_fn.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_fn.wgsl.expected.spvasm
new file mode 100644
index 0000000..6f93912
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_fn.wgsl.expected.spvasm

@@ -0,0 +1,200 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 119
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "after"
+               OpName %u "u"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %a "a"
+               OpName %a_1 "a_1"
+               OpName %b "b"
+               OpName %s "s"
+               OpName %c "c"
+               OpName %m "m"
+               OpName %d "d"
+               OpName %v "v"
+               OpName %e "e"
+               OpName %f_1 "f_1"
+               OpName %conv_S "conv_S"
+               OpName %val "val"
+               OpName %conv_arr4_S "conv_arr4_S"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_u_inner_2_m "load_u_inner_2_m"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 8
+               OpMemberDecorate %S_std140 2 Offset 16
+               OpMemberDecorate %S_std140 3 Offset 64
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 8
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 8
+               OpMemberDecorate %S 2 Offset 64
+               OpDecorate %_arr_S_uint_4 ArrayStride 128
+        %int = OpTypeInt 32 1
+      %float = OpTypeFloat 32
+    %v2float = OpTypeVector %float 2
+   %S_std140 = OpTypeStruct %int %v2float %v2float %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+       %void = OpTypeVoid
+%mat2v2float = OpTypeMatrix %v2float 2
+          %S = OpTypeStruct %int %mat2v2float %int
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+         %11 = OpTypeFunction %void %_arr_S_uint_4
+         %19 = OpTypeFunction %void %S
+         %23 = OpTypeFunction %void %mat2v2float
+         %27 = OpTypeFunction %void %v2float
+         %31 = OpTypeFunction %void %float
+         %35 = OpTypeFunction %S %S_std140
+         %45 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
+%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
+         %51 = OpConstantNull %_arr_S_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %54 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
+         %67 = OpConstantNull %_arr_S_std140_uint_4
+%_ptr_Function_S = OpTypePointer Function %S
+%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
+     %uint_1 = OpConstant %uint 1
+         %80 = OpTypeFunction %mat2v2float
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
+         %96 = OpTypeFunction %void
+%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
+          %a = OpFunction %void None %11
+        %a_1 = OpFunctionParameter %_arr_S_uint_4
+         %18 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %b = OpFunction %void None %19
+          %s = OpFunctionParameter %S
+         %22 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %c = OpFunction %void None %23
+          %m = OpFunctionParameter %mat2v2float
+         %26 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %d = OpFunction %void None %27
+          %v = OpFunctionParameter %v2float
+         %30 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %e = OpFunction %void None %31
+        %f_1 = OpFunctionParameter %float
+         %34 = OpLabel
+               OpReturn
+               OpFunctionEnd
+     %conv_S = OpFunction %S None %35
+        %val = OpFunctionParameter %S_std140
+         %38 = OpLabel
+         %39 = OpCompositeExtract %int %val 0
+         %40 = OpCompositeExtract %v2float %val 1
+         %41 = OpCompositeExtract %v2float %val 2
+         %42 = OpCompositeConstruct %mat2v2float %40 %41
+         %43 = OpCompositeExtract %int %val 3
+         %44 = OpCompositeConstruct %S %39 %42 %43
+               OpReturnValue %44
+               OpFunctionEnd
+%conv_arr4_S = OpFunction %_arr_S_uint_4 None %45
+      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
+         %48 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %51
+          %i = OpVariable %_ptr_Function_uint Function %54
+%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %67
+               OpBranch %55
+         %55 = OpLabel
+               OpLoopMerge %56 %57 None
+               OpBranch %58
+         %58 = OpLabel
+         %60 = OpLoad %uint %i
+         %61 = OpULessThan %bool %60 %uint_4
+         %59 = OpLogicalNot %bool %61
+               OpSelectionMerge %63 None
+               OpBranchConditional %59 %64 %63
+         %64 = OpLabel
+               OpBranch %56
+         %63 = OpLabel
+               OpStore %var_for_index %val_0
+         %68 = OpLoad %uint %i
+         %70 = OpAccessChain %_ptr_Function_S %arr %68
+         %72 = OpLoad %uint %i
+         %74 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %72
+         %75 = OpLoad %S_std140 %74
+         %71 = OpFunctionCall %S %conv_S %75
+               OpStore %70 %71
+               OpBranch %57
+         %57 = OpLabel
+         %76 = OpLoad %uint %i
+         %78 = OpIAdd %uint %76 %uint_1
+               OpStore %i %78
+               OpBranch %55
+         %56 = OpLabel
+         %79 = OpLoad %_arr_S_uint_4 %arr
+               OpReturnValue %79
+               OpFunctionEnd
+%load_u_inner_2_m = OpFunction %mat2v2float None %80
+         %82 = OpLabel
+         %87 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %90 = OpAccessChain %_ptr_Uniform_v2float %87 %uint_1
+         %91 = OpLoad %v2float %90
+         %93 = OpAccessChain %_ptr_Uniform_v2float %87 %uint_2
+         %94 = OpLoad %v2float %93
+         %95 = OpCompositeConstruct %mat2v2float %91 %94
+               OpReturnValue %95
+               OpFunctionEnd
+          %f = OpFunction %void None %96
+         %98 = OpLabel
+        %102 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
+        %103 = OpLoad %_arr_S_std140_uint_4 %102
+        %100 = OpFunctionCall %_arr_S_uint_4 %conv_arr4_S %103
+         %99 = OpFunctionCall %void %a %100
+        %106 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+        %107 = OpLoad %S_std140 %106
+        %105 = OpFunctionCall %S %conv_S %107
+        %104 = OpFunctionCall %void %b %105
+        %109 = OpFunctionCall %mat2v2float %load_u_inner_2_m
+        %108 = OpFunctionCall %void %c %109
+        %111 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %54 %uint_2
+        %112 = OpLoad %v2float %111
+        %113 = OpVectorShuffle %v2float %112 %112 1 0
+        %110 = OpFunctionCall %void %d %113
+        %115 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %54 %uint_2
+        %116 = OpLoad %v2float %115
+        %117 = OpVectorShuffle %v2float %116 %116 1 0
+        %118 = OpCompositeExtract %float %117 0
+        %114 = OpFunctionCall %void %e %118
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_fn.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_fn.wgsl.expected.wgsl
new file mode 100644
index 0000000..e1d57cd
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_fn.wgsl.expected.wgsl

@@ -0,0 +1,32 @@
+struct S {
+  before : i32,
+  m : mat2x2<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+fn a(a : array<S, 4>) {
+}
+
+fn b(s : S) {
+}
+
+fn c(m : mat2x2<f32>) {
+}
+
+fn d(v : vec2<f32>) {
+}
+
+fn e(f : f32) {
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  a(u);
+  b(u[2]);
+  c(u[2].m);
+  d(u[0].m[1].yx);
+  e(u[0].m[1].yx.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_private.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_private.wgsl
new file mode 100644
index 0000000..68839fa
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_private.wgsl

@@ -0,0 +1,17 @@
+struct S {
+  before : i32,
+  m : mat2x2<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+var<private> p : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    p = u;
+    p[1] = u[2];
+    p[3].m = u[2].m;
+    p[1].m[0] = u[0].m[1].yx;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_private.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_private.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..be509ec
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_private.wgsl.expected.dxc.hlsl

@@ -0,0 +1,45 @@
+struct S {
+  int before;
+  float2x2 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+static S p[4] = (S[4])0;
+
+float2x2 tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  return float2x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)));
+}
+
+S tint_symbol_1(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_2 = ((offset + 0u)) / 4;
+  const uint scalar_offset_3 = ((offset + 64u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4]), tint_symbol_3(buffer, (offset + 8u)), asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = tint_symbol_1(u, 256u);
+  p[3].m = tint_symbol_3(u, 264u);
+  p[1].m[0] = asfloat(u[1].xy).yx;
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_private.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_private.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..be509ec
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_private.wgsl.expected.fxc.hlsl

@@ -0,0 +1,45 @@
+struct S {
+  int before;
+  float2x2 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+static S p[4] = (S[4])0;
+
+float2x2 tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  return float2x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)));
+}
+
+S tint_symbol_1(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_2 = ((offset + 0u)) / 4;
+  const uint scalar_offset_3 = ((offset + 64u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4]), tint_symbol_3(buffer, (offset + 8u)), asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = tint_symbol_1(u, 256u);
+  p[3].m = tint_symbol_3(u, 264u);
+  p[1].m[0] = asfloat(u[1].xy).yx;
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_private.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_private.wgsl.expected.glsl
new file mode 100644
index 0000000..9859836
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_private.wgsl.expected.glsl

@@ -0,0 +1,102 @@
+#version 310 es
+
+struct S {
+  int before;
+  uint pad;
+  mat2 m;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  int after;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+};
+
+struct S_std140 {
+  int before;
+  uint pad;
+  vec2 m_0;
+  vec2 m_1;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  int after;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  S_std140 inner[4];
+} u;
+
+S p[4] = S[4](S(0, 0u, mat2(0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, mat2(0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, mat2(0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, mat2(0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+S conv_S(S_std140 val) {
+  return S(val.before, val.pad, mat2(val.m_0, val.m_1), val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.pad_7, val.pad_8, val.pad_9, val.pad_10, val.after, val.pad_11, val.pad_12, val.pad_13, val.pad_14, val.pad_15, val.pad_16, val.pad_17, val.pad_18, val.pad_19, val.pad_20, val.pad_21, val.pad_22, val.pad_23, val.pad_24, val.pad_25);
+}
+
+S[4] conv_arr4_S(S_std140 val[4]) {
+  S arr[4] = S[4](S(0, 0u, mat2(0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, mat2(0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, mat2(0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, mat2(0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_S(val[i]);
+    }
+  }
+  return arr;
+}
+
+mat2 load_u_inner_2_m() {
+  return mat2(u.inner[2u].m_0, u.inner[2u].m_1);
+}
+
+void f() {
+  p = conv_arr4_S(u.inner);
+  p[1] = conv_S(u.inner[2u]);
+  p[3].m = load_u_inner_2_m();
+  p[1].m[0] = u.inner[0u].m_1.yx;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_private.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_private.wgsl.expected.msl
new file mode 100644
index 0000000..a1c599c
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_private.wgsl.expected.msl

@@ -0,0 +1,34 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
+  /* 0x0008 */ float2x2 m;
+  /* 0x0018 */ tint_array<int8_t, 40> tint_pad_1;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_2;
+};
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol_1 [[buffer(0)]]) {
+  thread tint_array<S, 4> tint_symbol = {};
+  tint_symbol = *(tint_symbol_1);
+  tint_symbol[1] = (*(tint_symbol_1))[2];
+  tint_symbol[3].m = (*(tint_symbol_1))[2].m;
+  tint_symbol[1].m[0] = float2((*(tint_symbol_1))[0].m[1]).yx;
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_private.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_private.wgsl.expected.spvasm
new file mode 100644
index 0000000..067e7fd
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_private.wgsl.expected.spvasm

@@ -0,0 +1,167 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 101
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "after"
+               OpName %u "u"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %p "p"
+               OpName %conv_S "conv_S"
+               OpName %val "val"
+               OpName %conv_arr4_S "conv_arr4_S"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_u_inner_2_m "load_u_inner_2_m"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 8
+               OpMemberDecorate %S_std140 2 Offset 16
+               OpMemberDecorate %S_std140 3 Offset 64
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 8
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 8
+               OpMemberDecorate %S 2 Offset 64
+               OpDecorate %_arr_S_uint_4 ArrayStride 128
+        %int = OpTypeInt 32 1
+      %float = OpTypeFloat 32
+    %v2float = OpTypeVector %float 2
+   %S_std140 = OpTypeStruct %int %v2float %v2float %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+%mat2v2float = OpTypeMatrix %v2float 2
+          %S = OpTypeStruct %int %mat2v2float %int
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+%_ptr_Private__arr_S_uint_4 = OpTypePointer Private %_arr_S_uint_4
+         %16 = OpConstantNull %_arr_S_uint_4
+          %p = OpVariable %_ptr_Private__arr_S_uint_4 Private %16
+         %17 = OpTypeFunction %S %S_std140
+         %27 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
+%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %35 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
+         %48 = OpConstantNull %_arr_S_std140_uint_4
+%_ptr_Function_S = OpTypePointer Function %S
+%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
+     %uint_1 = OpConstant %uint 1
+         %61 = OpTypeFunction %mat2v2float
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
+       %void = OpTypeVoid
+         %77 = OpTypeFunction %void
+%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
+      %int_1 = OpConstant %int 1
+%_ptr_Private_S = OpTypePointer Private %S
+      %int_3 = OpConstant %int 3
+%_ptr_Private_mat2v2float = OpTypePointer Private %mat2v2float
+         %95 = OpConstantNull %int
+%_ptr_Private_v2float = OpTypePointer Private %v2float
+     %conv_S = OpFunction %S None %17
+        %val = OpFunctionParameter %S_std140
+         %20 = OpLabel
+         %21 = OpCompositeExtract %int %val 0
+         %22 = OpCompositeExtract %v2float %val 1
+         %23 = OpCompositeExtract %v2float %val 2
+         %24 = OpCompositeConstruct %mat2v2float %22 %23
+         %25 = OpCompositeExtract %int %val 3
+         %26 = OpCompositeConstruct %S %21 %24 %25
+               OpReturnValue %26
+               OpFunctionEnd
+%conv_arr4_S = OpFunction %_arr_S_uint_4 None %27
+      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
+         %30 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %16
+          %i = OpVariable %_ptr_Function_uint Function %35
+%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %48
+               OpBranch %36
+         %36 = OpLabel
+               OpLoopMerge %37 %38 None
+               OpBranch %39
+         %39 = OpLabel
+         %41 = OpLoad %uint %i
+         %42 = OpULessThan %bool %41 %uint_4
+         %40 = OpLogicalNot %bool %42
+               OpSelectionMerge %44 None
+               OpBranchConditional %40 %45 %44
+         %45 = OpLabel
+               OpBranch %37
+         %44 = OpLabel
+               OpStore %var_for_index %val_0
+         %49 = OpLoad %uint %i
+         %51 = OpAccessChain %_ptr_Function_S %arr %49
+         %53 = OpLoad %uint %i
+         %55 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %53
+         %56 = OpLoad %S_std140 %55
+         %52 = OpFunctionCall %S %conv_S %56
+               OpStore %51 %52
+               OpBranch %38
+         %38 = OpLabel
+         %57 = OpLoad %uint %i
+         %59 = OpIAdd %uint %57 %uint_1
+               OpStore %i %59
+               OpBranch %36
+         %37 = OpLabel
+         %60 = OpLoad %_arr_S_uint_4 %arr
+               OpReturnValue %60
+               OpFunctionEnd
+%load_u_inner_2_m = OpFunction %mat2v2float None %61
+         %63 = OpLabel
+         %68 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %71 = OpAccessChain %_ptr_Uniform_v2float %68 %uint_1
+         %72 = OpLoad %v2float %71
+         %74 = OpAccessChain %_ptr_Uniform_v2float %68 %uint_2
+         %75 = OpLoad %v2float %74
+         %76 = OpCompositeConstruct %mat2v2float %72 %75
+               OpReturnValue %76
+               OpFunctionEnd
+          %f = OpFunction %void None %77
+         %80 = OpLabel
+         %83 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
+         %84 = OpLoad %_arr_S_std140_uint_4 %83
+         %81 = OpFunctionCall %_arr_S_uint_4 %conv_arr4_S %84
+               OpStore %p %81
+         %87 = OpAccessChain %_ptr_Private_S %p %int_1
+         %89 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %90 = OpLoad %S_std140 %89
+         %88 = OpFunctionCall %S %conv_S %90
+               OpStore %87 %88
+         %93 = OpAccessChain %_ptr_Private_mat2v2float %p %int_3 %uint_1
+         %94 = OpFunctionCall %mat2v2float %load_u_inner_2_m
+               OpStore %93 %94
+         %97 = OpAccessChain %_ptr_Private_v2float %p %int_1 %uint_1 %95
+         %98 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %35 %uint_2
+         %99 = OpLoad %v2float %98
+        %100 = OpVectorShuffle %v2float %99 %99 1 0
+               OpStore %97 %100
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_private.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_private.wgsl.expected.wgsl
new file mode 100644
index 0000000..b7722ec
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_private.wgsl.expected.wgsl

@@ -0,0 +1,18 @@
+struct S {
+  before : i32,
+  m : mat2x2<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+var<private> p : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  p = u;
+  p[1] = u[2];
+  p[3].m = u[2].m;
+  p[1].m[0] = u[0].m[1].yx;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_storage.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_storage.wgsl
new file mode 100644
index 0000000..7226605
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_storage.wgsl

@@ -0,0 +1,17 @@
+struct S {
+  before : i32,
+  m : mat2x2<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    s = u;
+    s[1] = u[2];
+    s[3].m = u[2].m;
+    s[1].m[0] = u[0].m[1].yx;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_storage.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_storage.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..bfcca34
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_storage.wgsl.expected.dxc.hlsl

@@ -0,0 +1,65 @@
+struct S {
+  int before;
+  float2x2 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol_3(RWByteAddressBuffer buffer, uint offset, float2x2 value) {
+  buffer.Store2((offset + 0u), asuint(value[0u]));
+  buffer.Store2((offset + 8u), asuint(value[1u]));
+}
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, S value) {
+  buffer.Store((offset + 0u), asuint(value.before));
+  tint_symbol_3(buffer, (offset + 8u), value.m);
+  buffer.Store((offset + 64u), asuint(value.after));
+}
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, S value[4]) {
+  S array[4] = value;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      tint_symbol_1(buffer, (offset + (i * 128u)), array[i]);
+    }
+  }
+}
+
+float2x2 tint_symbol_8(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  return float2x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)));
+}
+
+S tint_symbol_6(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_2 = ((offset + 0u)) / 4;
+  const uint scalar_offset_3 = ((offset + 64u)) / 4;
+  const S tint_symbol_10 = {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4]), tint_symbol_8(buffer, (offset + 8u)), asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])};
+  return tint_symbol_10;
+}
+
+typedef S tint_symbol_5_ret[4];
+tint_symbol_5_ret tint_symbol_5(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_6(buffer, (offset + (i_1 * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_5(u, 0u));
+  tint_symbol_1(s, 128u, tint_symbol_6(u, 256u));
+  tint_symbol_3(s, 392u, tint_symbol_8(u, 264u));
+  s.Store2(136u, asuint(asfloat(u[1].xy).yx));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_storage.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_storage.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..bfcca34
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_storage.wgsl.expected.fxc.hlsl

@@ -0,0 +1,65 @@
+struct S {
+  int before;
+  float2x2 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol_3(RWByteAddressBuffer buffer, uint offset, float2x2 value) {
+  buffer.Store2((offset + 0u), asuint(value[0u]));
+  buffer.Store2((offset + 8u), asuint(value[1u]));
+}
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, S value) {
+  buffer.Store((offset + 0u), asuint(value.before));
+  tint_symbol_3(buffer, (offset + 8u), value.m);
+  buffer.Store((offset + 64u), asuint(value.after));
+}
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, S value[4]) {
+  S array[4] = value;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      tint_symbol_1(buffer, (offset + (i * 128u)), array[i]);
+    }
+  }
+}
+
+float2x2 tint_symbol_8(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  return float2x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)));
+}
+
+S tint_symbol_6(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_2 = ((offset + 0u)) / 4;
+  const uint scalar_offset_3 = ((offset + 64u)) / 4;
+  const S tint_symbol_10 = {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4]), tint_symbol_8(buffer, (offset + 8u)), asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])};
+  return tint_symbol_10;
+}
+
+typedef S tint_symbol_5_ret[4];
+tint_symbol_5_ret tint_symbol_5(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_6(buffer, (offset + (i_1 * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_5(u, 0u));
+  tint_symbol_1(s, 128u, tint_symbol_6(u, 256u));
+  tint_symbol_3(s, 392u, tint_symbol_8(u, 264u));
+  s.Store2(136u, asuint(asfloat(u[1].xy).yx));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_storage.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_storage.wgsl.expected.glsl
new file mode 100644
index 0000000..4056465
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_storage.wgsl.expected.glsl

@@ -0,0 +1,105 @@
+#version 310 es
+
+struct S {
+  int before;
+  uint pad;
+  mat2 m;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  int after;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+};
+
+struct S_std140 {
+  int before;
+  uint pad;
+  vec2 m_0;
+  vec2 m_1;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  int after;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  S_std140 inner[4];
+} u;
+
+layout(binding = 1, std430) buffer u_block_ssbo {
+  S inner[4];
+} s;
+
+S conv_S(S_std140 val) {
+  return S(val.before, val.pad, mat2(val.m_0, val.m_1), val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.pad_7, val.pad_8, val.pad_9, val.pad_10, val.after, val.pad_11, val.pad_12, val.pad_13, val.pad_14, val.pad_15, val.pad_16, val.pad_17, val.pad_18, val.pad_19, val.pad_20, val.pad_21, val.pad_22, val.pad_23, val.pad_24, val.pad_25);
+}
+
+S[4] conv_arr4_S(S_std140 val[4]) {
+  S arr[4] = S[4](S(0, 0u, mat2(0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, mat2(0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, mat2(0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, mat2(0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_S(val[i]);
+    }
+  }
+  return arr;
+}
+
+mat2 load_u_inner_2_m() {
+  return mat2(u.inner[2u].m_0, u.inner[2u].m_1);
+}
+
+void f() {
+  s.inner = conv_arr4_S(u.inner);
+  s.inner[1] = conv_S(u.inner[2u]);
+  s.inner[3].m = load_u_inner_2_m();
+  s.inner[1].m[0] = u.inner[0u].m_1.yx;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_storage.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_storage.wgsl.expected.msl
new file mode 100644
index 0000000..c949492
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_storage.wgsl.expected.msl

@@ -0,0 +1,33 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
+  /* 0x0008 */ float2x2 m;
+  /* 0x0018 */ tint_array<int8_t, 40> tint_pad_1;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_2;
+};
+
+kernel void f(device tint_array<S, 4>* tint_symbol [[buffer(1)]], const constant tint_array<S, 4>* tint_symbol_1 [[buffer(0)]]) {
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[2];
+  (*(tint_symbol))[3].m = (*(tint_symbol_1))[2].m;
+  (*(tint_symbol))[1].m[0] = float2((*(tint_symbol_1))[0].m[1]).yx;
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_storage.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_storage.wgsl.expected.spvasm
new file mode 100644
index 0000000..be8fa7d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_storage.wgsl.expected.spvasm

@@ -0,0 +1,176 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 104
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "after"
+               OpName %u "u"
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %s "s"
+               OpName %conv_S "conv_S"
+               OpName %val "val"
+               OpName %conv_arr4_S "conv_arr4_S"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_u_inner_2_m "load_u_inner_2_m"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 8
+               OpMemberDecorate %S_std140 2 Offset 16
+               OpMemberDecorate %S_std140 3 Offset 64
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 8
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 8
+               OpMemberDecorate %S 2 Offset 64
+               OpDecorate %_arr_S_uint_4 ArrayStride 128
+               OpDecorate %s DescriptorSet 0
+               OpDecorate %s Binding 1
+        %int = OpTypeInt 32 1
+      %float = OpTypeFloat 32
+    %v2float = OpTypeVector %float 2
+   %S_std140 = OpTypeStruct %int %v2float %v2float %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+%mat2v2float = OpTypeMatrix %v2float 2
+          %S = OpTypeStruct %int %mat2v2float %int
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+    %u_block = OpTypeStruct %_arr_S_uint_4
+%_ptr_StorageBuffer_u_block = OpTypePointer StorageBuffer %u_block
+          %s = OpVariable %_ptr_StorageBuffer_u_block StorageBuffer
+         %17 = OpTypeFunction %S %S_std140
+         %27 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
+%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
+         %33 = OpConstantNull %_arr_S_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %36 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
+         %49 = OpConstantNull %_arr_S_std140_uint_4
+%_ptr_Function_S = OpTypePointer Function %S
+%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
+     %uint_1 = OpConstant %uint 1
+         %62 = OpTypeFunction %mat2v2float
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
+       %void = OpTypeVoid
+         %78 = OpTypeFunction %void
+%_ptr_StorageBuffer__arr_S_uint_4 = OpTypePointer StorageBuffer %_arr_S_uint_4
+%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
+      %int_1 = OpConstant %int 1
+%_ptr_StorageBuffer_S = OpTypePointer StorageBuffer %S
+      %int_3 = OpConstant %int 3
+%_ptr_StorageBuffer_mat2v2float = OpTypePointer StorageBuffer %mat2v2float
+         %98 = OpConstantNull %int
+%_ptr_StorageBuffer_v2float = OpTypePointer StorageBuffer %v2float
+     %conv_S = OpFunction %S None %17
+        %val = OpFunctionParameter %S_std140
+         %20 = OpLabel
+         %21 = OpCompositeExtract %int %val 0
+         %22 = OpCompositeExtract %v2float %val 1
+         %23 = OpCompositeExtract %v2float %val 2
+         %24 = OpCompositeConstruct %mat2v2float %22 %23
+         %25 = OpCompositeExtract %int %val 3
+         %26 = OpCompositeConstruct %S %21 %24 %25
+               OpReturnValue %26
+               OpFunctionEnd
+%conv_arr4_S = OpFunction %_arr_S_uint_4 None %27
+      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
+         %30 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %33
+          %i = OpVariable %_ptr_Function_uint Function %36
+%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %49
+               OpBranch %37
+         %37 = OpLabel
+               OpLoopMerge %38 %39 None
+               OpBranch %40
+         %40 = OpLabel
+         %42 = OpLoad %uint %i
+         %43 = OpULessThan %bool %42 %uint_4
+         %41 = OpLogicalNot %bool %43
+               OpSelectionMerge %45 None
+               OpBranchConditional %41 %46 %45
+         %46 = OpLabel
+               OpBranch %38
+         %45 = OpLabel
+               OpStore %var_for_index %val_0
+         %50 = OpLoad %uint %i
+         %52 = OpAccessChain %_ptr_Function_S %arr %50
+         %54 = OpLoad %uint %i
+         %56 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %54
+         %57 = OpLoad %S_std140 %56
+         %53 = OpFunctionCall %S %conv_S %57
+               OpStore %52 %53
+               OpBranch %39
+         %39 = OpLabel
+         %58 = OpLoad %uint %i
+         %60 = OpIAdd %uint %58 %uint_1
+               OpStore %i %60
+               OpBranch %37
+         %38 = OpLabel
+         %61 = OpLoad %_arr_S_uint_4 %arr
+               OpReturnValue %61
+               OpFunctionEnd
+%load_u_inner_2_m = OpFunction %mat2v2float None %62
+         %64 = OpLabel
+         %69 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %72 = OpAccessChain %_ptr_Uniform_v2float %69 %uint_1
+         %73 = OpLoad %v2float %72
+         %75 = OpAccessChain %_ptr_Uniform_v2float %69 %uint_2
+         %76 = OpLoad %v2float %75
+         %77 = OpCompositeConstruct %mat2v2float %73 %76
+               OpReturnValue %77
+               OpFunctionEnd
+          %f = OpFunction %void None %78
+         %81 = OpLabel
+         %83 = OpAccessChain %_ptr_StorageBuffer__arr_S_uint_4 %s %uint_0
+         %86 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
+         %87 = OpLoad %_arr_S_std140_uint_4 %86
+         %84 = OpFunctionCall %_arr_S_uint_4 %conv_arr4_S %87
+               OpStore %83 %84
+         %90 = OpAccessChain %_ptr_StorageBuffer_S %s %uint_0 %int_1
+         %92 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %93 = OpLoad %S_std140 %92
+         %91 = OpFunctionCall %S %conv_S %93
+               OpStore %90 %91
+         %96 = OpAccessChain %_ptr_StorageBuffer_mat2v2float %s %uint_0 %int_3 %uint_1
+         %97 = OpFunctionCall %mat2v2float %load_u_inner_2_m
+               OpStore %96 %97
+        %100 = OpAccessChain %_ptr_StorageBuffer_v2float %s %uint_0 %int_1 %uint_1 %98
+        %101 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %36 %uint_2
+        %102 = OpLoad %v2float %101
+        %103 = OpVectorShuffle %v2float %102 %102 1 0
+               OpStore %100 %103
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_storage.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_storage.wgsl.expected.wgsl
new file mode 100644
index 0000000..0205aaf
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_storage.wgsl.expected.wgsl

@@ -0,0 +1,18 @@
+struct S {
+  before : i32,
+  m : mat2x2<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  s = u;
+  s[1] = u[2];
+  s[3].m = u[2].m;
+  s[1].m[0] = u[0].m[1].yx;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_workgroup.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_workgroup.wgsl
new file mode 100644
index 0000000..af9a3b3
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_workgroup.wgsl

@@ -0,0 +1,17 @@
+struct S {
+  before : i32,
+  m : mat2x2<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+var<workgroup> w : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    w = u;
+    w[1] = u[2];
+    w[3].m = u[2].m;
+    w[1].m[0] = u[0].m[1].yx;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_workgroup.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_workgroup.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..c0c70b3
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_workgroup.wgsl.expected.dxc.hlsl

@@ -0,0 +1,61 @@
+struct S {
+  int before;
+  float2x2 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+groupshared S w[4];
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+float2x2 tint_symbol_5(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  return float2x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)));
+}
+
+S tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_2 = ((offset + 0u)) / 4;
+  const uint scalar_offset_3 = ((offset + 64u)) / 4;
+  const S tint_symbol_8 = {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4]), tint_symbol_5(buffer, (offset + 8u)), asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])};
+  return tint_symbol_8;
+}
+
+typedef S tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 128u)));
+    }
+  }
+  return arr;
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      const uint i = idx;
+      const S tint_symbol_7 = (S)0;
+      w[i] = tint_symbol_7;
+    }
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = tint_symbol_3(u, 256u);
+  w[3].m = tint_symbol_5(u, 264u);
+  w[1].m[0] = asfloat(u[1].xy).yx;
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_workgroup.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_workgroup.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..c0c70b3
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_workgroup.wgsl.expected.fxc.hlsl

@@ -0,0 +1,61 @@
+struct S {
+  int before;
+  float2x2 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+groupshared S w[4];
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+float2x2 tint_symbol_5(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  return float2x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)));
+}
+
+S tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_2 = ((offset + 0u)) / 4;
+  const uint scalar_offset_3 = ((offset + 64u)) / 4;
+  const S tint_symbol_8 = {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4]), tint_symbol_5(buffer, (offset + 8u)), asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])};
+  return tint_symbol_8;
+}
+
+typedef S tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 128u)));
+    }
+  }
+  return arr;
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      const uint i = idx;
+      const S tint_symbol_7 = (S)0;
+      w[i] = tint_symbol_7;
+    }
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = tint_symbol_3(u, 256u);
+  w[3].m = tint_symbol_5(u, 264u);
+  w[1].m[0] = asfloat(u[1].xy).yx;
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_workgroup.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_workgroup.wgsl.expected.glsl
new file mode 100644
index 0000000..727abb5
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_workgroup.wgsl.expected.glsl

@@ -0,0 +1,110 @@
+#version 310 es
+
+struct S {
+  int before;
+  uint pad;
+  mat2 m;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  int after;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+};
+
+struct S_std140 {
+  int before;
+  uint pad;
+  vec2 m_0;
+  vec2 m_1;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  int after;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  S_std140 inner[4];
+} u;
+
+shared S w[4];
+S conv_S(S_std140 val) {
+  return S(val.before, val.pad, mat2(val.m_0, val.m_1), val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.pad_7, val.pad_8, val.pad_9, val.pad_10, val.after, val.pad_11, val.pad_12, val.pad_13, val.pad_14, val.pad_15, val.pad_16, val.pad_17, val.pad_18, val.pad_19, val.pad_20, val.pad_21, val.pad_22, val.pad_23, val.pad_24, val.pad_25);
+}
+
+S[4] conv_arr4_S(S_std140 val[4]) {
+  S arr[4] = S[4](S(0, 0u, mat2(0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, mat2(0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, mat2(0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, mat2(0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_S(val[i]);
+    }
+  }
+  return arr;
+}
+
+mat2 load_u_inner_2_m() {
+  return mat2(u.inner[2u].m_0, u.inner[2u].m_1);
+}
+
+void f(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      uint i = idx;
+      S tint_symbol = S(0, 0u, mat2(vec2(0.0f), vec2(0.0f)), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u);
+      w[i] = tint_symbol;
+    }
+  }
+  barrier();
+  w = conv_arr4_S(u.inner);
+  w[1] = conv_S(u.inner[2u]);
+  w[3].m = load_u_inner_2_m();
+  w[1].m[0] = u.inner[0u].m_1.yx;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f(gl_LocalInvocationIndex);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_workgroup.wgsl.expected.msl
new file mode 100644
index 0000000..667db2d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_workgroup.wgsl.expected.msl

@@ -0,0 +1,48 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
+  /* 0x0008 */ float2x2 m;
+  /* 0x0018 */ tint_array<int8_t, 40> tint_pad_1;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_2;
+};
+
+struct tint_symbol_6 {
+  tint_array<S, 4> w;
+};
+
+void f_inner(uint local_invocation_index, threadgroup tint_array<S, 4>* const tint_symbol_1, const constant tint_array<S, 4>* const tint_symbol_2) {
+  for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+    uint const i = idx;
+    S const tint_symbol = S{};
+    (*(tint_symbol_1))[i] = tint_symbol;
+  }
+  threadgroup_barrier(mem_flags::mem_threadgroup);
+  *(tint_symbol_1) = *(tint_symbol_2);
+  (*(tint_symbol_1))[1] = (*(tint_symbol_2))[2];
+  (*(tint_symbol_1))[3].m = (*(tint_symbol_2))[2].m;
+  (*(tint_symbol_1))[1].m[0] = float2((*(tint_symbol_2))[0].m[1]).yx;
+}
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol_5 [[buffer(0)]], threadgroup tint_symbol_6* tint_symbol_4 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
+  threadgroup tint_array<S, 4>* const tint_symbol_3 = &((*(tint_symbol_4)).w);
+  f_inner(local_invocation_index, tint_symbol_3, tint_symbol_5);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_workgroup.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_workgroup.wgsl.expected.spvasm
new file mode 100644
index 0000000..68218cc
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_workgroup.wgsl.expected.spvasm

@@ -0,0 +1,210 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 126
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f" %local_invocation_index_1
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %local_invocation_index_1 "local_invocation_index_1"
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "after"
+               OpName %u "u"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %w "w"
+               OpName %conv_S "conv_S"
+               OpName %val "val"
+               OpName %conv_arr4_S "conv_arr4_S"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_u_inner_2_m "load_u_inner_2_m"
+               OpName %f_inner "f_inner"
+               OpName %local_invocation_index "local_invocation_index"
+               OpName %idx "idx"
+               OpName %f "f"
+               OpDecorate %local_invocation_index_1 BuiltIn LocalInvocationIndex
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 8
+               OpMemberDecorate %S_std140 2 Offset 16
+               OpMemberDecorate %S_std140 3 Offset 64
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 8
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 8
+               OpMemberDecorate %S 2 Offset 64
+               OpDecorate %_arr_S_uint_4 ArrayStride 128
+       %uint = OpTypeInt 32 0
+%_ptr_Input_uint = OpTypePointer Input %uint
+%local_invocation_index_1 = OpVariable %_ptr_Input_uint Input
+        %int = OpTypeInt 32 1
+      %float = OpTypeFloat 32
+    %v2float = OpTypeVector %float 2
+   %S_std140 = OpTypeStruct %int %v2float %v2float %int
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+%mat2v2float = OpTypeMatrix %v2float 2
+          %S = OpTypeStruct %int %mat2v2float %int
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+%_ptr_Workgroup__arr_S_uint_4 = OpTypePointer Workgroup %_arr_S_uint_4
+          %w = OpVariable %_ptr_Workgroup__arr_S_uint_4 Workgroup
+         %18 = OpTypeFunction %S %S_std140
+         %28 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
+%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
+         %34 = OpConstantNull %_arr_S_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %37 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
+         %50 = OpConstantNull %_arr_S_std140_uint_4
+%_ptr_Function_S = OpTypePointer Function %S
+%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
+     %uint_1 = OpConstant %uint 1
+         %63 = OpTypeFunction %mat2v2float
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
+       %void = OpTypeVoid
+         %79 = OpTypeFunction %void %uint
+%_ptr_Workgroup_S = OpTypePointer Workgroup %S
+         %97 = OpConstantNull %S
+   %uint_264 = OpConstant %uint 264
+%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
+      %int_1 = OpConstant %int 1
+      %int_3 = OpConstant %int 3
+%_ptr_Workgroup_mat2v2float = OpTypePointer Workgroup %mat2v2float
+        %115 = OpConstantNull %int
+%_ptr_Workgroup_v2float = OpTypePointer Workgroup %v2float
+        %121 = OpTypeFunction %void
+     %conv_S = OpFunction %S None %18
+        %val = OpFunctionParameter %S_std140
+         %21 = OpLabel
+         %22 = OpCompositeExtract %int %val 0
+         %23 = OpCompositeExtract %v2float %val 1
+         %24 = OpCompositeExtract %v2float %val 2
+         %25 = OpCompositeConstruct %mat2v2float %23 %24
+         %26 = OpCompositeExtract %int %val 3
+         %27 = OpCompositeConstruct %S %22 %25 %26
+               OpReturnValue %27
+               OpFunctionEnd
+%conv_arr4_S = OpFunction %_arr_S_uint_4 None %28
+      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
+         %31 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %34
+          %i = OpVariable %_ptr_Function_uint Function %37
+%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %50
+               OpBranch %38
+         %38 = OpLabel
+               OpLoopMerge %39 %40 None
+               OpBranch %41
+         %41 = OpLabel
+         %43 = OpLoad %uint %i
+         %44 = OpULessThan %bool %43 %uint_4
+         %42 = OpLogicalNot %bool %44
+               OpSelectionMerge %46 None
+               OpBranchConditional %42 %47 %46
+         %47 = OpLabel
+               OpBranch %39
+         %46 = OpLabel
+               OpStore %var_for_index %val_0
+         %51 = OpLoad %uint %i
+         %53 = OpAccessChain %_ptr_Function_S %arr %51
+         %55 = OpLoad %uint %i
+         %57 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %55
+         %58 = OpLoad %S_std140 %57
+         %54 = OpFunctionCall %S %conv_S %58
+               OpStore %53 %54
+               OpBranch %40
+         %40 = OpLabel
+         %59 = OpLoad %uint %i
+         %61 = OpIAdd %uint %59 %uint_1
+               OpStore %i %61
+               OpBranch %38
+         %39 = OpLabel
+         %62 = OpLoad %_arr_S_uint_4 %arr
+               OpReturnValue %62
+               OpFunctionEnd
+%load_u_inner_2_m = OpFunction %mat2v2float None %63
+         %65 = OpLabel
+         %70 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %73 = OpAccessChain %_ptr_Uniform_v2float %70 %uint_1
+         %74 = OpLoad %v2float %73
+         %76 = OpAccessChain %_ptr_Uniform_v2float %70 %uint_2
+         %77 = OpLoad %v2float %76
+         %78 = OpCompositeConstruct %mat2v2float %74 %77
+               OpReturnValue %78
+               OpFunctionEnd
+    %f_inner = OpFunction %void None %79
+%local_invocation_index = OpFunctionParameter %uint
+         %83 = OpLabel
+        %idx = OpVariable %_ptr_Function_uint Function %37
+               OpStore %idx %local_invocation_index
+               OpBranch %85
+         %85 = OpLabel
+               OpLoopMerge %86 %87 None
+               OpBranch %88
+         %88 = OpLabel
+         %90 = OpLoad %uint %idx
+         %91 = OpULessThan %bool %90 %uint_4
+         %89 = OpLogicalNot %bool %91
+               OpSelectionMerge %92 None
+               OpBranchConditional %89 %93 %92
+         %93 = OpLabel
+               OpBranch %86
+         %92 = OpLabel
+         %94 = OpLoad %uint %idx
+         %96 = OpAccessChain %_ptr_Workgroup_S %w %94
+               OpStore %96 %97
+               OpBranch %87
+         %87 = OpLabel
+         %98 = OpLoad %uint %idx
+         %99 = OpIAdd %uint %98 %uint_1
+               OpStore %idx %99
+               OpBranch %85
+         %86 = OpLabel
+               OpControlBarrier %uint_2 %uint_2 %uint_264
+        %104 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
+        %105 = OpLoad %_arr_S_std140_uint_4 %104
+        %102 = OpFunctionCall %_arr_S_uint_4 %conv_arr4_S %105
+               OpStore %w %102
+        %107 = OpAccessChain %_ptr_Workgroup_S %w %int_1
+        %109 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+        %110 = OpLoad %S_std140 %109
+        %108 = OpFunctionCall %S %conv_S %110
+               OpStore %107 %108
+        %113 = OpAccessChain %_ptr_Workgroup_mat2v2float %w %int_3 %uint_1
+        %114 = OpFunctionCall %mat2v2float %load_u_inner_2_m
+               OpStore %113 %114
+        %117 = OpAccessChain %_ptr_Workgroup_v2float %w %int_1 %uint_1 %115
+        %118 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %37 %uint_2
+        %119 = OpLoad %v2float %118
+        %120 = OpVectorShuffle %v2float %119 %119 1 0
+               OpStore %117 %120
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %121
+        %123 = OpLabel
+        %125 = OpLoad %uint %local_invocation_index_1
+        %124 = OpFunctionCall %void %f_inner %125
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_workgroup.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_workgroup.wgsl.expected.wgsl
new file mode 100644
index 0000000..645ac59
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x2_f32/to_workgroup.wgsl.expected.wgsl

@@ -0,0 +1,18 @@
+struct S {
+  before : i32,
+  m : mat2x2<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+var<workgroup> w : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  w = u;
+  w[1] = u[2];
+  w[3].m = u[2].m;
+  w[1].m[0] = u[0].m[1].yx;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f16/dynamic_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/dynamic_index_via_ptr.wgsl
new file mode 100644
index 0000000..b2eab8f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/dynamic_index_via_ptr.wgsl

@@ -0,0 +1,34 @@
+enable f16;
+
+struct Inner {
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+var<private> counter = 0;
+fn i() -> i32 { counter++; return counter; }
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a           = &a;
+  let p_a_i         = &((*p_a)[i()]);
+  let p_a_i_a       = &((*p_a_i).a);
+  let p_a_i_a_i     = &((*p_a_i_a)[i()]);
+  let p_a_i_a_i_m   = &((*p_a_i_a_i).m);
+  let p_a_i_a_i_m_i = &((*p_a_i_a_i_m)[i()]);
+
+
+  let l_a             : array<Outer, 4> =  *p_a;
+  let l_a_i           : Outer           =  *p_a_i;
+  let l_a_i_a         : array<Inner, 4> =  *p_a_i_a;
+  let l_a_i_a_i       : Inner           =  *p_a_i_a_i;
+  let l_a_i_a_i_m     : mat2x3<f16>     =  *p_a_i_a_i_m;
+  let l_a_i_a_i_m_i   : vec3<f16>       =  *p_a_i_a_i_m_i;
+  let l_a_i_a_i_m_i_i : f16             = (*p_a_i_a_i_m_i)[i()];
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..63633dc
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,88 @@
+struct Inner {
+  matrix<float16_t, 2, 3> m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+matrix<float16_t, 2, 3> tint_symbol_8(uint4 buffer[64], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  return matrix<float16_t, 2, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]));
+}
+
+Inner tint_symbol_7(uint4 buffer[64], uint offset) {
+  const Inner tint_symbol_11 = {tint_symbol_8(buffer, (offset + 0u))};
+  return tint_symbol_11;
+}
+
+typedef Inner tint_symbol_6_ret[4];
+tint_symbol_6_ret tint_symbol_6(uint4 buffer[64], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_7(buffer, (offset + (i_1 * 64u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_5(uint4 buffer[64], uint offset) {
+  const Outer tint_symbol_12 = {tint_symbol_6(buffer, (offset + 0u))};
+  return tint_symbol_12;
+}
+
+typedef Outer tint_symbol_4_ret[4];
+tint_symbol_4_ret tint_symbol_4(uint4 buffer[64], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    for(uint i_2 = 0u; (i_2 < 4u); i_2 = (i_2 + 1u)) {
+      arr_1[i_2] = tint_symbol_5(buffer, (offset + (i_2 * 256u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_a_i_save = i();
+  const int p_a_i_a_i_save = i();
+  const int p_a_i_a_i_m_i_save = i();
+  const Outer l_a[4] = tint_symbol_4(a, 0u);
+  const Outer l_a_i = tint_symbol_5(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a[4] = tint_symbol_6(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a_i = tint_symbol_7(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const matrix<float16_t, 2, 3> l_a_i_a_i_m = tint_symbol_8(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const uint scalar_offset_2 = ((((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))) + (8u * uint(p_a_i_a_i_m_i_save)))) / 4;
+  uint4 ubo_load_5 = a[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const vector<float16_t, 3> l_a_i_a_i_m_i = vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]);
+  const int tint_symbol = p_a_i_save;
+  const int tint_symbol_1 = p_a_i_a_i_save;
+  const int tint_symbol_2 = p_a_i_a_i_m_i_save;
+  const int tint_symbol_3 = i();
+  const uint scalar_offset_bytes = (((((256u * uint(tint_symbol)) + (64u * uint(tint_symbol_1))) + (8u * uint(tint_symbol_2))) + (2u * uint(tint_symbol_3))));
+  const uint scalar_offset_index = scalar_offset_bytes / 4;
+  const float16_t l_a_i_a_i_m_i_i = float16_t(f16tof32(((a[scalar_offset_index / 4][scalar_offset_index % 4] >> (scalar_offset_bytes % 4 == 0 ? 0 : 16)) & 0xFFFF)));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..1b18347
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,93 @@
+SKIP: FAILED
+
+struct Inner {
+  matrix<float16_t, 2, 3> m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+matrix<float16_t, 2, 3> tint_symbol_8(uint4 buffer[64], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  return matrix<float16_t, 2, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]));
+}
+
+Inner tint_symbol_7(uint4 buffer[64], uint offset) {
+  const Inner tint_symbol_11 = {tint_symbol_8(buffer, (offset + 0u))};
+  return tint_symbol_11;
+}
+
+typedef Inner tint_symbol_6_ret[4];
+tint_symbol_6_ret tint_symbol_6(uint4 buffer[64], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_7(buffer, (offset + (i_1 * 64u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_5(uint4 buffer[64], uint offset) {
+  const Outer tint_symbol_12 = {tint_symbol_6(buffer, (offset + 0u))};
+  return tint_symbol_12;
+}
+
+typedef Outer tint_symbol_4_ret[4];
+tint_symbol_4_ret tint_symbol_4(uint4 buffer[64], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    for(uint i_2 = 0u; (i_2 < 4u); i_2 = (i_2 + 1u)) {
+      arr_1[i_2] = tint_symbol_5(buffer, (offset + (i_2 * 256u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_a_i_save = i();
+  const int p_a_i_a_i_save = i();
+  const int p_a_i_a_i_m_i_save = i();
+  const Outer l_a[4] = tint_symbol_4(a, 0u);
+  const Outer l_a_i = tint_symbol_5(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a[4] = tint_symbol_6(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a_i = tint_symbol_7(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const matrix<float16_t, 2, 3> l_a_i_a_i_m = tint_symbol_8(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const uint scalar_offset_2 = ((((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))) + (8u * uint(p_a_i_a_i_m_i_save)))) / 4;
+  uint4 ubo_load_5 = a[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const vector<float16_t, 3> l_a_i_a_i_m_i = vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]);
+  const int tint_symbol = p_a_i_save;
+  const int tint_symbol_1 = p_a_i_a_i_save;
+  const int tint_symbol_2 = p_a_i_a_i_m_i_save;
+  const int tint_symbol_3 = i();
+  const uint scalar_offset_bytes = (((((256u * uint(tint_symbol)) + (64u * uint(tint_symbol_1))) + (8u * uint(tint_symbol_2))) + (2u * uint(tint_symbol_3))));
+  const uint scalar_offset_index = scalar_offset_bytes / 4;
+  const float16_t l_a_i_a_i_m_i_i = float16_t(f16tof32(((a[scalar_offset_index / 4][scalar_offset_index % 4] >> (scalar_offset_bytes % 4 == 0 ? 0 : 16)) & 0xFFFF)));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000021B8B701620(2,10-18): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f16/dynamic_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/dynamic_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..dbc6cf5
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/dynamic_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,147 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct Inner {
+  f16mat2x3 m;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+};
+
+struct Inner_std140 {
+  f16vec3 m_0;
+  f16vec3 m_1;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+};
+
+struct Outer {
+  Inner a[4];
+};
+
+struct Outer_std140 {
+  Inner_std140 a[4];
+};
+
+layout(binding = 0, std140) uniform a_block_std140_ubo {
+  Outer_std140 inner[4];
+} a;
+
+int counter = 0;
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+Inner conv_Inner(Inner_std140 val) {
+  return Inner(f16mat2x3(val.m_0, val.m_1), val.pad, val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.pad_7, val.pad_8, val.pad_9, val.pad_10, val.pad_11);
+}
+
+Inner[4] conv_arr4_Inner(Inner_std140 val[4]) {
+  Inner arr[4] = Inner[4](Inner(f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_Inner(val[i]);
+    }
+  }
+  return arr;
+}
+
+Outer conv_Outer(Outer_std140 val) {
+  return Outer(conv_arr4_Inner(val.a));
+}
+
+Outer[4] conv_arr4_Outer(Outer_std140 val[4]) {
+  Outer arr[4] = Outer[4](Outer(Inner[4](Inner(f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))), Outer(Inner[4](Inner(f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))), Outer(Inner[4](Inner(f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))), Outer(Inner[4](Inner(f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_Outer(val[i]);
+    }
+  }
+  return arr;
+}
+
+f16mat2x3 load_a_inner_p0_a_p1_m(uint p0, uint p1) {
+  uint s_save = p0;
+  uint s_save_1 = p1;
+  return f16mat2x3(a.inner[s_save].a[s_save_1].m_0, a.inner[s_save].a[s_save_1].m_1);
+}
+
+f16vec3 load_a_inner_p0_a_p1_m_p2(uint p0, uint p1, uint p2) {
+  switch(p2) {
+    case 0u: {
+      return a.inner[p0].a[p1].m_0;
+      break;
+    }
+    case 1u: {
+      return a.inner[p0].a[p1].m_1;
+      break;
+    }
+    default: {
+      return f16vec3(0.0hf);
+      break;
+    }
+  }
+}
+
+float16_t load_a_inner_p0_a_p1_m_p2_p3(uint p0, uint p1, uint p2, uint p3) {
+  switch(p2) {
+    case 0u: {
+      return a.inner[p0].a[p1].m_0[p3];
+      break;
+    }
+    case 1u: {
+      return a.inner[p0].a[p1].m_1[p3];
+      break;
+    }
+    default: {
+      return 0.0hf;
+      break;
+    }
+  }
+}
+
+void f() {
+  Outer p_a[4] = conv_arr4_Outer(a.inner);
+  int tint_symbol = i();
+  Outer p_a_i = conv_Outer(a.inner[tint_symbol]);
+  Inner p_a_i_a[4] = conv_arr4_Inner(a.inner[tint_symbol].a);
+  int tint_symbol_1 = i();
+  Inner p_a_i_a_i = conv_Inner(a.inner[tint_symbol].a[tint_symbol_1]);
+  f16mat2x3 p_a_i_a_i_m = load_a_inner_p0_a_p1_m(uint(tint_symbol), uint(tint_symbol_1));
+  int tint_symbol_2 = i();
+  f16vec3 p_a_i_a_i_m_i = load_a_inner_p0_a_p1_m_p2(uint(tint_symbol), uint(tint_symbol_1), uint(tint_symbol_2));
+  Outer l_a[4] = conv_arr4_Outer(a.inner);
+  Outer l_a_i = conv_Outer(a.inner[tint_symbol]);
+  Inner l_a_i_a[4] = conv_arr4_Inner(a.inner[tint_symbol].a);
+  Inner l_a_i_a_i = conv_Inner(a.inner[tint_symbol].a[tint_symbol_1]);
+  f16mat2x3 l_a_i_a_i_m = load_a_inner_p0_a_p1_m(uint(tint_symbol), uint(tint_symbol_1));
+  f16vec3 l_a_i_a_i_m_i = load_a_inner_p0_a_p1_m_p2(uint(tint_symbol), uint(tint_symbol_1), uint(tint_symbol_2));
+  int tint_symbol_3 = i();
+  float16_t l_a_i_a_i_m_i_i = load_a_inner_p0_a_p1_m_p2_p3(uint(tint_symbol), uint(tint_symbol_1), uint(tint_symbol_2), uint(tint_symbol_3));
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f16/dynamic_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/dynamic_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..322c363
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/dynamic_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,49 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct Inner {
+  /* 0x0000 */ half2x3 m;
+  /* 0x0010 */ tint_array<int8_t, 48> tint_pad;
+};
+
+struct Outer {
+  /* 0x0000 */ tint_array<Inner, 4> a;
+};
+
+int i() {
+  thread int tint_symbol_4 = 0;
+  tint_symbol_4 = as_type<int>((as_type<uint>(tint_symbol_4) + as_type<uint>(1)));
+  return tint_symbol_4;
+}
+
+kernel void f(const constant tint_array<Outer, 4>* tint_symbol_5 [[buffer(0)]]) {
+  int const tint_symbol = i();
+  int const p_a_i_save = tint_symbol;
+  int const tint_symbol_1 = i();
+  int const p_a_i_a_i_save = tint_symbol_1;
+  int const tint_symbol_2 = i();
+  int const p_a_i_a_i_m_i_save = tint_symbol_2;
+  tint_array<Outer, 4> const l_a = *(tint_symbol_5);
+  Outer const l_a_i = (*(tint_symbol_5))[p_a_i_save];
+  tint_array<Inner, 4> const l_a_i_a = (*(tint_symbol_5))[p_a_i_save].a;
+  Inner const l_a_i_a_i = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save];
+  half2x3 const l_a_i_a_i_m = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save].m;
+  half3 const l_a_i_a_i_m_i = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save].m[p_a_i_a_i_m_i_save];
+  int const tint_symbol_3 = i();
+  half const l_a_i_a_i_m_i_i = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save].m[p_a_i_a_i_m_i_save][tint_symbol_3];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f16/dynamic_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/dynamic_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..7710af2
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/dynamic_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,310 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 193
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %a_block_std140 "a_block_std140"
+               OpMemberName %a_block_std140 0 "inner"
+               OpName %Outer_std140 "Outer_std140"
+               OpMemberName %Outer_std140 0 "a"
+               OpName %Inner_std140 "Inner_std140"
+               OpMemberName %Inner_std140 0 "m_0"
+               OpMemberName %Inner_std140 1 "m_1"
+               OpName %a "a"
+               OpName %counter "counter"
+               OpName %i "i"
+               OpName %Inner "Inner"
+               OpMemberName %Inner 0 "m"
+               OpName %conv_Inner "conv_Inner"
+               OpName %val "val"
+               OpName %conv_arr4_Inner "conv_arr4_Inner"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i_0 "i"
+               OpName %var_for_index_1 "var_for_index_1"
+               OpName %Outer "Outer"
+               OpMemberName %Outer 0 "a"
+               OpName %conv_Outer "conv_Outer"
+               OpName %val_1 "val"
+               OpName %conv_arr4_Outer "conv_arr4_Outer"
+               OpName %val_2 "val"
+               OpName %arr_0 "arr"
+               OpName %i_1 "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_a_inner_p0_a_p1_m "load_a_inner_p0_a_p1_m"
+               OpName %p0 "p0"
+               OpName %p1 "p1"
+               OpName %load_a_inner_p0_a_p1_m_p2 "load_a_inner_p0_a_p1_m_p2"
+               OpName %p0_0 "p0"
+               OpName %p1_0 "p1"
+               OpName %p2 "p2"
+               OpName %load_a_inner_p0_a_p1_m_p2_p3 "load_a_inner_p0_a_p1_m_p2_p3"
+               OpName %p0_1 "p0"
+               OpName %p1_1 "p1"
+               OpName %p2_0 "p2"
+               OpName %p3 "p3"
+               OpName %f "f"
+               OpDecorate %a_block_std140 Block
+               OpMemberDecorate %a_block_std140 0 Offset 0
+               OpMemberDecorate %Outer_std140 0 Offset 0
+               OpMemberDecorate %Inner_std140 0 Offset 0
+               OpMemberDecorate %Inner_std140 1 Offset 8
+               OpDecorate %_arr_Inner_std140_uint_4 ArrayStride 64
+               OpDecorate %_arr_Outer_std140_uint_4 ArrayStride 256
+               OpDecorate %a NonWritable
+               OpDecorate %a DescriptorSet 0
+               OpDecorate %a Binding 0
+               OpMemberDecorate %Inner 0 Offset 0
+               OpMemberDecorate %Inner 0 ColMajor
+               OpMemberDecorate %Inner 0 MatrixStride 8
+               OpDecorate %_arr_Inner_uint_4 ArrayStride 64
+               OpMemberDecorate %Outer 0 Offset 0
+               OpDecorate %_arr_Outer_uint_4 ArrayStride 256
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+%Inner_std140 = OpTypeStruct %v3half %v3half
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_Inner_std140_uint_4 = OpTypeArray %Inner_std140 %uint_4
+%Outer_std140 = OpTypeStruct %_arr_Inner_std140_uint_4
+%_arr_Outer_std140_uint_4 = OpTypeArray %Outer_std140 %uint_4
+%a_block_std140 = OpTypeStruct %_arr_Outer_std140_uint_4
+%_ptr_Uniform_a_block_std140 = OpTypePointer Uniform %a_block_std140
+          %a = OpVariable %_ptr_Uniform_a_block_std140 Uniform
+        %int = OpTypeInt 32 1
+         %13 = OpConstantNull %int
+%_ptr_Private_int = OpTypePointer Private %int
+    %counter = OpVariable %_ptr_Private_int Private %13
+         %16 = OpTypeFunction %int
+      %int_1 = OpConstant %int 1
+ %mat2v3half = OpTypeMatrix %v3half 2
+      %Inner = OpTypeStruct %mat2v3half
+         %23 = OpTypeFunction %Inner %Inner_std140
+%_arr_Inner_uint_4 = OpTypeArray %Inner %uint_4
+         %33 = OpTypeFunction %_arr_Inner_uint_4 %_arr_Inner_std140_uint_4
+%_ptr_Function__arr_Inner_uint_4 = OpTypePointer Function %_arr_Inner_uint_4
+         %40 = OpConstantNull %_arr_Inner_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %43 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_Inner_std140_uint_4 = OpTypePointer Function %_arr_Inner_std140_uint_4
+         %56 = OpConstantNull %_arr_Inner_std140_uint_4
+%_ptr_Function_Inner = OpTypePointer Function %Inner
+%_ptr_Function_Inner_std140 = OpTypePointer Function %Inner_std140
+     %uint_1 = OpConstant %uint 1
+      %Outer = OpTypeStruct %_arr_Inner_uint_4
+         %69 = OpTypeFunction %Outer %Outer_std140
+%_arr_Outer_uint_4 = OpTypeArray %Outer %uint_4
+         %77 = OpTypeFunction %_arr_Outer_uint_4 %_arr_Outer_std140_uint_4
+%_ptr_Function__arr_Outer_uint_4 = OpTypePointer Function %_arr_Outer_uint_4
+         %84 = OpConstantNull %_arr_Outer_uint_4
+%_ptr_Function__arr_Outer_std140_uint_4 = OpTypePointer Function %_arr_Outer_std140_uint_4
+         %97 = OpConstantNull %_arr_Outer_std140_uint_4
+%_ptr_Function_Outer = OpTypePointer Function %Outer
+%_ptr_Function_Outer_std140 = OpTypePointer Function %Outer_std140
+        %109 = OpTypeFunction %mat2v3half %uint %uint
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_Inner_std140 = OpTypePointer Uniform %Inner_std140
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+        %126 = OpTypeFunction %v3half %uint %uint %uint
+        %140 = OpConstantNull %v3half
+        %141 = OpTypeFunction %half %uint %uint %uint %uint
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+        %157 = OpConstantNull %half
+       %void = OpTypeVoid
+        %158 = OpTypeFunction %void
+%_ptr_Uniform__arr_Outer_std140_uint_4 = OpTypePointer Uniform %_arr_Outer_std140_uint_4
+%_ptr_Uniform_Outer_std140 = OpTypePointer Uniform %Outer_std140
+%_ptr_Uniform__arr_Inner_std140_uint_4 = OpTypePointer Uniform %_arr_Inner_std140_uint_4
+          %i = OpFunction %int None %16
+         %18 = OpLabel
+         %19 = OpLoad %int %counter
+         %21 = OpIAdd %int %19 %int_1
+               OpStore %counter %21
+         %22 = OpLoad %int %counter
+               OpReturnValue %22
+               OpFunctionEnd
+ %conv_Inner = OpFunction %Inner None %23
+        %val = OpFunctionParameter %Inner_std140
+         %28 = OpLabel
+         %29 = OpCompositeExtract %v3half %val 0
+         %30 = OpCompositeExtract %v3half %val 1
+         %31 = OpCompositeConstruct %mat2v3half %29 %30
+         %32 = OpCompositeConstruct %Inner %31
+               OpReturnValue %32
+               OpFunctionEnd
+%conv_arr4_Inner = OpFunction %_arr_Inner_uint_4 None %33
+      %val_0 = OpFunctionParameter %_arr_Inner_std140_uint_4
+         %37 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_Inner_uint_4 Function %40
+        %i_0 = OpVariable %_ptr_Function_uint Function %43
+%var_for_index_1 = OpVariable %_ptr_Function__arr_Inner_std140_uint_4 Function %56
+               OpBranch %44
+         %44 = OpLabel
+               OpLoopMerge %45 %46 None
+               OpBranch %47
+         %47 = OpLabel
+         %49 = OpLoad %uint %i_0
+         %50 = OpULessThan %bool %49 %uint_4
+         %48 = OpLogicalNot %bool %50
+               OpSelectionMerge %52 None
+               OpBranchConditional %48 %53 %52
+         %53 = OpLabel
+               OpBranch %45
+         %52 = OpLabel
+               OpStore %var_for_index_1 %val_0
+         %57 = OpLoad %uint %i_0
+         %59 = OpAccessChain %_ptr_Function_Inner %arr %57
+         %61 = OpLoad %uint %i_0
+         %63 = OpAccessChain %_ptr_Function_Inner_std140 %var_for_index_1 %61
+         %64 = OpLoad %Inner_std140 %63
+         %60 = OpFunctionCall %Inner %conv_Inner %64
+               OpStore %59 %60
+               OpBranch %46
+         %46 = OpLabel
+         %65 = OpLoad %uint %i_0
+         %67 = OpIAdd %uint %65 %uint_1
+               OpStore %i_0 %67
+               OpBranch %44
+         %45 = OpLabel
+         %68 = OpLoad %_arr_Inner_uint_4 %arr
+               OpReturnValue %68
+               OpFunctionEnd
+ %conv_Outer = OpFunction %Outer None %69
+      %val_1 = OpFunctionParameter %Outer_std140
+         %73 = OpLabel
+         %75 = OpCompositeExtract %_arr_Inner_std140_uint_4 %val_1 0
+         %74 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr4_Inner %75
+         %76 = OpCompositeConstruct %Outer %74
+               OpReturnValue %76
+               OpFunctionEnd
+%conv_arr4_Outer = OpFunction %_arr_Outer_uint_4 None %77
+      %val_2 = OpFunctionParameter %_arr_Outer_std140_uint_4
+         %81 = OpLabel
+      %arr_0 = OpVariable %_ptr_Function__arr_Outer_uint_4 Function %84
+        %i_1 = OpVariable %_ptr_Function_uint Function %43
+%var_for_index = OpVariable %_ptr_Function__arr_Outer_std140_uint_4 Function %97
+               OpBranch %86
+         %86 = OpLabel
+               OpLoopMerge %87 %88 None
+               OpBranch %89
+         %89 = OpLabel
+         %91 = OpLoad %uint %i_1
+         %92 = OpULessThan %bool %91 %uint_4
+         %90 = OpLogicalNot %bool %92
+               OpSelectionMerge %93 None
+               OpBranchConditional %90 %94 %93
+         %94 = OpLabel
+               OpBranch %87
+         %93 = OpLabel
+               OpStore %var_for_index %val_2
+         %98 = OpLoad %uint %i_1
+        %100 = OpAccessChain %_ptr_Function_Outer %arr_0 %98
+        %102 = OpLoad %uint %i_1
+        %104 = OpAccessChain %_ptr_Function_Outer_std140 %var_for_index %102
+        %105 = OpLoad %Outer_std140 %104
+        %101 = OpFunctionCall %Outer %conv_Outer %105
+               OpStore %100 %101
+               OpBranch %88
+         %88 = OpLabel
+        %106 = OpLoad %uint %i_1
+        %107 = OpIAdd %uint %106 %uint_1
+               OpStore %i_1 %107
+               OpBranch %86
+         %87 = OpLabel
+        %108 = OpLoad %_arr_Outer_uint_4 %arr_0
+               OpReturnValue %108
+               OpFunctionEnd
+%load_a_inner_p0_a_p1_m = OpFunction %mat2v3half None %109
+         %p0 = OpFunctionParameter %uint
+         %p1 = OpFunctionParameter %uint
+        %113 = OpLabel
+        %117 = OpAccessChain %_ptr_Uniform_Inner_std140 %a %uint_0 %p0 %uint_0 %p1
+        %120 = OpAccessChain %_ptr_Uniform_v3half %117 %uint_0
+        %121 = OpLoad %v3half %120
+        %123 = OpAccessChain %_ptr_Uniform_v3half %117 %uint_1
+        %124 = OpLoad %v3half %123
+        %125 = OpCompositeConstruct %mat2v3half %121 %124
+               OpReturnValue %125
+               OpFunctionEnd
+%load_a_inner_p0_a_p1_m_p2 = OpFunction %v3half None %126
+       %p0_0 = OpFunctionParameter %uint
+       %p1_0 = OpFunctionParameter %uint
+         %p2 = OpFunctionParameter %uint
+        %131 = OpLabel
+               OpSelectionMerge %132 None
+               OpSwitch %p2 %133 0 %134 1 %135
+        %134 = OpLabel
+        %136 = OpAccessChain %_ptr_Uniform_v3half %a %uint_0 %p0_0 %uint_0 %p1_0 %uint_0
+        %137 = OpLoad %v3half %136
+               OpReturnValue %137
+        %135 = OpLabel
+        %138 = OpAccessChain %_ptr_Uniform_v3half %a %uint_0 %p0_0 %uint_0 %p1_0 %uint_1
+        %139 = OpLoad %v3half %138
+               OpReturnValue %139
+        %133 = OpLabel
+               OpReturnValue %140
+        %132 = OpLabel
+               OpReturnValue %140
+               OpFunctionEnd
+%load_a_inner_p0_a_p1_m_p2_p3 = OpFunction %half None %141
+       %p0_1 = OpFunctionParameter %uint
+       %p1_1 = OpFunctionParameter %uint
+       %p2_0 = OpFunctionParameter %uint
+         %p3 = OpFunctionParameter %uint
+        %147 = OpLabel
+               OpSelectionMerge %148 None
+               OpSwitch %p2_0 %149 0 %150 1 %151
+        %150 = OpLabel
+        %153 = OpAccessChain %_ptr_Uniform_half %a %uint_0 %p0_1 %uint_0 %p1_1 %uint_0 %p3
+        %154 = OpLoad %half %153
+               OpReturnValue %154
+        %151 = OpLabel
+        %155 = OpAccessChain %_ptr_Uniform_half %a %uint_0 %p0_1 %uint_0 %p1_1 %uint_1 %p3
+        %156 = OpLoad %half %155
+               OpReturnValue %156
+        %149 = OpLabel
+               OpReturnValue %157
+        %148 = OpLabel
+               OpReturnValue %157
+               OpFunctionEnd
+          %f = OpFunction %void None %158
+        %161 = OpLabel
+        %162 = OpFunctionCall %int %i
+        %163 = OpFunctionCall %int %i
+        %164 = OpFunctionCall %int %i
+        %167 = OpAccessChain %_ptr_Uniform__arr_Outer_std140_uint_4 %a %uint_0
+        %168 = OpLoad %_arr_Outer_std140_uint_4 %167
+        %165 = OpFunctionCall %_arr_Outer_uint_4 %conv_arr4_Outer %168
+        %171 = OpAccessChain %_ptr_Uniform_Outer_std140 %a %uint_0 %162
+        %172 = OpLoad %Outer_std140 %171
+        %169 = OpFunctionCall %Outer %conv_Outer %172
+        %175 = OpAccessChain %_ptr_Uniform__arr_Inner_std140_uint_4 %a %uint_0 %162 %uint_0
+        %176 = OpLoad %_arr_Inner_std140_uint_4 %175
+        %173 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr4_Inner %176
+        %178 = OpAccessChain %_ptr_Uniform_Inner_std140 %a %uint_0 %162 %uint_0 %163
+        %179 = OpLoad %Inner_std140 %178
+        %177 = OpFunctionCall %Inner %conv_Inner %179
+        %181 = OpBitcast %uint %162
+        %182 = OpBitcast %uint %163
+        %180 = OpFunctionCall %mat2v3half %load_a_inner_p0_a_p1_m %181 %182
+        %184 = OpBitcast %uint %162
+        %185 = OpBitcast %uint %163
+        %186 = OpBitcast %uint %164
+        %183 = OpFunctionCall %v3half %load_a_inner_p0_a_p1_m_p2 %184 %185 %186
+        %187 = OpFunctionCall %int %i
+        %189 = OpBitcast %uint %162
+        %190 = OpBitcast %uint %163
+        %191 = OpBitcast %uint %164
+        %192 = OpBitcast %uint %187
+        %188 = OpFunctionCall %half %load_a_inner_p0_a_p1_m_p2_p3 %189 %190 %191 %192
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f16/dynamic_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/dynamic_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..67b4731
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/dynamic_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,36 @@
+enable f16;
+
+struct Inner {
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+var<private> counter = 0;
+
+fn i() -> i32 {
+  counter++;
+  return counter;
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &(a);
+  let p_a_i = &((*(p_a))[i()]);
+  let p_a_i_a = &((*(p_a_i)).a);
+  let p_a_i_a_i = &((*(p_a_i_a))[i()]);
+  let p_a_i_a_i_m = &((*(p_a_i_a_i)).m);
+  let p_a_i_a_i_m_i = &((*(p_a_i_a_i_m))[i()]);
+  let l_a : array<Outer, 4> = *(p_a);
+  let l_a_i : Outer = *(p_a_i);
+  let l_a_i_a : array<Inner, 4> = *(p_a_i_a);
+  let l_a_i_a_i : Inner = *(p_a_i_a_i);
+  let l_a_i_a_i_m : mat2x3<f16> = *(p_a_i_a_i_m);
+  let l_a_i_a_i_m_i : vec3<f16> = *(p_a_i_a_i_m_i);
+  let l_a_i_a_i_m_i_i : f16 = (*(p_a_i_a_i_m_i))[i()];
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f16/static_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/static_index_via_ptr.wgsl
new file mode 100644
index 0000000..8f21106
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/static_index_via_ptr.wgsl

@@ -0,0 +1,31 @@
+enable f16;
+
+struct Inner {
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &a;
+  let p_a_3 = &((*p_a)[3]);
+  let p_a_3_a = &((*p_a_3).a);
+  let p_a_3_a_2 = &((*p_a_3_a)[2]);
+  let p_a_3_a_2_m = &((*p_a_3_a_2).m);
+  let p_a_3_a_2_m_1 = &((*p_a_3_a_2_m)[1]);
+
+
+  let l_a             : array<Outer, 4> = *p_a;
+  let l_a_3           : Outer           = *p_a_3;
+  let l_a_3_a         : array<Inner, 4> = *p_a_3_a;
+  let l_a_3_a_2       : Inner           = *p_a_3_a_2;
+  let l_a_3_a_2_m     : mat2x3<f16>     = *p_a_3_a_2_m;
+  let l_a_3_a_2_m_1   : vec3<f16>       = *p_a_3_a_2_m_1;
+  let l_a_3_a_2_m_1_0 : f16             = (*p_a_3_a_2_m_1)[0];
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..eca3eed
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,71 @@
+struct Inner {
+  matrix<float16_t, 2, 3> m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];
+};
+
+matrix<float16_t, 2, 3> tint_symbol_4(uint4 buffer[64], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  return matrix<float16_t, 2, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]));
+}
+
+Inner tint_symbol_3(uint4 buffer[64], uint offset) {
+  const Inner tint_symbol_7 = {tint_symbol_4(buffer, (offset + 0u))};
+  return tint_symbol_7;
+}
+
+typedef Inner tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[64], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_3(buffer, (offset + (i * 64u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_1(uint4 buffer[64], uint offset) {
+  const Outer tint_symbol_8 = {tint_symbol_2(buffer, (offset + 0u))};
+  return tint_symbol_8;
+}
+
+typedef Outer tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[64], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr_1[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 256u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const Outer l_a[4] = tint_symbol(a, 0u);
+  const Outer l_a_3 = tint_symbol_1(a, 768u);
+  const Inner l_a_3_a[4] = tint_symbol_2(a, 768u);
+  const Inner l_a_3_a_2 = tint_symbol_3(a, 896u);
+  const matrix<float16_t, 2, 3> l_a_3_a_2_m = tint_symbol_4(a, 896u);
+  uint2 ubo_load_4 = a[56].zw;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const vector<float16_t, 3> l_a_3_a_2_m_1 = vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]);
+  const float16_t l_a_3_a_2_m_1_0 = float16_t(f16tof32(((a[56].z) & 0xFFFF)));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..b6de301
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,76 @@
+SKIP: FAILED
+
+struct Inner {
+  matrix<float16_t, 2, 3> m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];
+};
+
+matrix<float16_t, 2, 3> tint_symbol_4(uint4 buffer[64], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  return matrix<float16_t, 2, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]));
+}
+
+Inner tint_symbol_3(uint4 buffer[64], uint offset) {
+  const Inner tint_symbol_7 = {tint_symbol_4(buffer, (offset + 0u))};
+  return tint_symbol_7;
+}
+
+typedef Inner tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[64], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_3(buffer, (offset + (i * 64u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_1(uint4 buffer[64], uint offset) {
+  const Outer tint_symbol_8 = {tint_symbol_2(buffer, (offset + 0u))};
+  return tint_symbol_8;
+}
+
+typedef Outer tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[64], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr_1[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 256u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const Outer l_a[4] = tint_symbol(a, 0u);
+  const Outer l_a_3 = tint_symbol_1(a, 768u);
+  const Inner l_a_3_a[4] = tint_symbol_2(a, 768u);
+  const Inner l_a_3_a_2 = tint_symbol_3(a, 896u);
+  const matrix<float16_t, 2, 3> l_a_3_a_2_m = tint_symbol_4(a, 896u);
+  uint2 ubo_load_4 = a[56].zw;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const vector<float16_t, 3> l_a_3_a_2_m_1 = vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]);
+  const float16_t l_a_3_a_2_m_1_0 = float16_t(f16tof32(((a[56].z) & 0xFFFF)));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000016A3B044D10(2,10-18): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f16/static_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/static_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..1a1a8cc
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/static_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,101 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct Inner {
+  f16mat2x3 m;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+};
+
+struct Inner_std140 {
+  f16vec3 m_0;
+  f16vec3 m_1;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+};
+
+struct Outer {
+  Inner a[4];
+};
+
+struct Outer_std140 {
+  Inner_std140 a[4];
+};
+
+layout(binding = 0, std140) uniform a_block_std140_ubo {
+  Outer_std140 inner[4];
+} a;
+
+Inner conv_Inner(Inner_std140 val) {
+  return Inner(f16mat2x3(val.m_0, val.m_1), val.pad, val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.pad_7, val.pad_8, val.pad_9, val.pad_10, val.pad_11);
+}
+
+Inner[4] conv_arr4_Inner(Inner_std140 val[4]) {
+  Inner arr[4] = Inner[4](Inner(f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_Inner(val[i]);
+    }
+  }
+  return arr;
+}
+
+Outer conv_Outer(Outer_std140 val) {
+  return Outer(conv_arr4_Inner(val.a));
+}
+
+Outer[4] conv_arr4_Outer(Outer_std140 val[4]) {
+  Outer arr[4] = Outer[4](Outer(Inner[4](Inner(f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))), Outer(Inner[4](Inner(f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))), Outer(Inner[4](Inner(f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))), Outer(Inner[4](Inner(f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_Outer(val[i]);
+    }
+  }
+  return arr;
+}
+
+f16mat2x3 load_a_inner_3_a_2_m() {
+  return f16mat2x3(a.inner[3u].a[2u].m_0, a.inner[3u].a[2u].m_1);
+}
+
+void f() {
+  Outer p_a[4] = conv_arr4_Outer(a.inner);
+  Outer p_a_3 = conv_Outer(a.inner[3u]);
+  Inner p_a_3_a[4] = conv_arr4_Inner(a.inner[3u].a);
+  Inner p_a_3_a_2 = conv_Inner(a.inner[3u].a[2u]);
+  f16mat2x3 p_a_3_a_2_m = load_a_inner_3_a_2_m();
+  f16vec3 p_a_3_a_2_m_1 = a.inner[3u].a[2u].m_1;
+  Outer l_a[4] = conv_arr4_Outer(a.inner);
+  Outer l_a_3 = conv_Outer(a.inner[3u]);
+  Inner l_a_3_a[4] = conv_arr4_Inner(a.inner[3u].a);
+  Inner l_a_3_a_2 = conv_Inner(a.inner[3u].a[2u]);
+  f16mat2x3 l_a_3_a_2_m = load_a_inner_3_a_2_m();
+  f16vec3 l_a_3_a_2_m_1 = a.inner[3u].a[2u].m_1;
+  float16_t l_a_3_a_2_m_1_0 = a.inner[3u].a[2u].m_1[0u];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f16/static_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/static_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..646eacf
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/static_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,36 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct Inner {
+  /* 0x0000 */ half2x3 m;
+  /* 0x0010 */ tint_array<int8_t, 48> tint_pad;
+};
+
+struct Outer {
+  /* 0x0000 */ tint_array<Inner, 4> a;
+};
+
+kernel void f(const constant tint_array<Outer, 4>* tint_symbol [[buffer(0)]]) {
+  tint_array<Outer, 4> const l_a = *(tint_symbol);
+  Outer const l_a_3 = (*(tint_symbol))[3];
+  tint_array<Inner, 4> const l_a_3_a = (*(tint_symbol))[3].a;
+  Inner const l_a_3_a_2 = (*(tint_symbol))[3].a[2];
+  half2x3 const l_a_3_a_2_m = (*(tint_symbol))[3].a[2].m;
+  half3 const l_a_3_a_2_m_1 = (*(tint_symbol))[3].a[2].m[1];
+  half const l_a_3_a_2_m_1_0 = (*(tint_symbol))[3].a[2].m[1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f16/static_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/static_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..8522fa4
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/static_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,227 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 140
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %a_block_std140 "a_block_std140"
+               OpMemberName %a_block_std140 0 "inner"
+               OpName %Outer_std140 "Outer_std140"
+               OpMemberName %Outer_std140 0 "a"
+               OpName %Inner_std140 "Inner_std140"
+               OpMemberName %Inner_std140 0 "m_0"
+               OpMemberName %Inner_std140 1 "m_1"
+               OpName %a "a"
+               OpName %Inner "Inner"
+               OpMemberName %Inner 0 "m"
+               OpName %conv_Inner "conv_Inner"
+               OpName %val "val"
+               OpName %conv_arr4_Inner "conv_arr4_Inner"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index_1 "var_for_index_1"
+               OpName %Outer "Outer"
+               OpMemberName %Outer 0 "a"
+               OpName %conv_Outer "conv_Outer"
+               OpName %val_1 "val"
+               OpName %conv_arr4_Outer "conv_arr4_Outer"
+               OpName %val_2 "val"
+               OpName %arr_0 "arr"
+               OpName %i_0 "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_a_inner_3_a_2_m "load_a_inner_3_a_2_m"
+               OpName %f "f"
+               OpDecorate %a_block_std140 Block
+               OpMemberDecorate %a_block_std140 0 Offset 0
+               OpMemberDecorate %Outer_std140 0 Offset 0
+               OpMemberDecorate %Inner_std140 0 Offset 0
+               OpMemberDecorate %Inner_std140 1 Offset 8
+               OpDecorate %_arr_Inner_std140_uint_4 ArrayStride 64
+               OpDecorate %_arr_Outer_std140_uint_4 ArrayStride 256
+               OpDecorate %a NonWritable
+               OpDecorate %a DescriptorSet 0
+               OpDecorate %a Binding 0
+               OpMemberDecorate %Inner 0 Offset 0
+               OpMemberDecorate %Inner 0 ColMajor
+               OpMemberDecorate %Inner 0 MatrixStride 8
+               OpDecorate %_arr_Inner_uint_4 ArrayStride 64
+               OpMemberDecorate %Outer 0 Offset 0
+               OpDecorate %_arr_Outer_uint_4 ArrayStride 256
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+%Inner_std140 = OpTypeStruct %v3half %v3half
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_Inner_std140_uint_4 = OpTypeArray %Inner_std140 %uint_4
+%Outer_std140 = OpTypeStruct %_arr_Inner_std140_uint_4
+%_arr_Outer_std140_uint_4 = OpTypeArray %Outer_std140 %uint_4
+%a_block_std140 = OpTypeStruct %_arr_Outer_std140_uint_4
+%_ptr_Uniform_a_block_std140 = OpTypePointer Uniform %a_block_std140
+          %a = OpVariable %_ptr_Uniform_a_block_std140 Uniform
+ %mat2v3half = OpTypeMatrix %v3half 2
+      %Inner = OpTypeStruct %mat2v3half
+         %12 = OpTypeFunction %Inner %Inner_std140
+%_arr_Inner_uint_4 = OpTypeArray %Inner %uint_4
+         %22 = OpTypeFunction %_arr_Inner_uint_4 %_arr_Inner_std140_uint_4
+%_ptr_Function__arr_Inner_uint_4 = OpTypePointer Function %_arr_Inner_uint_4
+         %29 = OpConstantNull %_arr_Inner_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %32 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_Inner_std140_uint_4 = OpTypePointer Function %_arr_Inner_std140_uint_4
+         %45 = OpConstantNull %_arr_Inner_std140_uint_4
+%_ptr_Function_Inner = OpTypePointer Function %Inner
+%_ptr_Function_Inner_std140 = OpTypePointer Function %Inner_std140
+     %uint_1 = OpConstant %uint 1
+      %Outer = OpTypeStruct %_arr_Inner_uint_4
+         %58 = OpTypeFunction %Outer %Outer_std140
+%_arr_Outer_uint_4 = OpTypeArray %Outer %uint_4
+         %66 = OpTypeFunction %_arr_Outer_uint_4 %_arr_Outer_std140_uint_4
+%_ptr_Function__arr_Outer_uint_4 = OpTypePointer Function %_arr_Outer_uint_4
+         %73 = OpConstantNull %_arr_Outer_uint_4
+%_ptr_Function__arr_Outer_std140_uint_4 = OpTypePointer Function %_arr_Outer_std140_uint_4
+         %86 = OpConstantNull %_arr_Outer_std140_uint_4
+%_ptr_Function_Outer = OpTypePointer Function %Outer
+%_ptr_Function_Outer_std140 = OpTypePointer Function %Outer_std140
+         %98 = OpTypeFunction %mat2v3half
+     %uint_0 = OpConstant %uint 0
+     %uint_3 = OpConstant %uint 3
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_Inner_std140 = OpTypePointer Uniform %Inner_std140
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+       %void = OpTypeVoid
+        %115 = OpTypeFunction %void
+%_ptr_Uniform__arr_Outer_std140_uint_4 = OpTypePointer Uniform %_arr_Outer_std140_uint_4
+%_ptr_Uniform_Outer_std140 = OpTypePointer Uniform %Outer_std140
+%_ptr_Uniform__arr_Inner_std140_uint_4 = OpTypePointer Uniform %_arr_Inner_std140_uint_4
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+ %conv_Inner = OpFunction %Inner None %12
+        %val = OpFunctionParameter %Inner_std140
+         %17 = OpLabel
+         %18 = OpCompositeExtract %v3half %val 0
+         %19 = OpCompositeExtract %v3half %val 1
+         %20 = OpCompositeConstruct %mat2v3half %18 %19
+         %21 = OpCompositeConstruct %Inner %20
+               OpReturnValue %21
+               OpFunctionEnd
+%conv_arr4_Inner = OpFunction %_arr_Inner_uint_4 None %22
+      %val_0 = OpFunctionParameter %_arr_Inner_std140_uint_4
+         %26 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_Inner_uint_4 Function %29
+          %i = OpVariable %_ptr_Function_uint Function %32
+%var_for_index_1 = OpVariable %_ptr_Function__arr_Inner_std140_uint_4 Function %45
+               OpBranch %33
+         %33 = OpLabel
+               OpLoopMerge %34 %35 None
+               OpBranch %36
+         %36 = OpLabel
+         %38 = OpLoad %uint %i
+         %39 = OpULessThan %bool %38 %uint_4
+         %37 = OpLogicalNot %bool %39
+               OpSelectionMerge %41 None
+               OpBranchConditional %37 %42 %41
+         %42 = OpLabel
+               OpBranch %34
+         %41 = OpLabel
+               OpStore %var_for_index_1 %val_0
+         %46 = OpLoad %uint %i
+         %48 = OpAccessChain %_ptr_Function_Inner %arr %46
+         %50 = OpLoad %uint %i
+         %52 = OpAccessChain %_ptr_Function_Inner_std140 %var_for_index_1 %50
+         %53 = OpLoad %Inner_std140 %52
+         %49 = OpFunctionCall %Inner %conv_Inner %53
+               OpStore %48 %49
+               OpBranch %35
+         %35 = OpLabel
+         %54 = OpLoad %uint %i
+         %56 = OpIAdd %uint %54 %uint_1
+               OpStore %i %56
+               OpBranch %33
+         %34 = OpLabel
+         %57 = OpLoad %_arr_Inner_uint_4 %arr
+               OpReturnValue %57
+               OpFunctionEnd
+ %conv_Outer = OpFunction %Outer None %58
+      %val_1 = OpFunctionParameter %Outer_std140
+         %62 = OpLabel
+         %64 = OpCompositeExtract %_arr_Inner_std140_uint_4 %val_1 0
+         %63 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr4_Inner %64
+         %65 = OpCompositeConstruct %Outer %63
+               OpReturnValue %65
+               OpFunctionEnd
+%conv_arr4_Outer = OpFunction %_arr_Outer_uint_4 None %66
+      %val_2 = OpFunctionParameter %_arr_Outer_std140_uint_4
+         %70 = OpLabel
+      %arr_0 = OpVariable %_ptr_Function__arr_Outer_uint_4 Function %73
+        %i_0 = OpVariable %_ptr_Function_uint Function %32
+%var_for_index = OpVariable %_ptr_Function__arr_Outer_std140_uint_4 Function %86
+               OpBranch %75
+         %75 = OpLabel
+               OpLoopMerge %76 %77 None
+               OpBranch %78
+         %78 = OpLabel
+         %80 = OpLoad %uint %i_0
+         %81 = OpULessThan %bool %80 %uint_4
+         %79 = OpLogicalNot %bool %81
+               OpSelectionMerge %82 None
+               OpBranchConditional %79 %83 %82
+         %83 = OpLabel
+               OpBranch %76
+         %82 = OpLabel
+               OpStore %var_for_index %val_2
+         %87 = OpLoad %uint %i_0
+         %89 = OpAccessChain %_ptr_Function_Outer %arr_0 %87
+         %91 = OpLoad %uint %i_0
+         %93 = OpAccessChain %_ptr_Function_Outer_std140 %var_for_index %91
+         %94 = OpLoad %Outer_std140 %93
+         %90 = OpFunctionCall %Outer %conv_Outer %94
+               OpStore %89 %90
+               OpBranch %77
+         %77 = OpLabel
+         %95 = OpLoad %uint %i_0
+         %96 = OpIAdd %uint %95 %uint_1
+               OpStore %i_0 %96
+               OpBranch %75
+         %76 = OpLabel
+         %97 = OpLoad %_arr_Outer_uint_4 %arr_0
+               OpReturnValue %97
+               OpFunctionEnd
+%load_a_inner_3_a_2_m = OpFunction %mat2v3half None %98
+        %100 = OpLabel
+        %106 = OpAccessChain %_ptr_Uniform_Inner_std140 %a %uint_0 %uint_3 %uint_0 %uint_2
+        %109 = OpAccessChain %_ptr_Uniform_v3half %106 %uint_0
+        %110 = OpLoad %v3half %109
+        %112 = OpAccessChain %_ptr_Uniform_v3half %106 %uint_1
+        %113 = OpLoad %v3half %112
+        %114 = OpCompositeConstruct %mat2v3half %110 %113
+               OpReturnValue %114
+               OpFunctionEnd
+          %f = OpFunction %void None %115
+        %118 = OpLabel
+        %121 = OpAccessChain %_ptr_Uniform__arr_Outer_std140_uint_4 %a %uint_0
+        %122 = OpLoad %_arr_Outer_std140_uint_4 %121
+        %119 = OpFunctionCall %_arr_Outer_uint_4 %conv_arr4_Outer %122
+        %125 = OpAccessChain %_ptr_Uniform_Outer_std140 %a %uint_0 %uint_3
+        %126 = OpLoad %Outer_std140 %125
+        %123 = OpFunctionCall %Outer %conv_Outer %126
+        %129 = OpAccessChain %_ptr_Uniform__arr_Inner_std140_uint_4 %a %uint_0 %uint_3 %uint_0
+        %130 = OpLoad %_arr_Inner_std140_uint_4 %129
+        %127 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr4_Inner %130
+        %132 = OpAccessChain %_ptr_Uniform_Inner_std140 %a %uint_0 %uint_3 %uint_0 %uint_2
+        %133 = OpLoad %Inner_std140 %132
+        %131 = OpFunctionCall %Inner %conv_Inner %133
+        %134 = OpFunctionCall %mat2v3half %load_a_inner_3_a_2_m
+        %135 = OpAccessChain %_ptr_Uniform_v3half %a %uint_0 %uint_3 %uint_0 %uint_2 %uint_1
+        %136 = OpLoad %v3half %135
+        %138 = OpAccessChain %_ptr_Uniform_half %a %uint_0 %uint_3 %uint_0 %uint_2 %uint_1 %32
+        %139 = OpLoad %half %138
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f16/static_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/static_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..bceee7d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/static_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,29 @@
+enable f16;
+
+struct Inner {
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &(a);
+  let p_a_3 = &((*(p_a))[3]);
+  let p_a_3_a = &((*(p_a_3)).a);
+  let p_a_3_a_2 = &((*(p_a_3_a))[2]);
+  let p_a_3_a_2_m = &((*(p_a_3_a_2)).m);
+  let p_a_3_a_2_m_1 = &((*(p_a_3_a_2_m))[1]);
+  let l_a : array<Outer, 4> = *(p_a);
+  let l_a_3 : Outer = *(p_a_3);
+  let l_a_3_a : array<Inner, 4> = *(p_a_3_a);
+  let l_a_3_a_2 : Inner = *(p_a_3_a_2);
+  let l_a_3_a_2_m : mat2x3<f16> = *(p_a_3_a_2_m);
+  let l_a_3_a_2_m_1 : vec3<f16> = *(p_a_3_a_2_m_1);
+  let l_a_3_a_2_m_1_0 : f16 = (*(p_a_3_a_2_m_1))[0];
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_builtin.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_builtin.wgsl
new file mode 100644
index 0000000..765f539
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_builtin.wgsl

@@ -0,0 +1,17 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat2x3<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    let t = transpose(u[2].m);
+    let l = length(u[0].m[1].zxy);
+    let a = abs(u[0].m[1].zxy.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_builtin.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_builtin.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..fbcd74f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_builtin.wgsl.expected.dxc.hlsl

@@ -0,0 +1,31 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+
+matrix<float16_t, 2, 3> tint_symbol(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  return matrix<float16_t, 2, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 3, 2> t = transpose(tint_symbol(u, 264u));
+  uint2 ubo_load_4 = u[1].xy;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const float16_t l = length(vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]).zxy);
+  uint2 ubo_load_5 = u[1].xy;
+  vector<float16_t, 2> ubo_load_5_xz = vector<float16_t, 2>(f16tof32(ubo_load_5 & 0xFFFF));
+  float16_t ubo_load_5_y = f16tof32(ubo_load_5[0] >> 16);
+  const float16_t a = abs(vector<float16_t, 3>(ubo_load_5_xz[0], ubo_load_5_y, ubo_load_5_xz[1]).zxy.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_builtin.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_builtin.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..d695652
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_builtin.wgsl.expected.fxc.hlsl

@@ -0,0 +1,36 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+
+matrix<float16_t, 2, 3> tint_symbol(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  return matrix<float16_t, 2, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 3, 2> t = transpose(tint_symbol(u, 264u));
+  uint2 ubo_load_4 = u[1].xy;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const float16_t l = length(vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]).zxy);
+  uint2 ubo_load_5 = u[1].xy;
+  vector<float16_t, 2> ubo_load_5_xz = vector<float16_t, 2>(f16tof32(ubo_load_5 & 0xFFFF));
+  float16_t ubo_load_5_y = f16tof32(ubo_load_5[0] >> 16);
+  const float16_t a = abs(vector<float16_t, 3>(ubo_load_5_xz[0], ubo_load_5_y, ubo_load_5_xz[1]).zxy.x);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000002BEEDB9DED0(5,8-16): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_builtin.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_builtin.wgsl.expected.glsl
new file mode 100644
index 0000000..55d79ff
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_builtin.wgsl.expected.glsl

@@ -0,0 +1,87 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct S {
+  int before;
+  uint pad;
+  f16mat2x3 m;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  int after;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+};
+
+struct S_std140 {
+  int before;
+  uint pad;
+  f16vec3 m_0;
+  f16vec3 m_1;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  int after;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  S_std140 inner[4];
+} u;
+
+f16mat2x3 load_u_inner_2_m() {
+  return f16mat2x3(u.inner[2u].m_0, u.inner[2u].m_1);
+}
+
+void f() {
+  f16mat3x2 t = transpose(load_u_inner_2_m());
+  float16_t l = length(u.inner[0u].m_1.zxy);
+  float16_t a = abs(u.inner[0u].m_1.zxy[0u]);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_builtin.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_builtin.wgsl.expected.msl
new file mode 100644
index 0000000..7f897eb
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_builtin.wgsl.expected.msl

@@ -0,0 +1,32 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
+  /* 0x0008 */ half2x3 m;
+  /* 0x0018 */ tint_array<int8_t, 40> tint_pad_1;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_2;
+};
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol [[buffer(0)]]) {
+  half3x2 const t = transpose((*(tint_symbol))[2].m);
+  half const l = length(half3((*(tint_symbol))[0].m[1]).zxy);
+  half const a = fabs(half3((*(tint_symbol))[0].m[1]).zxy[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_builtin.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_builtin.wgsl.expected.spvasm
new file mode 100644
index 0000000..f0c24f2
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_builtin.wgsl.expected.spvasm

@@ -0,0 +1,81 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 48
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+         %38 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "after"
+               OpName %u "u"
+               OpName %load_u_inner_2_m "load_u_inner_2_m"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 8
+               OpMemberDecorate %S_std140 2 Offset 16
+               OpMemberDecorate %S_std140 3 Offset 64
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+        %int = OpTypeInt 32 1
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+   %S_std140 = OpTypeStruct %int %v3half %v3half %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat2v3half = OpTypeMatrix %v3half 2
+         %11 = OpTypeFunction %mat2v3half
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+     %uint_1 = OpConstant %uint 1
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+       %void = OpTypeVoid
+         %29 = OpTypeFunction %void
+     %v2half = OpTypeVector %half 2
+ %mat3v2half = OpTypeMatrix %v2half 3
+         %39 = OpConstantNull %uint
+%load_u_inner_2_m = OpFunction %mat2v3half None %11
+         %14 = OpLabel
+         %19 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %23 = OpAccessChain %_ptr_Uniform_v3half %19 %uint_1
+         %24 = OpLoad %v3half %23
+         %26 = OpAccessChain %_ptr_Uniform_v3half %19 %uint_2
+         %27 = OpLoad %v3half %26
+         %28 = OpCompositeConstruct %mat2v3half %24 %27
+               OpReturnValue %28
+               OpFunctionEnd
+          %f = OpFunction %void None %29
+         %32 = OpLabel
+         %36 = OpFunctionCall %mat2v3half %load_u_inner_2_m
+         %33 = OpTranspose %mat3v2half %36
+         %40 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0 %39 %uint_2
+         %41 = OpLoad %v3half %40
+         %42 = OpVectorShuffle %v3half %41 %41 2 0 1
+         %37 = OpExtInst %half %38 Length %42
+         %44 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0 %39 %uint_2
+         %45 = OpLoad %v3half %44
+         %46 = OpVectorShuffle %v3half %45 %45 2 0 1
+         %47 = OpCompositeExtract %half %46 0
+         %43 = OpExtInst %half %38 FAbs %47
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_builtin.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_builtin.wgsl.expected.wgsl
new file mode 100644
index 0000000..37f0157
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_builtin.wgsl.expected.wgsl

@@ -0,0 +1,17 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat2x3<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let t = transpose(u[2].m);
+  let l = length(u[0].m[1].zxy);
+  let a = abs(u[0].m[1].zxy.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_fn.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_fn.wgsl
new file mode 100644
index 0000000..c601f63
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_fn.wgsl

@@ -0,0 +1,25 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat2x3<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+fn a(a : array<S, 4>) {}
+fn b(s : S) {}
+fn c(m : mat2x3<f16>) {}
+fn d(v : vec3<f16>) {}
+fn e(f : f16) {}
+
+@compute @workgroup_size(1)
+fn f() {
+    a(u);
+    b(u[2]);
+    c(u[2].m);
+    d(u[0].m[1].zxy);
+    e(u[0].m[1].zxy.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_fn.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_fn.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..0e799ce
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_fn.wgsl.expected.dxc.hlsl

@@ -0,0 +1,72 @@
+struct S {
+  int before;
+  matrix<float16_t, 2, 3> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+
+void a(S a_1[4]) {
+}
+
+void b(S s) {
+}
+
+void c(matrix<float16_t, 2, 3> m) {
+}
+
+void d(vector<float16_t, 3> v) {
+}
+
+void e(float16_t f_1) {
+}
+
+matrix<float16_t, 2, 3> tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  return matrix<float16_t, 2, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]));
+}
+
+S tint_symbol_1(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_2 = ((offset + 0u)) / 4;
+  const uint scalar_offset_3 = ((offset + 64u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4]), tint_symbol_3(buffer, (offset + 8u)), asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(tint_symbol_1(u, 256u));
+  c(tint_symbol_3(u, 264u));
+  uint2 ubo_load_4 = u[1].xy;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  d(vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]).zxy);
+  uint2 ubo_load_5 = u[1].xy;
+  vector<float16_t, 2> ubo_load_5_xz = vector<float16_t, 2>(f16tof32(ubo_load_5 & 0xFFFF));
+  float16_t ubo_load_5_y = f16tof32(ubo_load_5[0] >> 16);
+  e(vector<float16_t, 3>(ubo_load_5_xz[0], ubo_load_5_y, ubo_load_5_xz[1]).zxy.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_fn.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_fn.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..aeb6e6f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_fn.wgsl.expected.fxc.hlsl

@@ -0,0 +1,77 @@
+SKIP: FAILED
+
+struct S {
+  int before;
+  matrix<float16_t, 2, 3> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+
+void a(S a_1[4]) {
+}
+
+void b(S s) {
+}
+
+void c(matrix<float16_t, 2, 3> m) {
+}
+
+void d(vector<float16_t, 3> v) {
+}
+
+void e(float16_t f_1) {
+}
+
+matrix<float16_t, 2, 3> tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  return matrix<float16_t, 2, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]));
+}
+
+S tint_symbol_1(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_2 = ((offset + 0u)) / 4;
+  const uint scalar_offset_3 = ((offset + 64u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4]), tint_symbol_3(buffer, (offset + 8u)), asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(tint_symbol_1(u, 256u));
+  c(tint_symbol_3(u, 264u));
+  uint2 ubo_load_4 = u[1].xy;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  d(vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]).zxy);
+  uint2 ubo_load_5 = u[1].xy;
+  vector<float16_t, 2> ubo_load_5_xz = vector<float16_t, 2>(f16tof32(ubo_load_5 & 0xFFFF));
+  float16_t ubo_load_5_y = f16tof32(ubo_load_5[0] >> 16);
+  e(vector<float16_t, 3>(ubo_load_5_xz[0], ubo_load_5_y, ubo_load_5_xz[1]).zxy.x);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000002273DEDC980(3,10-18): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_fn.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_fn.wgsl.expected.glsl
new file mode 100644
index 0000000..c0fdc72
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_fn.wgsl.expected.glsl

@@ -0,0 +1,118 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct S {
+  int before;
+  uint pad;
+  f16mat2x3 m;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  int after;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+};
+
+struct S_std140 {
+  int before;
+  uint pad;
+  f16vec3 m_0;
+  f16vec3 m_1;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  int after;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  S_std140 inner[4];
+} u;
+
+void a(S a_1[4]) {
+}
+
+void b(S s) {
+}
+
+void c(f16mat2x3 m) {
+}
+
+void d(f16vec3 v) {
+}
+
+void e(float16_t f_1) {
+}
+
+S conv_S(S_std140 val) {
+  return S(val.before, val.pad, f16mat2x3(val.m_0, val.m_1), val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.pad_7, val.pad_8, val.pad_9, val.pad_10, val.after, val.pad_11, val.pad_12, val.pad_13, val.pad_14, val.pad_15, val.pad_16, val.pad_17, val.pad_18, val.pad_19, val.pad_20, val.pad_21, val.pad_22, val.pad_23, val.pad_24, val.pad_25);
+}
+
+S[4] conv_arr4_S(S_std140 val[4]) {
+  S arr[4] = S[4](S(0, 0u, f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_S(val[i]);
+    }
+  }
+  return arr;
+}
+
+f16mat2x3 load_u_inner_2_m() {
+  return f16mat2x3(u.inner[2u].m_0, u.inner[2u].m_1);
+}
+
+void f() {
+  a(conv_arr4_S(u.inner));
+  b(conv_S(u.inner[2u]));
+  c(load_u_inner_2_m());
+  d(u.inner[0u].m_1.zxy);
+  e(u.inner[0u].m_1.zxy[0u]);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_fn.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_fn.wgsl.expected.msl
new file mode 100644
index 0000000..9031d44
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_fn.wgsl.expected.msl

@@ -0,0 +1,49 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
+  /* 0x0008 */ half2x3 m;
+  /* 0x0018 */ tint_array<int8_t, 40> tint_pad_1;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_2;
+};
+
+void a(tint_array<S, 4> a_1) {
+}
+
+void b(S s) {
+}
+
+void c(half2x3 m) {
+}
+
+void d(half3 v) {
+}
+
+void e(half f_1) {
+}
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol [[buffer(0)]]) {
+  a(*(tint_symbol));
+  b((*(tint_symbol))[2]);
+  c((*(tint_symbol))[2].m);
+  d(half3((*(tint_symbol))[0].m[1]).zxy);
+  e(half3((*(tint_symbol))[0].m[1]).zxy[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_fn.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_fn.wgsl.expected.spvasm
new file mode 100644
index 0000000..9f29608
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_fn.wgsl.expected.spvasm

@@ -0,0 +1,204 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 119
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "after"
+               OpName %u "u"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %a "a"
+               OpName %a_1 "a_1"
+               OpName %b "b"
+               OpName %s "s"
+               OpName %c "c"
+               OpName %m "m"
+               OpName %d "d"
+               OpName %v "v"
+               OpName %e "e"
+               OpName %f_1 "f_1"
+               OpName %conv_S "conv_S"
+               OpName %val "val"
+               OpName %conv_arr4_S "conv_arr4_S"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_u_inner_2_m "load_u_inner_2_m"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 8
+               OpMemberDecorate %S_std140 2 Offset 16
+               OpMemberDecorate %S_std140 3 Offset 64
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 8
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 8
+               OpMemberDecorate %S 2 Offset 64
+               OpDecorate %_arr_S_uint_4 ArrayStride 128
+        %int = OpTypeInt 32 1
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+   %S_std140 = OpTypeStruct %int %v3half %v3half %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+       %void = OpTypeVoid
+ %mat2v3half = OpTypeMatrix %v3half 2
+          %S = OpTypeStruct %int %mat2v3half %int
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+         %11 = OpTypeFunction %void %_arr_S_uint_4
+         %19 = OpTypeFunction %void %S
+         %23 = OpTypeFunction %void %mat2v3half
+         %27 = OpTypeFunction %void %v3half
+         %31 = OpTypeFunction %void %half
+         %35 = OpTypeFunction %S %S_std140
+         %45 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
+%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
+         %51 = OpConstantNull %_arr_S_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %54 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
+         %67 = OpConstantNull %_arr_S_std140_uint_4
+%_ptr_Function_S = OpTypePointer Function %S
+%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
+     %uint_1 = OpConstant %uint 1
+         %80 = OpTypeFunction %mat2v3half
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+         %96 = OpTypeFunction %void
+%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
+          %a = OpFunction %void None %11
+        %a_1 = OpFunctionParameter %_arr_S_uint_4
+         %18 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %b = OpFunction %void None %19
+          %s = OpFunctionParameter %S
+         %22 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %c = OpFunction %void None %23
+          %m = OpFunctionParameter %mat2v3half
+         %26 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %d = OpFunction %void None %27
+          %v = OpFunctionParameter %v3half
+         %30 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %e = OpFunction %void None %31
+        %f_1 = OpFunctionParameter %half
+         %34 = OpLabel
+               OpReturn
+               OpFunctionEnd
+     %conv_S = OpFunction %S None %35
+        %val = OpFunctionParameter %S_std140
+         %38 = OpLabel
+         %39 = OpCompositeExtract %int %val 0
+         %40 = OpCompositeExtract %v3half %val 1
+         %41 = OpCompositeExtract %v3half %val 2
+         %42 = OpCompositeConstruct %mat2v3half %40 %41
+         %43 = OpCompositeExtract %int %val 3
+         %44 = OpCompositeConstruct %S %39 %42 %43
+               OpReturnValue %44
+               OpFunctionEnd
+%conv_arr4_S = OpFunction %_arr_S_uint_4 None %45
+      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
+         %48 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %51
+          %i = OpVariable %_ptr_Function_uint Function %54
+%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %67
+               OpBranch %55
+         %55 = OpLabel
+               OpLoopMerge %56 %57 None
+               OpBranch %58
+         %58 = OpLabel
+         %60 = OpLoad %uint %i
+         %61 = OpULessThan %bool %60 %uint_4
+         %59 = OpLogicalNot %bool %61
+               OpSelectionMerge %63 None
+               OpBranchConditional %59 %64 %63
+         %64 = OpLabel
+               OpBranch %56
+         %63 = OpLabel
+               OpStore %var_for_index %val_0
+         %68 = OpLoad %uint %i
+         %70 = OpAccessChain %_ptr_Function_S %arr %68
+         %72 = OpLoad %uint %i
+         %74 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %72
+         %75 = OpLoad %S_std140 %74
+         %71 = OpFunctionCall %S %conv_S %75
+               OpStore %70 %71
+               OpBranch %57
+         %57 = OpLabel
+         %76 = OpLoad %uint %i
+         %78 = OpIAdd %uint %76 %uint_1
+               OpStore %i %78
+               OpBranch %55
+         %56 = OpLabel
+         %79 = OpLoad %_arr_S_uint_4 %arr
+               OpReturnValue %79
+               OpFunctionEnd
+%load_u_inner_2_m = OpFunction %mat2v3half None %80
+         %82 = OpLabel
+         %87 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %90 = OpAccessChain %_ptr_Uniform_v3half %87 %uint_1
+         %91 = OpLoad %v3half %90
+         %93 = OpAccessChain %_ptr_Uniform_v3half %87 %uint_2
+         %94 = OpLoad %v3half %93
+         %95 = OpCompositeConstruct %mat2v3half %91 %94
+               OpReturnValue %95
+               OpFunctionEnd
+          %f = OpFunction %void None %96
+         %98 = OpLabel
+        %102 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
+        %103 = OpLoad %_arr_S_std140_uint_4 %102
+        %100 = OpFunctionCall %_arr_S_uint_4 %conv_arr4_S %103
+         %99 = OpFunctionCall %void %a %100
+        %106 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+        %107 = OpLoad %S_std140 %106
+        %105 = OpFunctionCall %S %conv_S %107
+        %104 = OpFunctionCall %void %b %105
+        %109 = OpFunctionCall %mat2v3half %load_u_inner_2_m
+        %108 = OpFunctionCall %void %c %109
+        %111 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0 %54 %uint_2
+        %112 = OpLoad %v3half %111
+        %113 = OpVectorShuffle %v3half %112 %112 2 0 1
+        %110 = OpFunctionCall %void %d %113
+        %115 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0 %54 %uint_2
+        %116 = OpLoad %v3half %115
+        %117 = OpVectorShuffle %v3half %116 %116 2 0 1
+        %118 = OpCompositeExtract %half %117 0
+        %114 = OpFunctionCall %void %e %118
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_fn.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_fn.wgsl.expected.wgsl
new file mode 100644
index 0000000..cca1442
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_fn.wgsl.expected.wgsl

@@ -0,0 +1,34 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat2x3<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+fn a(a : array<S, 4>) {
+}
+
+fn b(s : S) {
+}
+
+fn c(m : mat2x3<f16>) {
+}
+
+fn d(v : vec3<f16>) {
+}
+
+fn e(f : f16) {
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  a(u);
+  b(u[2]);
+  c(u[2].m);
+  d(u[0].m[1].zxy);
+  e(u[0].m[1].zxy.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_private.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_private.wgsl
new file mode 100644
index 0000000..9c99b5b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_private.wgsl

@@ -0,0 +1,19 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat2x3<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+var<private> p : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    p = u;
+    p[1] = u[2];
+    p[3].m = u[2].m;
+    p[1].m[0] = u[0].m[1].zxy;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_private.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_private.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..918efe0
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_private.wgsl.expected.dxc.hlsl

@@ -0,0 +1,54 @@
+struct S {
+  int before;
+  matrix<float16_t, 2, 3> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+static S p[4] = (S[4])0;
+
+matrix<float16_t, 2, 3> tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  return matrix<float16_t, 2, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]));
+}
+
+S tint_symbol_1(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_2 = ((offset + 0u)) / 4;
+  const uint scalar_offset_3 = ((offset + 64u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4]), tint_symbol_3(buffer, (offset + 8u)), asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = tint_symbol_1(u, 256u);
+  p[3].m = tint_symbol_3(u, 264u);
+  uint2 ubo_load_4 = u[1].xy;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  p[1].m[0] = vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]).zxy;
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_private.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_private.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..b82501d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_private.wgsl.expected.fxc.hlsl

@@ -0,0 +1,59 @@
+SKIP: FAILED
+
+struct S {
+  int before;
+  matrix<float16_t, 2, 3> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+static S p[4] = (S[4])0;
+
+matrix<float16_t, 2, 3> tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  return matrix<float16_t, 2, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]));
+}
+
+S tint_symbol_1(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_2 = ((offset + 0u)) / 4;
+  const uint scalar_offset_3 = ((offset + 64u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4]), tint_symbol_3(buffer, (offset + 8u)), asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = tint_symbol_1(u, 256u);
+  p[3].m = tint_symbol_3(u, 264u);
+  uint2 ubo_load_4 = u[1].xy;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  p[1].m[0] = vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]).zxy;
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x00000296576AF9E0(3,10-18): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_private.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_private.wgsl.expected.glsl
new file mode 100644
index 0000000..38b15d7
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_private.wgsl.expected.glsl

@@ -0,0 +1,103 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct S {
+  int before;
+  uint pad;
+  f16mat2x3 m;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  int after;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+};
+
+struct S_std140 {
+  int before;
+  uint pad;
+  f16vec3 m_0;
+  f16vec3 m_1;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  int after;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  S_std140 inner[4];
+} u;
+
+S p[4] = S[4](S(0, 0u, f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+S conv_S(S_std140 val) {
+  return S(val.before, val.pad, f16mat2x3(val.m_0, val.m_1), val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.pad_7, val.pad_8, val.pad_9, val.pad_10, val.after, val.pad_11, val.pad_12, val.pad_13, val.pad_14, val.pad_15, val.pad_16, val.pad_17, val.pad_18, val.pad_19, val.pad_20, val.pad_21, val.pad_22, val.pad_23, val.pad_24, val.pad_25);
+}
+
+S[4] conv_arr4_S(S_std140 val[4]) {
+  S arr[4] = S[4](S(0, 0u, f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_S(val[i]);
+    }
+  }
+  return arr;
+}
+
+f16mat2x3 load_u_inner_2_m() {
+  return f16mat2x3(u.inner[2u].m_0, u.inner[2u].m_1);
+}
+
+void f() {
+  p = conv_arr4_S(u.inner);
+  p[1] = conv_S(u.inner[2u]);
+  p[3].m = load_u_inner_2_m();
+  p[1].m[0] = u.inner[0u].m_1.zxy;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_private.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_private.wgsl.expected.msl
new file mode 100644
index 0000000..53a16ab
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_private.wgsl.expected.msl

@@ -0,0 +1,34 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
+  /* 0x0008 */ half2x3 m;
+  /* 0x0018 */ tint_array<int8_t, 40> tint_pad_1;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_2;
+};
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol_1 [[buffer(0)]]) {
+  thread tint_array<S, 4> tint_symbol = {};
+  tint_symbol = *(tint_symbol_1);
+  tint_symbol[1] = (*(tint_symbol_1))[2];
+  tint_symbol[3].m = (*(tint_symbol_1))[2].m;
+  tint_symbol[1].m[0] = half3((*(tint_symbol_1))[0].m[1]).zxy;
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_private.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_private.wgsl.expected.spvasm
new file mode 100644
index 0000000..e047cd5
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_private.wgsl.expected.spvasm

@@ -0,0 +1,171 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 101
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "after"
+               OpName %u "u"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %p "p"
+               OpName %conv_S "conv_S"
+               OpName %val "val"
+               OpName %conv_arr4_S "conv_arr4_S"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_u_inner_2_m "load_u_inner_2_m"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 8
+               OpMemberDecorate %S_std140 2 Offset 16
+               OpMemberDecorate %S_std140 3 Offset 64
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 8
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 8
+               OpMemberDecorate %S 2 Offset 64
+               OpDecorate %_arr_S_uint_4 ArrayStride 128
+        %int = OpTypeInt 32 1
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+   %S_std140 = OpTypeStruct %int %v3half %v3half %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat2v3half = OpTypeMatrix %v3half 2
+          %S = OpTypeStruct %int %mat2v3half %int
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+%_ptr_Private__arr_S_uint_4 = OpTypePointer Private %_arr_S_uint_4
+         %16 = OpConstantNull %_arr_S_uint_4
+          %p = OpVariable %_ptr_Private__arr_S_uint_4 Private %16
+         %17 = OpTypeFunction %S %S_std140
+         %27 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
+%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %35 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
+         %48 = OpConstantNull %_arr_S_std140_uint_4
+%_ptr_Function_S = OpTypePointer Function %S
+%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
+     %uint_1 = OpConstant %uint 1
+         %61 = OpTypeFunction %mat2v3half
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+       %void = OpTypeVoid
+         %77 = OpTypeFunction %void
+%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
+      %int_1 = OpConstant %int 1
+%_ptr_Private_S = OpTypePointer Private %S
+      %int_3 = OpConstant %int 3
+%_ptr_Private_mat2v3half = OpTypePointer Private %mat2v3half
+         %95 = OpConstantNull %int
+%_ptr_Private_v3half = OpTypePointer Private %v3half
+     %conv_S = OpFunction %S None %17
+        %val = OpFunctionParameter %S_std140
+         %20 = OpLabel
+         %21 = OpCompositeExtract %int %val 0
+         %22 = OpCompositeExtract %v3half %val 1
+         %23 = OpCompositeExtract %v3half %val 2
+         %24 = OpCompositeConstruct %mat2v3half %22 %23
+         %25 = OpCompositeExtract %int %val 3
+         %26 = OpCompositeConstruct %S %21 %24 %25
+               OpReturnValue %26
+               OpFunctionEnd
+%conv_arr4_S = OpFunction %_arr_S_uint_4 None %27
+      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
+         %30 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %16
+          %i = OpVariable %_ptr_Function_uint Function %35
+%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %48
+               OpBranch %36
+         %36 = OpLabel
+               OpLoopMerge %37 %38 None
+               OpBranch %39
+         %39 = OpLabel
+         %41 = OpLoad %uint %i
+         %42 = OpULessThan %bool %41 %uint_4
+         %40 = OpLogicalNot %bool %42
+               OpSelectionMerge %44 None
+               OpBranchConditional %40 %45 %44
+         %45 = OpLabel
+               OpBranch %37
+         %44 = OpLabel
+               OpStore %var_for_index %val_0
+         %49 = OpLoad %uint %i
+         %51 = OpAccessChain %_ptr_Function_S %arr %49
+         %53 = OpLoad %uint %i
+         %55 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %53
+         %56 = OpLoad %S_std140 %55
+         %52 = OpFunctionCall %S %conv_S %56
+               OpStore %51 %52
+               OpBranch %38
+         %38 = OpLabel
+         %57 = OpLoad %uint %i
+         %59 = OpIAdd %uint %57 %uint_1
+               OpStore %i %59
+               OpBranch %36
+         %37 = OpLabel
+         %60 = OpLoad %_arr_S_uint_4 %arr
+               OpReturnValue %60
+               OpFunctionEnd
+%load_u_inner_2_m = OpFunction %mat2v3half None %61
+         %63 = OpLabel
+         %68 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %71 = OpAccessChain %_ptr_Uniform_v3half %68 %uint_1
+         %72 = OpLoad %v3half %71
+         %74 = OpAccessChain %_ptr_Uniform_v3half %68 %uint_2
+         %75 = OpLoad %v3half %74
+         %76 = OpCompositeConstruct %mat2v3half %72 %75
+               OpReturnValue %76
+               OpFunctionEnd
+          %f = OpFunction %void None %77
+         %80 = OpLabel
+         %83 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
+         %84 = OpLoad %_arr_S_std140_uint_4 %83
+         %81 = OpFunctionCall %_arr_S_uint_4 %conv_arr4_S %84
+               OpStore %p %81
+         %87 = OpAccessChain %_ptr_Private_S %p %int_1
+         %89 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %90 = OpLoad %S_std140 %89
+         %88 = OpFunctionCall %S %conv_S %90
+               OpStore %87 %88
+         %93 = OpAccessChain %_ptr_Private_mat2v3half %p %int_3 %uint_1
+         %94 = OpFunctionCall %mat2v3half %load_u_inner_2_m
+               OpStore %93 %94
+         %97 = OpAccessChain %_ptr_Private_v3half %p %int_1 %uint_1 %95
+         %98 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0 %35 %uint_2
+         %99 = OpLoad %v3half %98
+        %100 = OpVectorShuffle %v3half %99 %99 2 0 1
+               OpStore %97 %100
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_private.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_private.wgsl.expected.wgsl
new file mode 100644
index 0000000..015af90
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_private.wgsl.expected.wgsl

@@ -0,0 +1,20 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat2x3<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+var<private> p : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  p = u;
+  p[1] = u[2];
+  p[3].m = u[2].m;
+  p[1].m[0] = u[0].m[1].zxy;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_storage.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_storage.wgsl
new file mode 100644
index 0000000..40bb077
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_storage.wgsl

@@ -0,0 +1,19 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat2x3<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    s = u;
+    s[1] = u[2];
+    s[3].m = u[2].m;
+    s[1].m[0] = u[0].m[1].zxy;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_storage.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_storage.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..ba15d73
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_storage.wgsl.expected.dxc.hlsl

@@ -0,0 +1,74 @@
+struct S {
+  int before;
+  matrix<float16_t, 2, 3> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol_3(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 2, 3> value) {
+  buffer.Store<vector<float16_t, 3> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 8u), value[1u]);
+}
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, S value) {
+  buffer.Store((offset + 0u), asuint(value.before));
+  tint_symbol_3(buffer, (offset + 8u), value.m);
+  buffer.Store((offset + 64u), asuint(value.after));
+}
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, S value[4]) {
+  S array[4] = value;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      tint_symbol_1(buffer, (offset + (i * 128u)), array[i]);
+    }
+  }
+}
+
+matrix<float16_t, 2, 3> tint_symbol_8(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  return matrix<float16_t, 2, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]));
+}
+
+S tint_symbol_6(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_2 = ((offset + 0u)) / 4;
+  const uint scalar_offset_3 = ((offset + 64u)) / 4;
+  const S tint_symbol_10 = {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4]), tint_symbol_8(buffer, (offset + 8u)), asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])};
+  return tint_symbol_10;
+}
+
+typedef S tint_symbol_5_ret[4];
+tint_symbol_5_ret tint_symbol_5(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_6(buffer, (offset + (i_1 * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_5(u, 0u));
+  tint_symbol_1(s, 128u, tint_symbol_6(u, 256u));
+  tint_symbol_3(s, 392u, tint_symbol_8(u, 264u));
+  uint2 ubo_load_4 = u[1].xy;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  s.Store<vector<float16_t, 3> >(136u, vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]).zxy);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_storage.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_storage.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..ecf95e4
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_storage.wgsl.expected.fxc.hlsl

@@ -0,0 +1,79 @@
+SKIP: FAILED
+
+struct S {
+  int before;
+  matrix<float16_t, 2, 3> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol_3(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 2, 3> value) {
+  buffer.Store<vector<float16_t, 3> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 8u), value[1u]);
+}
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, S value) {
+  buffer.Store((offset + 0u), asuint(value.before));
+  tint_symbol_3(buffer, (offset + 8u), value.m);
+  buffer.Store((offset + 64u), asuint(value.after));
+}
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, S value[4]) {
+  S array[4] = value;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      tint_symbol_1(buffer, (offset + (i * 128u)), array[i]);
+    }
+  }
+}
+
+matrix<float16_t, 2, 3> tint_symbol_8(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  return matrix<float16_t, 2, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]));
+}
+
+S tint_symbol_6(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_2 = ((offset + 0u)) / 4;
+  const uint scalar_offset_3 = ((offset + 64u)) / 4;
+  const S tint_symbol_10 = {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4]), tint_symbol_8(buffer, (offset + 8u)), asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])};
+  return tint_symbol_10;
+}
+
+typedef S tint_symbol_5_ret[4];
+tint_symbol_5_ret tint_symbol_5(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_6(buffer, (offset + (i_1 * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_5(u, 0u));
+  tint_symbol_1(s, 128u, tint_symbol_6(u, 256u));
+  tint_symbol_3(s, 392u, tint_symbol_8(u, 264u));
+  uint2 ubo_load_4 = u[1].xy;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  s.Store<vector<float16_t, 3> >(136u, vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]).zxy);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001EAD8916A10(3,10-18): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_storage.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_storage.wgsl.expected.glsl
new file mode 100644
index 0000000..4129467
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_storage.wgsl.expected.glsl

@@ -0,0 +1,106 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct S {
+  int before;
+  uint pad;
+  f16mat2x3 m;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  int after;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+};
+
+struct S_std140 {
+  int before;
+  uint pad;
+  f16vec3 m_0;
+  f16vec3 m_1;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  int after;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  S_std140 inner[4];
+} u;
+
+layout(binding = 1, std430) buffer u_block_ssbo {
+  S inner[4];
+} s;
+
+S conv_S(S_std140 val) {
+  return S(val.before, val.pad, f16mat2x3(val.m_0, val.m_1), val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.pad_7, val.pad_8, val.pad_9, val.pad_10, val.after, val.pad_11, val.pad_12, val.pad_13, val.pad_14, val.pad_15, val.pad_16, val.pad_17, val.pad_18, val.pad_19, val.pad_20, val.pad_21, val.pad_22, val.pad_23, val.pad_24, val.pad_25);
+}
+
+S[4] conv_arr4_S(S_std140 val[4]) {
+  S arr[4] = S[4](S(0, 0u, f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_S(val[i]);
+    }
+  }
+  return arr;
+}
+
+f16mat2x3 load_u_inner_2_m() {
+  return f16mat2x3(u.inner[2u].m_0, u.inner[2u].m_1);
+}
+
+void f() {
+  s.inner = conv_arr4_S(u.inner);
+  s.inner[1] = conv_S(u.inner[2u]);
+  s.inner[3].m = load_u_inner_2_m();
+  s.inner[1].m[0] = u.inner[0u].m_1.zxy;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_storage.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_storage.wgsl.expected.msl
new file mode 100644
index 0000000..58393ab
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_storage.wgsl.expected.msl

@@ -0,0 +1,33 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
+  /* 0x0008 */ half2x3 m;
+  /* 0x0018 */ tint_array<int8_t, 40> tint_pad_1;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_2;
+};
+
+kernel void f(device tint_array<S, 4>* tint_symbol [[buffer(1)]], const constant tint_array<S, 4>* tint_symbol_1 [[buffer(0)]]) {
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[2];
+  (*(tint_symbol))[3].m = (*(tint_symbol_1))[2].m;
+  (*(tint_symbol))[1].m[0] = half3((*(tint_symbol_1))[0].m[1]).zxy;
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_storage.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_storage.wgsl.expected.spvasm
new file mode 100644
index 0000000..68c548d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_storage.wgsl.expected.spvasm

@@ -0,0 +1,180 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 104
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "after"
+               OpName %u "u"
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %s "s"
+               OpName %conv_S "conv_S"
+               OpName %val "val"
+               OpName %conv_arr4_S "conv_arr4_S"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_u_inner_2_m "load_u_inner_2_m"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 8
+               OpMemberDecorate %S_std140 2 Offset 16
+               OpMemberDecorate %S_std140 3 Offset 64
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 8
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 8
+               OpMemberDecorate %S 2 Offset 64
+               OpDecorate %_arr_S_uint_4 ArrayStride 128
+               OpDecorate %s DescriptorSet 0
+               OpDecorate %s Binding 1
+        %int = OpTypeInt 32 1
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+   %S_std140 = OpTypeStruct %int %v3half %v3half %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat2v3half = OpTypeMatrix %v3half 2
+          %S = OpTypeStruct %int %mat2v3half %int
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+    %u_block = OpTypeStruct %_arr_S_uint_4
+%_ptr_StorageBuffer_u_block = OpTypePointer StorageBuffer %u_block
+          %s = OpVariable %_ptr_StorageBuffer_u_block StorageBuffer
+         %17 = OpTypeFunction %S %S_std140
+         %27 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
+%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
+         %33 = OpConstantNull %_arr_S_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %36 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
+         %49 = OpConstantNull %_arr_S_std140_uint_4
+%_ptr_Function_S = OpTypePointer Function %S
+%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
+     %uint_1 = OpConstant %uint 1
+         %62 = OpTypeFunction %mat2v3half
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+       %void = OpTypeVoid
+         %78 = OpTypeFunction %void
+%_ptr_StorageBuffer__arr_S_uint_4 = OpTypePointer StorageBuffer %_arr_S_uint_4
+%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
+      %int_1 = OpConstant %int 1
+%_ptr_StorageBuffer_S = OpTypePointer StorageBuffer %S
+      %int_3 = OpConstant %int 3
+%_ptr_StorageBuffer_mat2v3half = OpTypePointer StorageBuffer %mat2v3half
+         %98 = OpConstantNull %int
+%_ptr_StorageBuffer_v3half = OpTypePointer StorageBuffer %v3half
+     %conv_S = OpFunction %S None %17
+        %val = OpFunctionParameter %S_std140
+         %20 = OpLabel
+         %21 = OpCompositeExtract %int %val 0
+         %22 = OpCompositeExtract %v3half %val 1
+         %23 = OpCompositeExtract %v3half %val 2
+         %24 = OpCompositeConstruct %mat2v3half %22 %23
+         %25 = OpCompositeExtract %int %val 3
+         %26 = OpCompositeConstruct %S %21 %24 %25
+               OpReturnValue %26
+               OpFunctionEnd
+%conv_arr4_S = OpFunction %_arr_S_uint_4 None %27
+      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
+         %30 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %33
+          %i = OpVariable %_ptr_Function_uint Function %36
+%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %49
+               OpBranch %37
+         %37 = OpLabel
+               OpLoopMerge %38 %39 None
+               OpBranch %40
+         %40 = OpLabel
+         %42 = OpLoad %uint %i
+         %43 = OpULessThan %bool %42 %uint_4
+         %41 = OpLogicalNot %bool %43
+               OpSelectionMerge %45 None
+               OpBranchConditional %41 %46 %45
+         %46 = OpLabel
+               OpBranch %38
+         %45 = OpLabel
+               OpStore %var_for_index %val_0
+         %50 = OpLoad %uint %i
+         %52 = OpAccessChain %_ptr_Function_S %arr %50
+         %54 = OpLoad %uint %i
+         %56 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %54
+         %57 = OpLoad %S_std140 %56
+         %53 = OpFunctionCall %S %conv_S %57
+               OpStore %52 %53
+               OpBranch %39
+         %39 = OpLabel
+         %58 = OpLoad %uint %i
+         %60 = OpIAdd %uint %58 %uint_1
+               OpStore %i %60
+               OpBranch %37
+         %38 = OpLabel
+         %61 = OpLoad %_arr_S_uint_4 %arr
+               OpReturnValue %61
+               OpFunctionEnd
+%load_u_inner_2_m = OpFunction %mat2v3half None %62
+         %64 = OpLabel
+         %69 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %72 = OpAccessChain %_ptr_Uniform_v3half %69 %uint_1
+         %73 = OpLoad %v3half %72
+         %75 = OpAccessChain %_ptr_Uniform_v3half %69 %uint_2
+         %76 = OpLoad %v3half %75
+         %77 = OpCompositeConstruct %mat2v3half %73 %76
+               OpReturnValue %77
+               OpFunctionEnd
+          %f = OpFunction %void None %78
+         %81 = OpLabel
+         %83 = OpAccessChain %_ptr_StorageBuffer__arr_S_uint_4 %s %uint_0
+         %86 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
+         %87 = OpLoad %_arr_S_std140_uint_4 %86
+         %84 = OpFunctionCall %_arr_S_uint_4 %conv_arr4_S %87
+               OpStore %83 %84
+         %90 = OpAccessChain %_ptr_StorageBuffer_S %s %uint_0 %int_1
+         %92 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %93 = OpLoad %S_std140 %92
+         %91 = OpFunctionCall %S %conv_S %93
+               OpStore %90 %91
+         %96 = OpAccessChain %_ptr_StorageBuffer_mat2v3half %s %uint_0 %int_3 %uint_1
+         %97 = OpFunctionCall %mat2v3half %load_u_inner_2_m
+               OpStore %96 %97
+        %100 = OpAccessChain %_ptr_StorageBuffer_v3half %s %uint_0 %int_1 %uint_1 %98
+        %101 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0 %36 %uint_2
+        %102 = OpLoad %v3half %101
+        %103 = OpVectorShuffle %v3half %102 %102 2 0 1
+               OpStore %100 %103
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_storage.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_storage.wgsl.expected.wgsl
new file mode 100644
index 0000000..2cf8a20
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_storage.wgsl.expected.wgsl

@@ -0,0 +1,20 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat2x3<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  s = u;
+  s[1] = u[2];
+  s[3].m = u[2].m;
+  s[1].m[0] = u[0].m[1].zxy;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_workgroup.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_workgroup.wgsl
new file mode 100644
index 0000000..3292542
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_workgroup.wgsl

@@ -0,0 +1,19 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat2x3<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+var<workgroup> w : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    w = u;
+    w[1] = u[2];
+    w[3].m = u[2].m;
+    w[1].m[0] = u[0].m[1].zxy;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_workgroup.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_workgroup.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..3ddda98
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_workgroup.wgsl.expected.dxc.hlsl

@@ -0,0 +1,70 @@
+struct S {
+  int before;
+  matrix<float16_t, 2, 3> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+groupshared S w[4];
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+matrix<float16_t, 2, 3> tint_symbol_5(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  return matrix<float16_t, 2, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]));
+}
+
+S tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_2 = ((offset + 0u)) / 4;
+  const uint scalar_offset_3 = ((offset + 64u)) / 4;
+  const S tint_symbol_8 = {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4]), tint_symbol_5(buffer, (offset + 8u)), asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])};
+  return tint_symbol_8;
+}
+
+typedef S tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 128u)));
+    }
+  }
+  return arr;
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      const uint i = idx;
+      const S tint_symbol_7 = (S)0;
+      w[i] = tint_symbol_7;
+    }
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = tint_symbol_3(u, 256u);
+  w[3].m = tint_symbol_5(u, 264u);
+  uint2 ubo_load_4 = u[1].xy;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  w[1].m[0] = vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]).zxy;
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_workgroup.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_workgroup.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..20fbb45
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_workgroup.wgsl.expected.fxc.hlsl

@@ -0,0 +1,75 @@
+SKIP: FAILED
+
+struct S {
+  int before;
+  matrix<float16_t, 2, 3> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+groupshared S w[4];
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+matrix<float16_t, 2, 3> tint_symbol_5(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  return matrix<float16_t, 2, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]));
+}
+
+S tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_2 = ((offset + 0u)) / 4;
+  const uint scalar_offset_3 = ((offset + 64u)) / 4;
+  const S tint_symbol_8 = {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4]), tint_symbol_5(buffer, (offset + 8u)), asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])};
+  return tint_symbol_8;
+}
+
+typedef S tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 128u)));
+    }
+  }
+  return arr;
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      const uint i = idx;
+      const S tint_symbol_7 = (S)0;
+      w[i] = tint_symbol_7;
+    }
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = tint_symbol_3(u, 256u);
+  w[3].m = tint_symbol_5(u, 264u);
+  uint2 ubo_load_4 = u[1].xy;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  w[1].m[0] = vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]).zxy;
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001CE6EDD85C0(3,10-18): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_workgroup.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_workgroup.wgsl.expected.glsl
new file mode 100644
index 0000000..d3addb0
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_workgroup.wgsl.expected.glsl

@@ -0,0 +1,111 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct S {
+  int before;
+  uint pad;
+  f16mat2x3 m;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  int after;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+};
+
+struct S_std140 {
+  int before;
+  uint pad;
+  f16vec3 m_0;
+  f16vec3 m_1;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  int after;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  S_std140 inner[4];
+} u;
+
+shared S w[4];
+S conv_S(S_std140 val) {
+  return S(val.before, val.pad, f16mat2x3(val.m_0, val.m_1), val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.pad_7, val.pad_8, val.pad_9, val.pad_10, val.after, val.pad_11, val.pad_12, val.pad_13, val.pad_14, val.pad_15, val.pad_16, val.pad_17, val.pad_18, val.pad_19, val.pad_20, val.pad_21, val.pad_22, val.pad_23, val.pad_24, val.pad_25);
+}
+
+S[4] conv_arr4_S(S_std140 val[4]) {
+  S arr[4] = S[4](S(0, 0u, f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_S(val[i]);
+    }
+  }
+  return arr;
+}
+
+f16mat2x3 load_u_inner_2_m() {
+  return f16mat2x3(u.inner[2u].m_0, u.inner[2u].m_1);
+}
+
+void f(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      uint i = idx;
+      S tint_symbol = S(0, 0u, f16mat2x3(f16vec3(0.0hf), f16vec3(0.0hf)), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u);
+      w[i] = tint_symbol;
+    }
+  }
+  barrier();
+  w = conv_arr4_S(u.inner);
+  w[1] = conv_S(u.inner[2u]);
+  w[3].m = load_u_inner_2_m();
+  w[1].m[0] = u.inner[0u].m_1.zxy;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f(gl_LocalInvocationIndex);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_workgroup.wgsl.expected.msl
new file mode 100644
index 0000000..816c6c6
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_workgroup.wgsl.expected.msl

@@ -0,0 +1,48 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
+  /* 0x0008 */ half2x3 m;
+  /* 0x0018 */ tint_array<int8_t, 40> tint_pad_1;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_2;
+};
+
+struct tint_symbol_6 {
+  tint_array<S, 4> w;
+};
+
+void f_inner(uint local_invocation_index, threadgroup tint_array<S, 4>* const tint_symbol_1, const constant tint_array<S, 4>* const tint_symbol_2) {
+  for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+    uint const i = idx;
+    S const tint_symbol = S{};
+    (*(tint_symbol_1))[i] = tint_symbol;
+  }
+  threadgroup_barrier(mem_flags::mem_threadgroup);
+  *(tint_symbol_1) = *(tint_symbol_2);
+  (*(tint_symbol_1))[1] = (*(tint_symbol_2))[2];
+  (*(tint_symbol_1))[3].m = (*(tint_symbol_2))[2].m;
+  (*(tint_symbol_1))[1].m[0] = half3((*(tint_symbol_2))[0].m[1]).zxy;
+}
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol_5 [[buffer(0)]], threadgroup tint_symbol_6* tint_symbol_4 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
+  threadgroup tint_array<S, 4>* const tint_symbol_3 = &((*(tint_symbol_4)).w);
+  f_inner(local_invocation_index, tint_symbol_3, tint_symbol_5);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_workgroup.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_workgroup.wgsl.expected.spvasm
new file mode 100644
index 0000000..261be93
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_workgroup.wgsl.expected.spvasm

@@ -0,0 +1,214 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 126
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f" %local_invocation_index_1
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %local_invocation_index_1 "local_invocation_index_1"
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "after"
+               OpName %u "u"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %w "w"
+               OpName %conv_S "conv_S"
+               OpName %val "val"
+               OpName %conv_arr4_S "conv_arr4_S"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_u_inner_2_m "load_u_inner_2_m"
+               OpName %f_inner "f_inner"
+               OpName %local_invocation_index "local_invocation_index"
+               OpName %idx "idx"
+               OpName %f "f"
+               OpDecorate %local_invocation_index_1 BuiltIn LocalInvocationIndex
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 8
+               OpMemberDecorate %S_std140 2 Offset 16
+               OpMemberDecorate %S_std140 3 Offset 64
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 8
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 8
+               OpMemberDecorate %S 2 Offset 64
+               OpDecorate %_arr_S_uint_4 ArrayStride 128
+       %uint = OpTypeInt 32 0
+%_ptr_Input_uint = OpTypePointer Input %uint
+%local_invocation_index_1 = OpVariable %_ptr_Input_uint Input
+        %int = OpTypeInt 32 1
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+   %S_std140 = OpTypeStruct %int %v3half %v3half %int
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat2v3half = OpTypeMatrix %v3half 2
+          %S = OpTypeStruct %int %mat2v3half %int
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+%_ptr_Workgroup__arr_S_uint_4 = OpTypePointer Workgroup %_arr_S_uint_4
+          %w = OpVariable %_ptr_Workgroup__arr_S_uint_4 Workgroup
+         %18 = OpTypeFunction %S %S_std140
+         %28 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
+%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
+         %34 = OpConstantNull %_arr_S_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %37 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
+         %50 = OpConstantNull %_arr_S_std140_uint_4
+%_ptr_Function_S = OpTypePointer Function %S
+%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
+     %uint_1 = OpConstant %uint 1
+         %63 = OpTypeFunction %mat2v3half
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+       %void = OpTypeVoid
+         %79 = OpTypeFunction %void %uint
+%_ptr_Workgroup_S = OpTypePointer Workgroup %S
+         %97 = OpConstantNull %S
+   %uint_264 = OpConstant %uint 264
+%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
+      %int_1 = OpConstant %int 1
+      %int_3 = OpConstant %int 3
+%_ptr_Workgroup_mat2v3half = OpTypePointer Workgroup %mat2v3half
+        %115 = OpConstantNull %int
+%_ptr_Workgroup_v3half = OpTypePointer Workgroup %v3half
+        %121 = OpTypeFunction %void
+     %conv_S = OpFunction %S None %18
+        %val = OpFunctionParameter %S_std140
+         %21 = OpLabel
+         %22 = OpCompositeExtract %int %val 0
+         %23 = OpCompositeExtract %v3half %val 1
+         %24 = OpCompositeExtract %v3half %val 2
+         %25 = OpCompositeConstruct %mat2v3half %23 %24
+         %26 = OpCompositeExtract %int %val 3
+         %27 = OpCompositeConstruct %S %22 %25 %26
+               OpReturnValue %27
+               OpFunctionEnd
+%conv_arr4_S = OpFunction %_arr_S_uint_4 None %28
+      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
+         %31 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %34
+          %i = OpVariable %_ptr_Function_uint Function %37
+%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %50
+               OpBranch %38
+         %38 = OpLabel
+               OpLoopMerge %39 %40 None
+               OpBranch %41
+         %41 = OpLabel
+         %43 = OpLoad %uint %i
+         %44 = OpULessThan %bool %43 %uint_4
+         %42 = OpLogicalNot %bool %44
+               OpSelectionMerge %46 None
+               OpBranchConditional %42 %47 %46
+         %47 = OpLabel
+               OpBranch %39
+         %46 = OpLabel
+               OpStore %var_for_index %val_0
+         %51 = OpLoad %uint %i
+         %53 = OpAccessChain %_ptr_Function_S %arr %51
+         %55 = OpLoad %uint %i
+         %57 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %55
+         %58 = OpLoad %S_std140 %57
+         %54 = OpFunctionCall %S %conv_S %58
+               OpStore %53 %54
+               OpBranch %40
+         %40 = OpLabel
+         %59 = OpLoad %uint %i
+         %61 = OpIAdd %uint %59 %uint_1
+               OpStore %i %61
+               OpBranch %38
+         %39 = OpLabel
+         %62 = OpLoad %_arr_S_uint_4 %arr
+               OpReturnValue %62
+               OpFunctionEnd
+%load_u_inner_2_m = OpFunction %mat2v3half None %63
+         %65 = OpLabel
+         %70 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %73 = OpAccessChain %_ptr_Uniform_v3half %70 %uint_1
+         %74 = OpLoad %v3half %73
+         %76 = OpAccessChain %_ptr_Uniform_v3half %70 %uint_2
+         %77 = OpLoad %v3half %76
+         %78 = OpCompositeConstruct %mat2v3half %74 %77
+               OpReturnValue %78
+               OpFunctionEnd
+    %f_inner = OpFunction %void None %79
+%local_invocation_index = OpFunctionParameter %uint
+         %83 = OpLabel
+        %idx = OpVariable %_ptr_Function_uint Function %37
+               OpStore %idx %local_invocation_index
+               OpBranch %85
+         %85 = OpLabel
+               OpLoopMerge %86 %87 None
+               OpBranch %88
+         %88 = OpLabel
+         %90 = OpLoad %uint %idx
+         %91 = OpULessThan %bool %90 %uint_4
+         %89 = OpLogicalNot %bool %91
+               OpSelectionMerge %92 None
+               OpBranchConditional %89 %93 %92
+         %93 = OpLabel
+               OpBranch %86
+         %92 = OpLabel
+         %94 = OpLoad %uint %idx
+         %96 = OpAccessChain %_ptr_Workgroup_S %w %94
+               OpStore %96 %97
+               OpBranch %87
+         %87 = OpLabel
+         %98 = OpLoad %uint %idx
+         %99 = OpIAdd %uint %98 %uint_1
+               OpStore %idx %99
+               OpBranch %85
+         %86 = OpLabel
+               OpControlBarrier %uint_2 %uint_2 %uint_264
+        %104 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
+        %105 = OpLoad %_arr_S_std140_uint_4 %104
+        %102 = OpFunctionCall %_arr_S_uint_4 %conv_arr4_S %105
+               OpStore %w %102
+        %107 = OpAccessChain %_ptr_Workgroup_S %w %int_1
+        %109 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+        %110 = OpLoad %S_std140 %109
+        %108 = OpFunctionCall %S %conv_S %110
+               OpStore %107 %108
+        %113 = OpAccessChain %_ptr_Workgroup_mat2v3half %w %int_3 %uint_1
+        %114 = OpFunctionCall %mat2v3half %load_u_inner_2_m
+               OpStore %113 %114
+        %117 = OpAccessChain %_ptr_Workgroup_v3half %w %int_1 %uint_1 %115
+        %118 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0 %37 %uint_2
+        %119 = OpLoad %v3half %118
+        %120 = OpVectorShuffle %v3half %119 %119 2 0 1
+               OpStore %117 %120
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %121
+        %123 = OpLabel
+        %125 = OpLoad %uint %local_invocation_index_1
+        %124 = OpFunctionCall %void %f_inner %125
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_workgroup.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_workgroup.wgsl.expected.wgsl
new file mode 100644
index 0000000..dfe935a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_workgroup.wgsl.expected.wgsl

@@ -0,0 +1,20 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat2x3<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+var<workgroup> w : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  w = u;
+  w[1] = u[2];
+  w[3].m = u[2].m;
+  w[1].m[0] = u[0].m[1].zxy;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f32/dynamic_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/dynamic_index_via_ptr.wgsl
new file mode 100644
index 0000000..133588b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/dynamic_index_via_ptr.wgsl

@@ -0,0 +1,32 @@
+struct Inner {
+  @size(64)
+  m : mat2x3<f32>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+var<private> counter = 0;
+fn i() -> i32 { counter++; return counter; }
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a           = &a;
+  let p_a_i         = &((*p_a)[i()]);
+  let p_a_i_a       = &((*p_a_i).a);
+  let p_a_i_a_i     = &((*p_a_i_a)[i()]);
+  let p_a_i_a_i_m   = &((*p_a_i_a_i).m);
+  let p_a_i_a_i_m_i = &((*p_a_i_a_i_m)[i()]);
+
+
+  let l_a             : array<Outer, 4> =  *p_a;
+  let l_a_i           : Outer           =  *p_a_i;
+  let l_a_i_a         : array<Inner, 4> =  *p_a_i_a;
+  let l_a_i_a_i       : Inner           =  *p_a_i_a_i;
+  let l_a_i_a_i_m     : mat2x3<f32>     =  *p_a_i_a_i_m;
+  let l_a_i_a_i_m_i   : vec3<f32>       =  *p_a_i_a_i_m_i;
+  let l_a_i_a_i_m_i_i : f32             = (*p_a_i_a_i_m_i)[i()];
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..553de51
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,75 @@
+struct Inner {
+  float2x3 m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+float2x3 tint_symbol_8(uint4 buffer[64], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz));
+}
+
+Inner tint_symbol_7(uint4 buffer[64], uint offset) {
+  const Inner tint_symbol_11 = {tint_symbol_8(buffer, (offset + 0u))};
+  return tint_symbol_11;
+}
+
+typedef Inner tint_symbol_6_ret[4];
+tint_symbol_6_ret tint_symbol_6(uint4 buffer[64], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_7(buffer, (offset + (i_1 * 64u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_5(uint4 buffer[64], uint offset) {
+  const Outer tint_symbol_12 = {tint_symbol_6(buffer, (offset + 0u))};
+  return tint_symbol_12;
+}
+
+typedef Outer tint_symbol_4_ret[4];
+tint_symbol_4_ret tint_symbol_4(uint4 buffer[64], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    for(uint i_2 = 0u; (i_2 < 4u); i_2 = (i_2 + 1u)) {
+      arr_1[i_2] = tint_symbol_5(buffer, (offset + (i_2 * 256u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_a_i_save = i();
+  const int p_a_i_a_i_save = i();
+  const int p_a_i_a_i_m_i_save = i();
+  const Outer l_a[4] = tint_symbol_4(a, 0u);
+  const Outer l_a_i = tint_symbol_5(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a[4] = tint_symbol_6(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a_i = tint_symbol_7(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const float2x3 l_a_i_a_i_m = tint_symbol_8(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const uint scalar_offset_2 = ((((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))) + (16u * uint(p_a_i_a_i_m_i_save)))) / 4;
+  const float3 l_a_i_a_i_m_i = asfloat(a[scalar_offset_2 / 4].xyz);
+  const int tint_symbol = p_a_i_save;
+  const int tint_symbol_1 = p_a_i_a_i_save;
+  const int tint_symbol_2 = p_a_i_a_i_m_i_save;
+  const int tint_symbol_3 = i();
+  const uint scalar_offset_3 = (((((256u * uint(tint_symbol)) + (64u * uint(tint_symbol_1))) + (16u * uint(tint_symbol_2))) + (4u * uint(tint_symbol_3)))) / 4;
+  const float l_a_i_a_i_m_i_i = asfloat(a[scalar_offset_3 / 4][scalar_offset_3 % 4]);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..553de51
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,75 @@
+struct Inner {
+  float2x3 m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+float2x3 tint_symbol_8(uint4 buffer[64], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz));
+}
+
+Inner tint_symbol_7(uint4 buffer[64], uint offset) {
+  const Inner tint_symbol_11 = {tint_symbol_8(buffer, (offset + 0u))};
+  return tint_symbol_11;
+}
+
+typedef Inner tint_symbol_6_ret[4];
+tint_symbol_6_ret tint_symbol_6(uint4 buffer[64], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_7(buffer, (offset + (i_1 * 64u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_5(uint4 buffer[64], uint offset) {
+  const Outer tint_symbol_12 = {tint_symbol_6(buffer, (offset + 0u))};
+  return tint_symbol_12;
+}
+
+typedef Outer tint_symbol_4_ret[4];
+tint_symbol_4_ret tint_symbol_4(uint4 buffer[64], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    for(uint i_2 = 0u; (i_2 < 4u); i_2 = (i_2 + 1u)) {
+      arr_1[i_2] = tint_symbol_5(buffer, (offset + (i_2 * 256u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_a_i_save = i();
+  const int p_a_i_a_i_save = i();
+  const int p_a_i_a_i_m_i_save = i();
+  const Outer l_a[4] = tint_symbol_4(a, 0u);
+  const Outer l_a_i = tint_symbol_5(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a[4] = tint_symbol_6(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a_i = tint_symbol_7(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const float2x3 l_a_i_a_i_m = tint_symbol_8(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const uint scalar_offset_2 = ((((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))) + (16u * uint(p_a_i_a_i_m_i_save)))) / 4;
+  const float3 l_a_i_a_i_m_i = asfloat(a[scalar_offset_2 / 4].xyz);
+  const int tint_symbol = p_a_i_save;
+  const int tint_symbol_1 = p_a_i_a_i_save;
+  const int tint_symbol_2 = p_a_i_a_i_m_i_save;
+  const int tint_symbol_3 = i();
+  const uint scalar_offset_3 = (((((256u * uint(tint_symbol)) + (64u * uint(tint_symbol_1))) + (16u * uint(tint_symbol_2))) + (4u * uint(tint_symbol_3)))) / 4;
+  const float l_a_i_a_i_m_i_i = asfloat(a[scalar_offset_3 / 4][scalar_offset_3 % 4]);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f32/dynamic_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/dynamic_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..6fa3df3
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/dynamic_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,50 @@
+#version 310 es
+
+struct Inner {
+  mat2x3 m;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+};
+
+struct Outer {
+  Inner a[4];
+};
+
+layout(binding = 0, std140) uniform a_block_ubo {
+  Outer inner[4];
+} a;
+
+int counter = 0;
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+void f() {
+  int tint_symbol = i();
+  int p_a_i_save = tint_symbol;
+  int tint_symbol_1 = i();
+  int p_a_i_a_i_save = tint_symbol_1;
+  int tint_symbol_2 = i();
+  int p_a_i_a_i_m_i_save = tint_symbol_2;
+  Outer l_a[4] = a.inner;
+  Outer l_a_i = a.inner[p_a_i_save];
+  Inner l_a_i_a[4] = a.inner[p_a_i_save].a;
+  Inner l_a_i_a_i = a.inner[p_a_i_save].a[p_a_i_a_i_save];
+  mat2x3 l_a_i_a_i_m = a.inner[p_a_i_save].a[p_a_i_a_i_save].m;
+  vec3 l_a_i_a_i_m_i = a.inner[p_a_i_save].a[p_a_i_a_i_save].m[p_a_i_a_i_m_i_save];
+  int tint_symbol_3 = i();
+  float l_a_i_a_i_m_i_i = a.inner[p_a_i_save].a[p_a_i_a_i_save].m[p_a_i_a_i_m_i_save][tint_symbol_3];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f32/dynamic_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/dynamic_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..d4767f8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/dynamic_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,49 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct Inner {
+  /* 0x0000 */ float2x3 m;
+  /* 0x0020 */ tint_array<int8_t, 32> tint_pad;
+};
+
+struct Outer {
+  /* 0x0000 */ tint_array<Inner, 4> a;
+};
+
+int i() {
+  thread int tint_symbol_4 = 0;
+  tint_symbol_4 = as_type<int>((as_type<uint>(tint_symbol_4) + as_type<uint>(1)));
+  return tint_symbol_4;
+}
+
+kernel void f(const constant tint_array<Outer, 4>* tint_symbol_5 [[buffer(0)]]) {
+  int const tint_symbol = i();
+  int const p_a_i_save = tint_symbol;
+  int const tint_symbol_1 = i();
+  int const p_a_i_a_i_save = tint_symbol_1;
+  int const tint_symbol_2 = i();
+  int const p_a_i_a_i_m_i_save = tint_symbol_2;
+  tint_array<Outer, 4> const l_a = *(tint_symbol_5);
+  Outer const l_a_i = (*(tint_symbol_5))[p_a_i_save];
+  tint_array<Inner, 4> const l_a_i_a = (*(tint_symbol_5))[p_a_i_save].a;
+  Inner const l_a_i_a_i = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save];
+  float2x3 const l_a_i_a_i_m = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save].m;
+  float3 const l_a_i_a_i_m_i = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save].m[p_a_i_a_i_m_i_save];
+  int const tint_symbol_3 = i();
+  float const l_a_i_a_i_m_i_i = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save].m[p_a_i_a_i_m_i_save][tint_symbol_3];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f32/dynamic_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/dynamic_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..355a8dd
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/dynamic_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,88 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 54
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %a_block "a_block"
+               OpMemberName %a_block 0 "inner"
+               OpName %Outer "Outer"
+               OpMemberName %Outer 0 "a"
+               OpName %Inner "Inner"
+               OpMemberName %Inner 0 "m"
+               OpName %a "a"
+               OpName %counter "counter"
+               OpName %i "i"
+               OpName %f "f"
+               OpDecorate %a_block Block
+               OpMemberDecorate %a_block 0 Offset 0
+               OpMemberDecorate %Outer 0 Offset 0
+               OpMemberDecorate %Inner 0 Offset 0
+               OpMemberDecorate %Inner 0 ColMajor
+               OpMemberDecorate %Inner 0 MatrixStride 16
+               OpDecorate %_arr_Inner_uint_4 ArrayStride 64
+               OpDecorate %_arr_Outer_uint_4 ArrayStride 256
+               OpDecorate %a NonWritable
+               OpDecorate %a DescriptorSet 0
+               OpDecorate %a Binding 0
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat2v3float = OpTypeMatrix %v3float 2
+      %Inner = OpTypeStruct %mat2v3float
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_Inner_uint_4 = OpTypeArray %Inner %uint_4
+      %Outer = OpTypeStruct %_arr_Inner_uint_4
+%_arr_Outer_uint_4 = OpTypeArray %Outer %uint_4
+    %a_block = OpTypeStruct %_arr_Outer_uint_4
+%_ptr_Uniform_a_block = OpTypePointer Uniform %a_block
+          %a = OpVariable %_ptr_Uniform_a_block Uniform
+        %int = OpTypeInt 32 1
+         %14 = OpConstantNull %int
+%_ptr_Private_int = OpTypePointer Private %int
+    %counter = OpVariable %_ptr_Private_int Private %14
+         %17 = OpTypeFunction %int
+      %int_1 = OpConstant %int 1
+       %void = OpTypeVoid
+         %24 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_Outer_uint_4 = OpTypePointer Uniform %_arr_Outer_uint_4
+%_ptr_Uniform_Outer = OpTypePointer Uniform %Outer
+%_ptr_Uniform__arr_Inner_uint_4 = OpTypePointer Uniform %_arr_Inner_uint_4
+%_ptr_Uniform_Inner = OpTypePointer Uniform %Inner
+%_ptr_Uniform_mat2v3float = OpTypePointer Uniform %mat2v3float
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+          %i = OpFunction %int None %17
+         %19 = OpLabel
+         %20 = OpLoad %int %counter
+         %22 = OpIAdd %int %20 %int_1
+               OpStore %counter %22
+         %23 = OpLoad %int %counter
+               OpReturnValue %23
+               OpFunctionEnd
+          %f = OpFunction %void None %24
+         %27 = OpLabel
+         %28 = OpFunctionCall %int %i
+         %29 = OpFunctionCall %int %i
+         %30 = OpFunctionCall %int %i
+         %33 = OpAccessChain %_ptr_Uniform__arr_Outer_uint_4 %a %uint_0
+         %34 = OpLoad %_arr_Outer_uint_4 %33
+         %36 = OpAccessChain %_ptr_Uniform_Outer %a %uint_0 %28
+         %37 = OpLoad %Outer %36
+         %39 = OpAccessChain %_ptr_Uniform__arr_Inner_uint_4 %a %uint_0 %28 %uint_0
+         %40 = OpLoad %_arr_Inner_uint_4 %39
+         %42 = OpAccessChain %_ptr_Uniform_Inner %a %uint_0 %28 %uint_0 %29
+         %43 = OpLoad %Inner %42
+         %45 = OpAccessChain %_ptr_Uniform_mat2v3float %a %uint_0 %28 %uint_0 %29 %uint_0
+         %46 = OpLoad %mat2v3float %45
+         %48 = OpAccessChain %_ptr_Uniform_v3float %a %uint_0 %28 %uint_0 %29 %uint_0 %30
+         %49 = OpLoad %v3float %48
+         %50 = OpFunctionCall %int %i
+         %52 = OpAccessChain %_ptr_Uniform_float %a %uint_0 %28 %uint_0 %29 %uint_0 %30 %50
+         %53 = OpLoad %float %52
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f32/dynamic_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/dynamic_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..38d96d7
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/dynamic_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,34 @@
+struct Inner {
+  @size(64)
+  m : mat2x3<f32>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+var<private> counter = 0;
+
+fn i() -> i32 {
+  counter++;
+  return counter;
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &(a);
+  let p_a_i = &((*(p_a))[i()]);
+  let p_a_i_a = &((*(p_a_i)).a);
+  let p_a_i_a_i = &((*(p_a_i_a))[i()]);
+  let p_a_i_a_i_m = &((*(p_a_i_a_i)).m);
+  let p_a_i_a_i_m_i = &((*(p_a_i_a_i_m))[i()]);
+  let l_a : array<Outer, 4> = *(p_a);
+  let l_a_i : Outer = *(p_a_i);
+  let l_a_i_a : array<Inner, 4> = *(p_a_i_a);
+  let l_a_i_a_i : Inner = *(p_a_i_a_i);
+  let l_a_i_a_i_m : mat2x3<f32> = *(p_a_i_a_i_m);
+  let l_a_i_a_i_m_i : vec3<f32> = *(p_a_i_a_i_m_i);
+  let l_a_i_a_i_m_i_i : f32 = (*(p_a_i_a_i_m_i))[i()];
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f32/static_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/static_index_via_ptr.wgsl
new file mode 100644
index 0000000..6222c44
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/static_index_via_ptr.wgsl

@@ -0,0 +1,29 @@
+struct Inner {
+  @size(64)
+  m : mat2x3<f32>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &a;
+  let p_a_3 = &((*p_a)[3]);
+  let p_a_3_a = &((*p_a_3).a);
+  let p_a_3_a_2 = &((*p_a_3_a)[2]);
+  let p_a_3_a_2_m = &((*p_a_3_a_2).m);
+  let p_a_3_a_2_m_1 = &((*p_a_3_a_2_m)[1]);
+
+
+  let l_a             : array<Outer, 4> = *p_a;
+  let l_a_3           : Outer           = *p_a_3;
+  let l_a_3_a         : array<Inner, 4> = *p_a_3_a;
+  let l_a_3_a_2       : Inner           = *p_a_3_a_2;
+  let l_a_3_a_2_m     : mat2x3<f32>     = *p_a_3_a_2_m;
+  let l_a_3_a_2_m_1   : vec3<f32>       = *p_a_3_a_2_m_1;
+  let l_a_3_a_2_m_1_0 : f32             = (*p_a_3_a_2_m_1)[0];
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..2273b9f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,60 @@
+struct Inner {
+  float2x3 m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];
+};
+
+float2x3 tint_symbol_4(uint4 buffer[64], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz));
+}
+
+Inner tint_symbol_3(uint4 buffer[64], uint offset) {
+  const Inner tint_symbol_7 = {tint_symbol_4(buffer, (offset + 0u))};
+  return tint_symbol_7;
+}
+
+typedef Inner tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[64], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_3(buffer, (offset + (i * 64u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_1(uint4 buffer[64], uint offset) {
+  const Outer tint_symbol_8 = {tint_symbol_2(buffer, (offset + 0u))};
+  return tint_symbol_8;
+}
+
+typedef Outer tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[64], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr_1[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 256u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const Outer l_a[4] = tint_symbol(a, 0u);
+  const Outer l_a_3 = tint_symbol_1(a, 768u);
+  const Inner l_a_3_a[4] = tint_symbol_2(a, 768u);
+  const Inner l_a_3_a_2 = tint_symbol_3(a, 896u);
+  const float2x3 l_a_3_a_2_m = tint_symbol_4(a, 896u);
+  const float3 l_a_3_a_2_m_1 = asfloat(a[57].xyz);
+  const float l_a_3_a_2_m_1_0 = asfloat(a[57].x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..2273b9f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,60 @@
+struct Inner {
+  float2x3 m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];
+};
+
+float2x3 tint_symbol_4(uint4 buffer[64], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz));
+}
+
+Inner tint_symbol_3(uint4 buffer[64], uint offset) {
+  const Inner tint_symbol_7 = {tint_symbol_4(buffer, (offset + 0u))};
+  return tint_symbol_7;
+}
+
+typedef Inner tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[64], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_3(buffer, (offset + (i * 64u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_1(uint4 buffer[64], uint offset) {
+  const Outer tint_symbol_8 = {tint_symbol_2(buffer, (offset + 0u))};
+  return tint_symbol_8;
+}
+
+typedef Outer tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[64], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr_1[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 256u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const Outer l_a[4] = tint_symbol(a, 0u);
+  const Outer l_a_3 = tint_symbol_1(a, 768u);
+  const Inner l_a_3_a[4] = tint_symbol_2(a, 768u);
+  const Inner l_a_3_a_2 = tint_symbol_3(a, 896u);
+  const float2x3 l_a_3_a_2_m = tint_symbol_4(a, 896u);
+  const float3 l_a_3_a_2_m_1 = asfloat(a[57].xyz);
+  const float l_a_3_a_2_m_1_0 = asfloat(a[57].x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f32/static_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/static_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..68a7980
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/static_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,37 @@
+#version 310 es
+
+struct Inner {
+  mat2x3 m;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+};
+
+struct Outer {
+  Inner a[4];
+};
+
+layout(binding = 0, std140) uniform a_block_ubo {
+  Outer inner[4];
+} a;
+
+void f() {
+  Outer l_a[4] = a.inner;
+  Outer l_a_3 = a.inner[3];
+  Inner l_a_3_a[4] = a.inner[3].a;
+  Inner l_a_3_a_2 = a.inner[3].a[2];
+  mat2x3 l_a_3_a_2_m = a.inner[3].a[2].m;
+  vec3 l_a_3_a_2_m_1 = a.inner[3].a[2].m[1];
+  float l_a_3_a_2_m_1_0 = a.inner[3].a[2].m[1][0];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f32/static_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/static_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..b36ca6e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/static_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,36 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct Inner {
+  /* 0x0000 */ float2x3 m;
+  /* 0x0020 */ tint_array<int8_t, 32> tint_pad;
+};
+
+struct Outer {
+  /* 0x0000 */ tint_array<Inner, 4> a;
+};
+
+kernel void f(const constant tint_array<Outer, 4>* tint_symbol [[buffer(0)]]) {
+  tint_array<Outer, 4> const l_a = *(tint_symbol);
+  Outer const l_a_3 = (*(tint_symbol))[3];
+  tint_array<Inner, 4> const l_a_3_a = (*(tint_symbol))[3].a;
+  Inner const l_a_3_a_2 = (*(tint_symbol))[3].a[2];
+  float2x3 const l_a_3_a_2_m = (*(tint_symbol))[3].a[2].m;
+  float3 const l_a_3_a_2_m_1 = (*(tint_symbol))[3].a[2].m[1];
+  float const l_a_3_a_2_m_1_0 = (*(tint_symbol))[3].a[2].m[1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f32/static_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/static_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..a330e0d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/static_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,73 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 44
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %a_block "a_block"
+               OpMemberName %a_block 0 "inner"
+               OpName %Outer "Outer"
+               OpMemberName %Outer 0 "a"
+               OpName %Inner "Inner"
+               OpMemberName %Inner 0 "m"
+               OpName %a "a"
+               OpName %f "f"
+               OpDecorate %a_block Block
+               OpMemberDecorate %a_block 0 Offset 0
+               OpMemberDecorate %Outer 0 Offset 0
+               OpMemberDecorate %Inner 0 Offset 0
+               OpMemberDecorate %Inner 0 ColMajor
+               OpMemberDecorate %Inner 0 MatrixStride 16
+               OpDecorate %_arr_Inner_uint_4 ArrayStride 64
+               OpDecorate %_arr_Outer_uint_4 ArrayStride 256
+               OpDecorate %a NonWritable
+               OpDecorate %a DescriptorSet 0
+               OpDecorate %a Binding 0
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat2v3float = OpTypeMatrix %v3float 2
+      %Inner = OpTypeStruct %mat2v3float
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_Inner_uint_4 = OpTypeArray %Inner %uint_4
+      %Outer = OpTypeStruct %_arr_Inner_uint_4
+%_arr_Outer_uint_4 = OpTypeArray %Outer %uint_4
+    %a_block = OpTypeStruct %_arr_Outer_uint_4
+%_ptr_Uniform_a_block = OpTypePointer Uniform %a_block
+          %a = OpVariable %_ptr_Uniform_a_block Uniform
+       %void = OpTypeVoid
+         %13 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_Outer_uint_4 = OpTypePointer Uniform %_arr_Outer_uint_4
+        %int = OpTypeInt 32 1
+      %int_3 = OpConstant %int 3
+%_ptr_Uniform_Outer = OpTypePointer Uniform %Outer
+%_ptr_Uniform__arr_Inner_uint_4 = OpTypePointer Uniform %_arr_Inner_uint_4
+      %int_2 = OpConstant %int 2
+%_ptr_Uniform_Inner = OpTypePointer Uniform %Inner
+%_ptr_Uniform_mat2v3float = OpTypePointer Uniform %mat2v3float
+      %int_1 = OpConstant %int 1
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+         %40 = OpConstantNull %int
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+          %f = OpFunction %void None %13
+         %16 = OpLabel
+         %19 = OpAccessChain %_ptr_Uniform__arr_Outer_uint_4 %a %uint_0
+         %20 = OpLoad %_arr_Outer_uint_4 %19
+         %24 = OpAccessChain %_ptr_Uniform_Outer %a %uint_0 %int_3
+         %25 = OpLoad %Outer %24
+         %27 = OpAccessChain %_ptr_Uniform__arr_Inner_uint_4 %a %uint_0 %int_3 %uint_0
+         %28 = OpLoad %_arr_Inner_uint_4 %27
+         %31 = OpAccessChain %_ptr_Uniform_Inner %a %uint_0 %int_3 %uint_0 %int_2
+         %32 = OpLoad %Inner %31
+         %34 = OpAccessChain %_ptr_Uniform_mat2v3float %a %uint_0 %int_3 %uint_0 %int_2 %uint_0
+         %35 = OpLoad %mat2v3float %34
+         %38 = OpAccessChain %_ptr_Uniform_v3float %a %uint_0 %int_3 %uint_0 %int_2 %uint_0 %int_1
+         %39 = OpLoad %v3float %38
+         %42 = OpAccessChain %_ptr_Uniform_float %a %uint_0 %int_3 %uint_0 %int_2 %uint_0 %int_1 %40
+         %43 = OpLoad %float %42
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f32/static_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/static_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..df426aa
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/static_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,27 @@
+struct Inner {
+  @size(64)
+  m : mat2x3<f32>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &(a);
+  let p_a_3 = &((*(p_a))[3]);
+  let p_a_3_a = &((*(p_a_3)).a);
+  let p_a_3_a_2 = &((*(p_a_3_a))[2]);
+  let p_a_3_a_2_m = &((*(p_a_3_a_2)).m);
+  let p_a_3_a_2_m_1 = &((*(p_a_3_a_2_m))[1]);
+  let l_a : array<Outer, 4> = *(p_a);
+  let l_a_3 : Outer = *(p_a_3);
+  let l_a_3_a : array<Inner, 4> = *(p_a_3_a);
+  let l_a_3_a_2 : Inner = *(p_a_3_a_2);
+  let l_a_3_a_2_m : mat2x3<f32> = *(p_a_3_a_2_m);
+  let l_a_3_a_2_m_1 : vec3<f32> = *(p_a_3_a_2_m_1);
+  let l_a_3_a_2_m_1_0 : f32 = (*(p_a_3_a_2_m_1))[0];
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_builtin.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_builtin.wgsl
new file mode 100644
index 0000000..75fee9d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_builtin.wgsl

@@ -0,0 +1,15 @@
+struct S {
+  before : i32,
+  m : mat2x3<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    let t = transpose(u[2].m);
+    let l = length(u[0].m[1].zxy);
+    let a = abs(u[0].m[1].zxy.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_builtin.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_builtin.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..158973f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_builtin.wgsl.expected.dxc.hlsl

@@ -0,0 +1,17 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+
+float2x3 tint_symbol(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float3x2 t = transpose(tint_symbol(u, 272u));
+  const float l = length(asfloat(u[2].xyz).zxy);
+  const float a = abs(asfloat(u[2].xyz).zxy.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_builtin.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_builtin.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..158973f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_builtin.wgsl.expected.fxc.hlsl

@@ -0,0 +1,17 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+
+float2x3 tint_symbol(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float3x2 t = transpose(tint_symbol(u, 272u));
+  const float l = length(asfloat(u[2].xyz).zxy);
+  const float a = abs(asfloat(u[2].xyz).zxy.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_builtin.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_builtin.wgsl.expected.glsl
new file mode 100644
index 0000000..60746c6
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_builtin.wgsl.expected.glsl

@@ -0,0 +1,45 @@
+#version 310 es
+
+struct S {
+  int before;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  mat2x3 m;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  int after;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+};
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  S inner[4];
+} u;
+
+void f() {
+  mat3x2 t = transpose(u.inner[2].m);
+  float l = length(u.inner[0].m[1].zxy);
+  float a = abs(u.inner[0].m[1].zxy.x);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_builtin.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_builtin.wgsl.expected.msl
new file mode 100644
index 0000000..febc7d5
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_builtin.wgsl.expected.msl

@@ -0,0 +1,32 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 12> tint_pad;
+  /* 0x0010 */ float2x3 m;
+  /* 0x0030 */ tint_array<int8_t, 16> tint_pad_1;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_2;
+};
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol [[buffer(0)]]) {
+  float3x2 const t = transpose((*(tint_symbol))[2].m);
+  float const l = length(float3((*(tint_symbol))[0].m[1]).zxy);
+  float const a = fabs(float3((*(tint_symbol))[0].m[1]).zxy[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_builtin.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_builtin.wgsl.expected.spvasm
new file mode 100644
index 0000000..329aa40
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_builtin.wgsl.expected.spvasm

@@ -0,0 +1,67 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 38
+; Schema: 0
+               OpCapability Shader
+         %26 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %u "u"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 16
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 16
+               OpMemberDecorate %S 2 Offset 64
+               OpDecorate %_arr_S_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+        %int = OpTypeInt 32 1
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat2v3float = OpTypeMatrix %v3float 2
+          %S = OpTypeStruct %int %mat2v3float %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+    %u_block = OpTypeStruct %_arr_S_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+       %void = OpTypeVoid
+         %12 = OpTypeFunction %void
+    %v2float = OpTypeVector %float 2
+%mat3v2float = OpTypeMatrix %v2float 3
+     %uint_0 = OpConstant %uint 0
+      %int_2 = OpConstant %int 2
+     %uint_1 = OpConstant %uint 1
+%_ptr_Uniform_mat2v3float = OpTypePointer Uniform %mat2v3float
+         %27 = OpConstantNull %int
+      %int_1 = OpConstant %int 1
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+          %f = OpFunction %void None %12
+         %15 = OpLabel
+         %23 = OpAccessChain %_ptr_Uniform_mat2v3float %u %uint_0 %int_2 %uint_1
+         %24 = OpLoad %mat2v3float %23
+         %16 = OpTranspose %mat3v2float %24
+         %30 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %27 %uint_1 %int_1
+         %31 = OpLoad %v3float %30
+         %32 = OpVectorShuffle %v3float %31 %31 2 0 1
+         %25 = OpExtInst %float %26 Length %32
+         %34 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %27 %uint_1 %int_1
+         %35 = OpLoad %v3float %34
+         %36 = OpVectorShuffle %v3float %35 %35 2 0 1
+         %37 = OpCompositeExtract %float %36 0
+         %33 = OpExtInst %float %26 FAbs %37
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_builtin.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_builtin.wgsl.expected.wgsl
new file mode 100644
index 0000000..32a5a51
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_builtin.wgsl.expected.wgsl

@@ -0,0 +1,15 @@
+struct S {
+  before : i32,
+  m : mat2x3<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let t = transpose(u[2].m);
+  let l = length(u[0].m[1].zxy);
+  let a = abs(u[0].m[1].zxy.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_fn.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_fn.wgsl
new file mode 100644
index 0000000..d8af58b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_fn.wgsl

@@ -0,0 +1,23 @@
+struct S {
+  before : i32,
+  m : mat2x3<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+fn a(a : array<S, 4>) {}
+fn b(s : S) {}
+fn c(m : mat2x3<f32>) {}
+fn d(v : vec3<f32>) {}
+fn e(f : f32) {}
+
+@compute @workgroup_size(1)
+fn f() {
+    a(u);
+    b(u[2]);
+    c(u[2].m);
+    d(u[0].m[1].zxy);
+    e(u[0].m[1].zxy.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_fn.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_fn.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..c48f877
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_fn.wgsl.expected.dxc.hlsl

@@ -0,0 +1,58 @@
+struct S {
+  int before;
+  float2x3 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+
+void a(S a_1[4]) {
+}
+
+void b(S s) {
+}
+
+void c(float2x3 m) {
+}
+
+void d(float3 v) {
+}
+
+void e(float f_1) {
+}
+
+float2x3 tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz));
+}
+
+S tint_symbol_1(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_2 = ((offset + 0u)) / 4;
+  const uint scalar_offset_3 = ((offset + 64u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4]), tint_symbol_3(buffer, (offset + 16u)), asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(tint_symbol_1(u, 256u));
+  c(tint_symbol_3(u, 272u));
+  d(asfloat(u[2].xyz).zxy);
+  e(asfloat(u[2].xyz).zxy.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_fn.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_fn.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..c48f877
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_fn.wgsl.expected.fxc.hlsl

@@ -0,0 +1,58 @@
+struct S {
+  int before;
+  float2x3 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+
+void a(S a_1[4]) {
+}
+
+void b(S s) {
+}
+
+void c(float2x3 m) {
+}
+
+void d(float3 v) {
+}
+
+void e(float f_1) {
+}
+
+float2x3 tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz));
+}
+
+S tint_symbol_1(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_2 = ((offset + 0u)) / 4;
+  const uint scalar_offset_3 = ((offset + 64u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4]), tint_symbol_3(buffer, (offset + 16u)), asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(tint_symbol_1(u, 256u));
+  c(tint_symbol_3(u, 272u));
+  d(asfloat(u[2].xyz).zxy);
+  e(asfloat(u[2].xyz).zxy.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_fn.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_fn.wgsl.expected.glsl
new file mode 100644
index 0000000..5fb9e19
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_fn.wgsl.expected.glsl

@@ -0,0 +1,62 @@
+#version 310 es
+
+struct S {
+  int before;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  mat2x3 m;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  int after;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+};
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  S inner[4];
+} u;
+
+void a(S a_1[4]) {
+}
+
+void b(S s) {
+}
+
+void c(mat2x3 m) {
+}
+
+void d(vec3 v) {
+}
+
+void e(float f_1) {
+}
+
+void f() {
+  a(u.inner);
+  b(u.inner[2]);
+  c(u.inner[2].m);
+  d(u.inner[0].m[1].zxy);
+  e(u.inner[0].m[1].zxy.x);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_fn.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_fn.wgsl.expected.msl
new file mode 100644
index 0000000..5835b92
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_fn.wgsl.expected.msl

@@ -0,0 +1,49 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 12> tint_pad;
+  /* 0x0010 */ float2x3 m;
+  /* 0x0030 */ tint_array<int8_t, 16> tint_pad_1;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_2;
+};
+
+void a(tint_array<S, 4> a_1) {
+}
+
+void b(S s) {
+}
+
+void c(float2x3 m) {
+}
+
+void d(float3 v) {
+}
+
+void e(float f_1) {
+}
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol [[buffer(0)]]) {
+  a(*(tint_symbol));
+  b((*(tint_symbol))[2]);
+  c((*(tint_symbol))[2].m);
+  d(float3((*(tint_symbol))[0].m[1]).zxy);
+  e(float3((*(tint_symbol))[0].m[1]).zxy[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_fn.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_fn.wgsl.expected.spvasm
new file mode 100644
index 0000000..7bcf25c
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_fn.wgsl.expected.spvasm

@@ -0,0 +1,112 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 63
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %u "u"
+               OpName %a "a"
+               OpName %a_1 "a_1"
+               OpName %b "b"
+               OpName %s "s"
+               OpName %c "c"
+               OpName %m "m"
+               OpName %d "d"
+               OpName %v "v"
+               OpName %e "e"
+               OpName %f_1 "f_1"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 16
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 16
+               OpMemberDecorate %S 2 Offset 64
+               OpDecorate %_arr_S_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+        %int = OpTypeInt 32 1
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat2v3float = OpTypeMatrix %v3float 2
+          %S = OpTypeStruct %int %mat2v3float %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+    %u_block = OpTypeStruct %_arr_S_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+       %void = OpTypeVoid
+         %12 = OpTypeFunction %void %_arr_S_uint_4
+         %17 = OpTypeFunction %void %S
+         %21 = OpTypeFunction %void %mat2v3float
+         %25 = OpTypeFunction %void %v3float
+         %29 = OpTypeFunction %void %float
+         %33 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_S_uint_4 = OpTypePointer Uniform %_arr_S_uint_4
+      %int_2 = OpConstant %int 2
+%_ptr_Uniform_S = OpTypePointer Uniform %S
+     %uint_1 = OpConstant %uint 1
+%_ptr_Uniform_mat2v3float = OpTypePointer Uniform %mat2v3float
+         %52 = OpConstantNull %int
+      %int_1 = OpConstant %int 1
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+          %a = OpFunction %void None %12
+        %a_1 = OpFunctionParameter %_arr_S_uint_4
+         %16 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %b = OpFunction %void None %17
+          %s = OpFunctionParameter %S
+         %20 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %c = OpFunction %void None %21
+          %m = OpFunctionParameter %mat2v3float
+         %24 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %d = OpFunction %void None %25
+          %v = OpFunctionParameter %v3float
+         %28 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %e = OpFunction %void None %29
+        %f_1 = OpFunctionParameter %float
+         %32 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %33
+         %35 = OpLabel
+         %39 = OpAccessChain %_ptr_Uniform__arr_S_uint_4 %u %uint_0
+         %40 = OpLoad %_arr_S_uint_4 %39
+         %36 = OpFunctionCall %void %a %40
+         %44 = OpAccessChain %_ptr_Uniform_S %u %uint_0 %int_2
+         %45 = OpLoad %S %44
+         %41 = OpFunctionCall %void %b %45
+         %49 = OpAccessChain %_ptr_Uniform_mat2v3float %u %uint_0 %int_2 %uint_1
+         %50 = OpLoad %mat2v3float %49
+         %46 = OpFunctionCall %void %c %50
+         %55 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %52 %uint_1 %int_1
+         %56 = OpLoad %v3float %55
+         %57 = OpVectorShuffle %v3float %56 %56 2 0 1
+         %51 = OpFunctionCall %void %d %57
+         %59 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %52 %uint_1 %int_1
+         %60 = OpLoad %v3float %59
+         %61 = OpVectorShuffle %v3float %60 %60 2 0 1
+         %62 = OpCompositeExtract %float %61 0
+         %58 = OpFunctionCall %void %e %62
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_fn.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_fn.wgsl.expected.wgsl
new file mode 100644
index 0000000..0058849
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_fn.wgsl.expected.wgsl

@@ -0,0 +1,32 @@
+struct S {
+  before : i32,
+  m : mat2x3<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+fn a(a : array<S, 4>) {
+}
+
+fn b(s : S) {
+}
+
+fn c(m : mat2x3<f32>) {
+}
+
+fn d(v : vec3<f32>) {
+}
+
+fn e(f : f32) {
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  a(u);
+  b(u[2]);
+  c(u[2].m);
+  d(u[0].m[1].zxy);
+  e(u[0].m[1].zxy.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_private.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_private.wgsl
new file mode 100644
index 0000000..510398c
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_private.wgsl

@@ -0,0 +1,17 @@
+struct S {
+  before : i32,
+  m : mat2x3<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+var<private> p : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    p = u;
+    p[1] = u[2];
+    p[3].m = u[2].m;
+    p[1].m[0] = u[0].m[1].zxy;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_private.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_private.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..08aa42f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_private.wgsl.expected.dxc.hlsl

@@ -0,0 +1,43 @@
+struct S {
+  int before;
+  float2x3 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+static S p[4] = (S[4])0;
+
+float2x3 tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz));
+}
+
+S tint_symbol_1(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_2 = ((offset + 0u)) / 4;
+  const uint scalar_offset_3 = ((offset + 64u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4]), tint_symbol_3(buffer, (offset + 16u)), asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = tint_symbol_1(u, 256u);
+  p[3].m = tint_symbol_3(u, 272u);
+  p[1].m[0] = asfloat(u[2].xyz).zxy;
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_private.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_private.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..08aa42f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_private.wgsl.expected.fxc.hlsl

@@ -0,0 +1,43 @@
+struct S {
+  int before;
+  float2x3 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+static S p[4] = (S[4])0;
+
+float2x3 tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz));
+}
+
+S tint_symbol_1(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_2 = ((offset + 0u)) / 4;
+  const uint scalar_offset_3 = ((offset + 64u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4]), tint_symbol_3(buffer, (offset + 16u)), asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = tint_symbol_1(u, 256u);
+  p[3].m = tint_symbol_3(u, 272u);
+  p[1].m[0] = asfloat(u[2].xyz).zxy;
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_private.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_private.wgsl.expected.glsl
new file mode 100644
index 0000000..6c7f307
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_private.wgsl.expected.glsl

@@ -0,0 +1,47 @@
+#version 310 es
+
+struct S {
+  int before;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  mat2x3 m;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  int after;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+};
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  S inner[4];
+} u;
+
+S p[4] = S[4](S(0, 0u, 0u, 0u, mat2x3(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, 0u, 0u, mat2x3(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, 0u, 0u, mat2x3(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, 0u, 0u, mat2x3(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+void f() {
+  p = u.inner;
+  p[1] = u.inner[2];
+  p[3].m = u.inner[2].m;
+  p[1].m[0] = u.inner[0].m[1].zxy;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_private.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_private.wgsl.expected.msl
new file mode 100644
index 0000000..40d4274
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_private.wgsl.expected.msl

@@ -0,0 +1,34 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 12> tint_pad;
+  /* 0x0010 */ float2x3 m;
+  /* 0x0030 */ tint_array<int8_t, 16> tint_pad_1;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_2;
+};
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol_1 [[buffer(0)]]) {
+  thread tint_array<S, 4> tint_symbol = {};
+  tint_symbol = *(tint_symbol_1);
+  tint_symbol[1] = (*(tint_symbol_1))[2];
+  tint_symbol[3].m = (*(tint_symbol_1))[2].m;
+  tint_symbol[1].m[0] = float3((*(tint_symbol_1))[0].m[1]).zxy;
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_private.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_private.wgsl.expected.spvasm
new file mode 100644
index 0000000..9e61a0d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_private.wgsl.expected.spvasm

@@ -0,0 +1,78 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 44
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %u "u"
+               OpName %p "p"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 16
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 16
+               OpMemberDecorate %S 2 Offset 64
+               OpDecorate %_arr_S_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+        %int = OpTypeInt 32 1
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat2v3float = OpTypeMatrix %v3float 2
+          %S = OpTypeStruct %int %mat2v3float %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+    %u_block = OpTypeStruct %_arr_S_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%_ptr_Private__arr_S_uint_4 = OpTypePointer Private %_arr_S_uint_4
+         %14 = OpConstantNull %_arr_S_uint_4
+          %p = OpVariable %_ptr_Private__arr_S_uint_4 Private %14
+       %void = OpTypeVoid
+         %15 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_S_uint_4 = OpTypePointer Uniform %_arr_S_uint_4
+      %int_1 = OpConstant %int 1
+%_ptr_Private_S = OpTypePointer Private %S
+      %int_2 = OpConstant %int 2
+%_ptr_Uniform_S = OpTypePointer Uniform %S
+      %int_3 = OpConstant %int 3
+     %uint_1 = OpConstant %uint 1
+%_ptr_Private_mat2v3float = OpTypePointer Private %mat2v3float
+%_ptr_Uniform_mat2v3float = OpTypePointer Uniform %mat2v3float
+         %37 = OpConstantNull %int
+%_ptr_Private_v3float = OpTypePointer Private %v3float
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+          %f = OpFunction %void None %15
+         %18 = OpLabel
+         %21 = OpAccessChain %_ptr_Uniform__arr_S_uint_4 %u %uint_0
+         %22 = OpLoad %_arr_S_uint_4 %21
+               OpStore %p %22
+         %25 = OpAccessChain %_ptr_Private_S %p %int_1
+         %28 = OpAccessChain %_ptr_Uniform_S %u %uint_0 %int_2
+         %29 = OpLoad %S %28
+               OpStore %25 %29
+         %33 = OpAccessChain %_ptr_Private_mat2v3float %p %int_3 %uint_1
+         %35 = OpAccessChain %_ptr_Uniform_mat2v3float %u %uint_0 %int_2 %uint_1
+         %36 = OpLoad %mat2v3float %35
+               OpStore %33 %36
+         %39 = OpAccessChain %_ptr_Private_v3float %p %int_1 %uint_1 %37
+         %41 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %37 %uint_1 %int_1
+         %42 = OpLoad %v3float %41
+         %43 = OpVectorShuffle %v3float %42 %42 2 0 1
+               OpStore %39 %43
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_private.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_private.wgsl.expected.wgsl
new file mode 100644
index 0000000..70fa82f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_private.wgsl.expected.wgsl

@@ -0,0 +1,18 @@
+struct S {
+  before : i32,
+  m : mat2x3<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+var<private> p : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  p = u;
+  p[1] = u[2];
+  p[3].m = u[2].m;
+  p[1].m[0] = u[0].m[1].zxy;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_storage.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_storage.wgsl
new file mode 100644
index 0000000..6e1221f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_storage.wgsl

@@ -0,0 +1,17 @@
+struct S {
+  before : i32,
+  m : mat2x3<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    s = u;
+    s[1] = u[2];
+    s[3].m = u[2].m;
+    s[1].m[0] = u[0].m[1].zxy;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_storage.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_storage.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..89eabee
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_storage.wgsl.expected.dxc.hlsl

@@ -0,0 +1,63 @@
+struct S {
+  int before;
+  float2x3 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol_3(RWByteAddressBuffer buffer, uint offset, float2x3 value) {
+  buffer.Store3((offset + 0u), asuint(value[0u]));
+  buffer.Store3((offset + 16u), asuint(value[1u]));
+}
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, S value) {
+  buffer.Store((offset + 0u), asuint(value.before));
+  tint_symbol_3(buffer, (offset + 16u), value.m);
+  buffer.Store((offset + 64u), asuint(value.after));
+}
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, S value[4]) {
+  S array[4] = value;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      tint_symbol_1(buffer, (offset + (i * 128u)), array[i]);
+    }
+  }
+}
+
+float2x3 tint_symbol_8(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz));
+}
+
+S tint_symbol_6(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_2 = ((offset + 0u)) / 4;
+  const uint scalar_offset_3 = ((offset + 64u)) / 4;
+  const S tint_symbol_10 = {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4]), tint_symbol_8(buffer, (offset + 16u)), asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])};
+  return tint_symbol_10;
+}
+
+typedef S tint_symbol_5_ret[4];
+tint_symbol_5_ret tint_symbol_5(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_6(buffer, (offset + (i_1 * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_5(u, 0u));
+  tint_symbol_1(s, 128u, tint_symbol_6(u, 256u));
+  tint_symbol_3(s, 400u, tint_symbol_8(u, 272u));
+  s.Store3(144u, asuint(asfloat(u[2].xyz).zxy));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_storage.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_storage.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..89eabee
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_storage.wgsl.expected.fxc.hlsl

@@ -0,0 +1,63 @@
+struct S {
+  int before;
+  float2x3 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol_3(RWByteAddressBuffer buffer, uint offset, float2x3 value) {
+  buffer.Store3((offset + 0u), asuint(value[0u]));
+  buffer.Store3((offset + 16u), asuint(value[1u]));
+}
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, S value) {
+  buffer.Store((offset + 0u), asuint(value.before));
+  tint_symbol_3(buffer, (offset + 16u), value.m);
+  buffer.Store((offset + 64u), asuint(value.after));
+}
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, S value[4]) {
+  S array[4] = value;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      tint_symbol_1(buffer, (offset + (i * 128u)), array[i]);
+    }
+  }
+}
+
+float2x3 tint_symbol_8(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz));
+}
+
+S tint_symbol_6(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_2 = ((offset + 0u)) / 4;
+  const uint scalar_offset_3 = ((offset + 64u)) / 4;
+  const S tint_symbol_10 = {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4]), tint_symbol_8(buffer, (offset + 16u)), asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])};
+  return tint_symbol_10;
+}
+
+typedef S tint_symbol_5_ret[4];
+tint_symbol_5_ret tint_symbol_5(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_6(buffer, (offset + (i_1 * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_5(u, 0u));
+  tint_symbol_1(s, 128u, tint_symbol_6(u, 256u));
+  tint_symbol_3(s, 400u, tint_symbol_8(u, 272u));
+  s.Store3(144u, asuint(asfloat(u[2].xyz).zxy));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_storage.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_storage.wgsl.expected.glsl
new file mode 100644
index 0000000..00df1fa
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_storage.wgsl.expected.glsl

@@ -0,0 +1,50 @@
+#version 310 es
+
+struct S {
+  int before;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  mat2x3 m;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  int after;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+};
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  S inner[4];
+} u;
+
+layout(binding = 1, std430) buffer u_block_ssbo {
+  S inner[4];
+} s;
+
+void f() {
+  s.inner = u.inner;
+  s.inner[1] = u.inner[2];
+  s.inner[3].m = u.inner[2].m;
+  s.inner[1].m[0] = u.inner[0].m[1].zxy;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_storage.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_storage.wgsl.expected.msl
new file mode 100644
index 0000000..181d0e3
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_storage.wgsl.expected.msl

@@ -0,0 +1,33 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 12> tint_pad;
+  /* 0x0010 */ float2x3 m;
+  /* 0x0030 */ tint_array<int8_t, 16> tint_pad_1;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_2;
+};
+
+kernel void f(device tint_array<S, 4>* tint_symbol [[buffer(1)]], const constant tint_array<S, 4>* tint_symbol_1 [[buffer(0)]]) {
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[2];
+  (*(tint_symbol))[3].m = (*(tint_symbol_1))[2].m;
+  (*(tint_symbol))[1].m[0] = float3((*(tint_symbol_1))[0].m[1]).zxy;
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_storage.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_storage.wgsl.expected.spvasm
new file mode 100644
index 0000000..3d31ce0
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_storage.wgsl.expected.spvasm

@@ -0,0 +1,81 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 45
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %u "u"
+               OpName %s "s"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 16
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 16
+               OpMemberDecorate %S 2 Offset 64
+               OpDecorate %_arr_S_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpDecorate %s DescriptorSet 0
+               OpDecorate %s Binding 1
+        %int = OpTypeInt 32 1
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat2v3float = OpTypeMatrix %v3float 2
+          %S = OpTypeStruct %int %mat2v3float %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+    %u_block = OpTypeStruct %_arr_S_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%_ptr_StorageBuffer_u_block = OpTypePointer StorageBuffer %u_block
+          %s = OpVariable %_ptr_StorageBuffer_u_block StorageBuffer
+       %void = OpTypeVoid
+         %14 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_StorageBuffer__arr_S_uint_4 = OpTypePointer StorageBuffer %_arr_S_uint_4
+%_ptr_Uniform__arr_S_uint_4 = OpTypePointer Uniform %_arr_S_uint_4
+      %int_1 = OpConstant %int 1
+%_ptr_StorageBuffer_S = OpTypePointer StorageBuffer %S
+      %int_2 = OpConstant %int 2
+%_ptr_Uniform_S = OpTypePointer Uniform %S
+      %int_3 = OpConstant %int 3
+     %uint_1 = OpConstant %uint 1
+%_ptr_StorageBuffer_mat2v3float = OpTypePointer StorageBuffer %mat2v3float
+%_ptr_Uniform_mat2v3float = OpTypePointer Uniform %mat2v3float
+         %38 = OpConstantNull %int
+%_ptr_StorageBuffer_v3float = OpTypePointer StorageBuffer %v3float
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+          %f = OpFunction %void None %14
+         %17 = OpLabel
+         %20 = OpAccessChain %_ptr_StorageBuffer__arr_S_uint_4 %s %uint_0
+         %22 = OpAccessChain %_ptr_Uniform__arr_S_uint_4 %u %uint_0
+         %23 = OpLoad %_arr_S_uint_4 %22
+               OpStore %20 %23
+         %26 = OpAccessChain %_ptr_StorageBuffer_S %s %uint_0 %int_1
+         %29 = OpAccessChain %_ptr_Uniform_S %u %uint_0 %int_2
+         %30 = OpLoad %S %29
+               OpStore %26 %30
+         %34 = OpAccessChain %_ptr_StorageBuffer_mat2v3float %s %uint_0 %int_3 %uint_1
+         %36 = OpAccessChain %_ptr_Uniform_mat2v3float %u %uint_0 %int_2 %uint_1
+         %37 = OpLoad %mat2v3float %36
+               OpStore %34 %37
+         %40 = OpAccessChain %_ptr_StorageBuffer_v3float %s %uint_0 %int_1 %uint_1 %38
+         %42 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %38 %uint_1 %int_1
+         %43 = OpLoad %v3float %42
+         %44 = OpVectorShuffle %v3float %43 %43 2 0 1
+               OpStore %40 %44
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_storage.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_storage.wgsl.expected.wgsl
new file mode 100644
index 0000000..06df2f7
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_storage.wgsl.expected.wgsl

@@ -0,0 +1,18 @@
+struct S {
+  before : i32,
+  m : mat2x3<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  s = u;
+  s[1] = u[2];
+  s[3].m = u[2].m;
+  s[1].m[0] = u[0].m[1].zxy;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_workgroup.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_workgroup.wgsl
new file mode 100644
index 0000000..b2bb72d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_workgroup.wgsl

@@ -0,0 +1,17 @@
+struct S {
+  before : i32,
+  m : mat2x3<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+var<workgroup> w : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    w = u;
+    w[1] = u[2];
+    w[3].m = u[2].m;
+    w[1].m[0] = u[0].m[1].zxy;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_workgroup.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_workgroup.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..e2fb38a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_workgroup.wgsl.expected.dxc.hlsl

@@ -0,0 +1,59 @@
+struct S {
+  int before;
+  float2x3 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+groupshared S w[4];
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+float2x3 tint_symbol_5(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz));
+}
+
+S tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_2 = ((offset + 0u)) / 4;
+  const uint scalar_offset_3 = ((offset + 64u)) / 4;
+  const S tint_symbol_8 = {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4]), tint_symbol_5(buffer, (offset + 16u)), asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])};
+  return tint_symbol_8;
+}
+
+typedef S tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 128u)));
+    }
+  }
+  return arr;
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      const uint i = idx;
+      const S tint_symbol_7 = (S)0;
+      w[i] = tint_symbol_7;
+    }
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = tint_symbol_3(u, 256u);
+  w[3].m = tint_symbol_5(u, 272u);
+  w[1].m[0] = asfloat(u[2].xyz).zxy;
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_workgroup.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_workgroup.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..e2fb38a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_workgroup.wgsl.expected.fxc.hlsl

@@ -0,0 +1,59 @@
+struct S {
+  int before;
+  float2x3 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+groupshared S w[4];
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+float2x3 tint_symbol_5(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz));
+}
+
+S tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_2 = ((offset + 0u)) / 4;
+  const uint scalar_offset_3 = ((offset + 64u)) / 4;
+  const S tint_symbol_8 = {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4]), tint_symbol_5(buffer, (offset + 16u)), asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])};
+  return tint_symbol_8;
+}
+
+typedef S tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 128u)));
+    }
+  }
+  return arr;
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      const uint i = idx;
+      const S tint_symbol_7 = (S)0;
+      w[i] = tint_symbol_7;
+    }
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = tint_symbol_3(u, 256u);
+  w[3].m = tint_symbol_5(u, 272u);
+  w[1].m[0] = asfloat(u[2].xyz).zxy;
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_workgroup.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_workgroup.wgsl.expected.glsl
new file mode 100644
index 0000000..2e0e12b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_workgroup.wgsl.expected.glsl

@@ -0,0 +1,55 @@
+#version 310 es
+
+struct S {
+  int before;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  mat2x3 m;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  int after;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+};
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  S inner[4];
+} u;
+
+shared S w[4];
+void f(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      uint i = idx;
+      S tint_symbol = S(0, 0u, 0u, 0u, mat2x3(vec3(0.0f), vec3(0.0f)), 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u);
+      w[i] = tint_symbol;
+    }
+  }
+  barrier();
+  w = u.inner;
+  w[1] = u.inner[2];
+  w[3].m = u.inner[2].m;
+  w[1].m[0] = u.inner[0].m[1].zxy;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f(gl_LocalInvocationIndex);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_workgroup.wgsl.expected.msl
new file mode 100644
index 0000000..6e4d54e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_workgroup.wgsl.expected.msl

@@ -0,0 +1,48 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 12> tint_pad;
+  /* 0x0010 */ float2x3 m;
+  /* 0x0030 */ tint_array<int8_t, 16> tint_pad_1;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_2;
+};
+
+struct tint_symbol_6 {
+  tint_array<S, 4> w;
+};
+
+void f_inner(uint local_invocation_index, threadgroup tint_array<S, 4>* const tint_symbol_1, const constant tint_array<S, 4>* const tint_symbol_2) {
+  for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+    uint const i = idx;
+    S const tint_symbol = S{};
+    (*(tint_symbol_1))[i] = tint_symbol;
+  }
+  threadgroup_barrier(mem_flags::mem_threadgroup);
+  *(tint_symbol_1) = *(tint_symbol_2);
+  (*(tint_symbol_1))[1] = (*(tint_symbol_2))[2];
+  (*(tint_symbol_1))[3].m = (*(tint_symbol_2))[2].m;
+  (*(tint_symbol_1))[1].m[0] = float3((*(tint_symbol_2))[0].m[1]).zxy;
+}
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol_5 [[buffer(0)]], threadgroup tint_symbol_6* tint_symbol_4 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
+  threadgroup tint_array<S, 4>* const tint_symbol_3 = &((*(tint_symbol_4)).w);
+  f_inner(local_invocation_index, tint_symbol_3, tint_symbol_5);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_workgroup.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_workgroup.wgsl.expected.spvasm
new file mode 100644
index 0000000..e510c36
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_workgroup.wgsl.expected.spvasm

@@ -0,0 +1,124 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 72
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f" %local_invocation_index_1
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %local_invocation_index_1 "local_invocation_index_1"
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %u "u"
+               OpName %w "w"
+               OpName %f_inner "f_inner"
+               OpName %local_invocation_index "local_invocation_index"
+               OpName %idx "idx"
+               OpName %f "f"
+               OpDecorate %local_invocation_index_1 BuiltIn LocalInvocationIndex
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 16
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 16
+               OpMemberDecorate %S 2 Offset 64
+               OpDecorate %_arr_S_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %uint = OpTypeInt 32 0
+%_ptr_Input_uint = OpTypePointer Input %uint
+%local_invocation_index_1 = OpVariable %_ptr_Input_uint Input
+        %int = OpTypeInt 32 1
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat2v3float = OpTypeMatrix %v3float 2
+          %S = OpTypeStruct %int %mat2v3float %int
+     %uint_4 = OpConstant %uint 4
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+    %u_block = OpTypeStruct %_arr_S_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%_ptr_Workgroup__arr_S_uint_4 = OpTypePointer Workgroup %_arr_S_uint_4
+          %w = OpVariable %_ptr_Workgroup__arr_S_uint_4 Workgroup
+       %void = OpTypeVoid
+         %16 = OpTypeFunction %void %uint
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %23 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Workgroup_S = OpTypePointer Workgroup %S
+         %37 = OpConstantNull %S
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+   %uint_264 = OpConstant %uint 264
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_S_uint_4 = OpTypePointer Uniform %_arr_S_uint_4
+      %int_1 = OpConstant %int 1
+      %int_2 = OpConstant %int 2
+%_ptr_Uniform_S = OpTypePointer Uniform %S
+      %int_3 = OpConstant %int 3
+%_ptr_Workgroup_mat2v3float = OpTypePointer Workgroup %mat2v3float
+%_ptr_Uniform_mat2v3float = OpTypePointer Uniform %mat2v3float
+         %60 = OpConstantNull %int
+%_ptr_Workgroup_v3float = OpTypePointer Workgroup %v3float
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+         %67 = OpTypeFunction %void
+    %f_inner = OpFunction %void None %16
+%local_invocation_index = OpFunctionParameter %uint
+         %20 = OpLabel
+        %idx = OpVariable %_ptr_Function_uint Function %23
+               OpStore %idx %local_invocation_index
+               OpBranch %24
+         %24 = OpLabel
+               OpLoopMerge %25 %26 None
+               OpBranch %27
+         %27 = OpLabel
+         %29 = OpLoad %uint %idx
+         %30 = OpULessThan %bool %29 %uint_4
+         %28 = OpLogicalNot %bool %30
+               OpSelectionMerge %32 None
+               OpBranchConditional %28 %33 %32
+         %33 = OpLabel
+               OpBranch %25
+         %32 = OpLabel
+         %34 = OpLoad %uint %idx
+         %36 = OpAccessChain %_ptr_Workgroup_S %w %34
+               OpStore %36 %37
+               OpBranch %26
+         %26 = OpLabel
+         %38 = OpLoad %uint %idx
+         %40 = OpIAdd %uint %38 %uint_1
+               OpStore %idx %40
+               OpBranch %24
+         %25 = OpLabel
+               OpControlBarrier %uint_2 %uint_2 %uint_264
+         %46 = OpAccessChain %_ptr_Uniform__arr_S_uint_4 %u %uint_0
+         %47 = OpLoad %_arr_S_uint_4 %46
+               OpStore %w %47
+         %49 = OpAccessChain %_ptr_Workgroup_S %w %int_1
+         %52 = OpAccessChain %_ptr_Uniform_S %u %uint_0 %int_2
+         %53 = OpLoad %S %52
+               OpStore %49 %53
+         %56 = OpAccessChain %_ptr_Workgroup_mat2v3float %w %int_3 %uint_1
+         %58 = OpAccessChain %_ptr_Uniform_mat2v3float %u %uint_0 %int_2 %uint_1
+         %59 = OpLoad %mat2v3float %58
+               OpStore %56 %59
+         %62 = OpAccessChain %_ptr_Workgroup_v3float %w %int_1 %uint_1 %60
+         %64 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %60 %uint_1 %int_1
+         %65 = OpLoad %v3float %64
+         %66 = OpVectorShuffle %v3float %65 %65 2 0 1
+               OpStore %62 %66
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %67
+         %69 = OpLabel
+         %71 = OpLoad %uint %local_invocation_index_1
+         %70 = OpFunctionCall %void %f_inner %71
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_workgroup.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_workgroup.wgsl.expected.wgsl
new file mode 100644
index 0000000..9628ff9
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_workgroup.wgsl.expected.wgsl

@@ -0,0 +1,18 @@
+struct S {
+  before : i32,
+  m : mat2x3<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+var<workgroup> w : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  w = u;
+  w[1] = u[2];
+  w[3].m = u[2].m;
+  w[1].m[0] = u[0].m[1].zxy;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f16/dynamic_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/dynamic_index_via_ptr.wgsl
new file mode 100644
index 0000000..c007f9b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/dynamic_index_via_ptr.wgsl

@@ -0,0 +1,34 @@
+enable f16;
+
+struct Inner {
+  @size(64)
+  m : mat2x4<f16>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+var<private> counter = 0;
+fn i() -> i32 { counter++; return counter; }
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a           = &a;
+  let p_a_i         = &((*p_a)[i()]);
+  let p_a_i_a       = &((*p_a_i).a);
+  let p_a_i_a_i     = &((*p_a_i_a)[i()]);
+  let p_a_i_a_i_m   = &((*p_a_i_a_i).m);
+  let p_a_i_a_i_m_i = &((*p_a_i_a_i_m)[i()]);
+
+
+  let l_a             : array<Outer, 4> =  *p_a;
+  let l_a_i           : Outer           =  *p_a_i;
+  let l_a_i_a         : array<Inner, 4> =  *p_a_i_a;
+  let l_a_i_a_i       : Inner           =  *p_a_i_a_i;
+  let l_a_i_a_i_m     : mat2x4<f16>     =  *p_a_i_a_i_m;
+  let l_a_i_a_i_m_i   : vec4<f16>       =  *p_a_i_a_i_m_i;
+  let l_a_i_a_i_m_i_i : f16             = (*p_a_i_a_i_m_i)[i()];
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..2bad103
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,88 @@
+struct Inner {
+  matrix<float16_t, 2, 4> m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+matrix<float16_t, 2, 4> tint_symbol_8(uint4 buffer[64], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  return matrix<float16_t, 2, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]));
+}
+
+Inner tint_symbol_7(uint4 buffer[64], uint offset) {
+  const Inner tint_symbol_11 = {tint_symbol_8(buffer, (offset + 0u))};
+  return tint_symbol_11;
+}
+
+typedef Inner tint_symbol_6_ret[4];
+tint_symbol_6_ret tint_symbol_6(uint4 buffer[64], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_7(buffer, (offset + (i_1 * 64u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_5(uint4 buffer[64], uint offset) {
+  const Outer tint_symbol_12 = {tint_symbol_6(buffer, (offset + 0u))};
+  return tint_symbol_12;
+}
+
+typedef Outer tint_symbol_4_ret[4];
+tint_symbol_4_ret tint_symbol_4(uint4 buffer[64], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    for(uint i_2 = 0u; (i_2 < 4u); i_2 = (i_2 + 1u)) {
+      arr_1[i_2] = tint_symbol_5(buffer, (offset + (i_2 * 256u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_a_i_save = i();
+  const int p_a_i_a_i_save = i();
+  const int p_a_i_a_i_m_i_save = i();
+  const Outer l_a[4] = tint_symbol_4(a, 0u);
+  const Outer l_a_i = tint_symbol_5(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a[4] = tint_symbol_6(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a_i = tint_symbol_7(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const matrix<float16_t, 2, 4> l_a_i_a_i_m = tint_symbol_8(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const uint scalar_offset_2 = ((((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))) + (8u * uint(p_a_i_a_i_m_i_save)))) / 4;
+  uint4 ubo_load_5 = a[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  const vector<float16_t, 4> l_a_i_a_i_m_i = vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]);
+  const int tint_symbol = p_a_i_save;
+  const int tint_symbol_1 = p_a_i_a_i_save;
+  const int tint_symbol_2 = p_a_i_a_i_m_i_save;
+  const int tint_symbol_3 = i();
+  const uint scalar_offset_bytes = (((((256u * uint(tint_symbol)) + (64u * uint(tint_symbol_1))) + (8u * uint(tint_symbol_2))) + (2u * uint(tint_symbol_3))));
+  const uint scalar_offset_index = scalar_offset_bytes / 4;
+  const float16_t l_a_i_a_i_m_i_i = float16_t(f16tof32(((a[scalar_offset_index / 4][scalar_offset_index % 4] >> (scalar_offset_bytes % 4 == 0 ? 0 : 16)) & 0xFFFF)));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..b92b308
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,93 @@
+SKIP: FAILED
+
+struct Inner {
+  matrix<float16_t, 2, 4> m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+matrix<float16_t, 2, 4> tint_symbol_8(uint4 buffer[64], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  return matrix<float16_t, 2, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]));
+}
+
+Inner tint_symbol_7(uint4 buffer[64], uint offset) {
+  const Inner tint_symbol_11 = {tint_symbol_8(buffer, (offset + 0u))};
+  return tint_symbol_11;
+}
+
+typedef Inner tint_symbol_6_ret[4];
+tint_symbol_6_ret tint_symbol_6(uint4 buffer[64], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_7(buffer, (offset + (i_1 * 64u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_5(uint4 buffer[64], uint offset) {
+  const Outer tint_symbol_12 = {tint_symbol_6(buffer, (offset + 0u))};
+  return tint_symbol_12;
+}
+
+typedef Outer tint_symbol_4_ret[4];
+tint_symbol_4_ret tint_symbol_4(uint4 buffer[64], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    for(uint i_2 = 0u; (i_2 < 4u); i_2 = (i_2 + 1u)) {
+      arr_1[i_2] = tint_symbol_5(buffer, (offset + (i_2 * 256u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_a_i_save = i();
+  const int p_a_i_a_i_save = i();
+  const int p_a_i_a_i_m_i_save = i();
+  const Outer l_a[4] = tint_symbol_4(a, 0u);
+  const Outer l_a_i = tint_symbol_5(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a[4] = tint_symbol_6(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a_i = tint_symbol_7(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const matrix<float16_t, 2, 4> l_a_i_a_i_m = tint_symbol_8(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const uint scalar_offset_2 = ((((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))) + (8u * uint(p_a_i_a_i_m_i_save)))) / 4;
+  uint4 ubo_load_5 = a[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  const vector<float16_t, 4> l_a_i_a_i_m_i = vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]);
+  const int tint_symbol = p_a_i_save;
+  const int tint_symbol_1 = p_a_i_a_i_save;
+  const int tint_symbol_2 = p_a_i_a_i_m_i_save;
+  const int tint_symbol_3 = i();
+  const uint scalar_offset_bytes = (((((256u * uint(tint_symbol)) + (64u * uint(tint_symbol_1))) + (8u * uint(tint_symbol_2))) + (2u * uint(tint_symbol_3))));
+  const uint scalar_offset_index = scalar_offset_bytes / 4;
+  const float16_t l_a_i_a_i_m_i_i = float16_t(f16tof32(((a[scalar_offset_index / 4][scalar_offset_index % 4] >> (scalar_offset_bytes % 4 == 0 ? 0 : 16)) & 0xFFFF)));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001DE39EE1520(2,10-18): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f16/dynamic_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/dynamic_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..762fe11
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/dynamic_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,147 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct Inner {
+  f16mat2x4 m;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+};
+
+struct Inner_std140 {
+  f16vec4 m_0;
+  f16vec4 m_1;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+};
+
+struct Outer {
+  Inner a[4];
+};
+
+struct Outer_std140 {
+  Inner_std140 a[4];
+};
+
+layout(binding = 0, std140) uniform a_block_std140_ubo {
+  Outer_std140 inner[4];
+} a;
+
+int counter = 0;
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+Inner conv_Inner(Inner_std140 val) {
+  return Inner(f16mat2x4(val.m_0, val.m_1), val.pad, val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.pad_7, val.pad_8, val.pad_9, val.pad_10, val.pad_11);
+}
+
+Inner[4] conv_arr4_Inner(Inner_std140 val[4]) {
+  Inner arr[4] = Inner[4](Inner(f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_Inner(val[i]);
+    }
+  }
+  return arr;
+}
+
+Outer conv_Outer(Outer_std140 val) {
+  return Outer(conv_arr4_Inner(val.a));
+}
+
+Outer[4] conv_arr4_Outer(Outer_std140 val[4]) {
+  Outer arr[4] = Outer[4](Outer(Inner[4](Inner(f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))), Outer(Inner[4](Inner(f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))), Outer(Inner[4](Inner(f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))), Outer(Inner[4](Inner(f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 
0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_Outer(val[i]);
+    }
+  }
+  return arr;
+}
+
+f16mat2x4 load_a_inner_p0_a_p1_m(uint p0, uint p1) {
+  uint s_save = p0;
+  uint s_save_1 = p1;
+  return f16mat2x4(a.inner[s_save].a[s_save_1].m_0, a.inner[s_save].a[s_save_1].m_1);
+}
+
+f16vec4 load_a_inner_p0_a_p1_m_p2(uint p0, uint p1, uint p2) {
+  switch(p2) {
+    case 0u: {
+      return a.inner[p0].a[p1].m_0;
+      break;
+    }
+    case 1u: {
+      return a.inner[p0].a[p1].m_1;
+      break;
+    }
+    default: {
+      return f16vec4(0.0hf);
+      break;
+    }
+  }
+}
+
+float16_t load_a_inner_p0_a_p1_m_p2_p3(uint p0, uint p1, uint p2, uint p3) {
+  switch(p2) {
+    case 0u: {
+      return a.inner[p0].a[p1].m_0[p3];
+      break;
+    }
+    case 1u: {
+      return a.inner[p0].a[p1].m_1[p3];
+      break;
+    }
+    default: {
+      return 0.0hf;
+      break;
+    }
+  }
+}
+
+void f() {
+  Outer p_a[4] = conv_arr4_Outer(a.inner);
+  int tint_symbol = i();
+  Outer p_a_i = conv_Outer(a.inner[tint_symbol]);
+  Inner p_a_i_a[4] = conv_arr4_Inner(a.inner[tint_symbol].a);
+  int tint_symbol_1 = i();
+  Inner p_a_i_a_i = conv_Inner(a.inner[tint_symbol].a[tint_symbol_1]);
+  f16mat2x4 p_a_i_a_i_m = load_a_inner_p0_a_p1_m(uint(tint_symbol), uint(tint_symbol_1));
+  int tint_symbol_2 = i();
+  f16vec4 p_a_i_a_i_m_i = load_a_inner_p0_a_p1_m_p2(uint(tint_symbol), uint(tint_symbol_1), uint(tint_symbol_2));
+  Outer l_a[4] = conv_arr4_Outer(a.inner);
+  Outer l_a_i = conv_Outer(a.inner[tint_symbol]);
+  Inner l_a_i_a[4] = conv_arr4_Inner(a.inner[tint_symbol].a);
+  Inner l_a_i_a_i = conv_Inner(a.inner[tint_symbol].a[tint_symbol_1]);
+  f16mat2x4 l_a_i_a_i_m = load_a_inner_p0_a_p1_m(uint(tint_symbol), uint(tint_symbol_1));
+  f16vec4 l_a_i_a_i_m_i = load_a_inner_p0_a_p1_m_p2(uint(tint_symbol), uint(tint_symbol_1), uint(tint_symbol_2));
+  int tint_symbol_3 = i();
+  float16_t l_a_i_a_i_m_i_i = load_a_inner_p0_a_p1_m_p2_p3(uint(tint_symbol), uint(tint_symbol_1), uint(tint_symbol_2), uint(tint_symbol_3));
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f16/dynamic_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/dynamic_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..a4620ce
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/dynamic_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,49 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct Inner {
+  /* 0x0000 */ half2x4 m;
+  /* 0x0010 */ tint_array<int8_t, 48> tint_pad;
+};
+
+struct Outer {
+  /* 0x0000 */ tint_array<Inner, 4> a;
+};
+
+int i() {
+  thread int tint_symbol_4 = 0;
+  tint_symbol_4 = as_type<int>((as_type<uint>(tint_symbol_4) + as_type<uint>(1)));
+  return tint_symbol_4;
+}
+
+kernel void f(const constant tint_array<Outer, 4>* tint_symbol_5 [[buffer(0)]]) {
+  int const tint_symbol = i();
+  int const p_a_i_save = tint_symbol;
+  int const tint_symbol_1 = i();
+  int const p_a_i_a_i_save = tint_symbol_1;
+  int const tint_symbol_2 = i();
+  int const p_a_i_a_i_m_i_save = tint_symbol_2;
+  tint_array<Outer, 4> const l_a = *(tint_symbol_5);
+  Outer const l_a_i = (*(tint_symbol_5))[p_a_i_save];
+  tint_array<Inner, 4> const l_a_i_a = (*(tint_symbol_5))[p_a_i_save].a;
+  Inner const l_a_i_a_i = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save];
+  half2x4 const l_a_i_a_i_m = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save].m;
+  half4 const l_a_i_a_i_m_i = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save].m[p_a_i_a_i_m_i_save];
+  int const tint_symbol_3 = i();
+  half const l_a_i_a_i_m_i_i = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save].m[p_a_i_a_i_m_i_save][tint_symbol_3];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f16/dynamic_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/dynamic_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..d8cab36
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/dynamic_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,310 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 193
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %a_block_std140 "a_block_std140"
+               OpMemberName %a_block_std140 0 "inner"
+               OpName %Outer_std140 "Outer_std140"
+               OpMemberName %Outer_std140 0 "a"
+               OpName %Inner_std140 "Inner_std140"
+               OpMemberName %Inner_std140 0 "m_0"
+               OpMemberName %Inner_std140 1 "m_1"
+               OpName %a "a"
+               OpName %counter "counter"
+               OpName %i "i"
+               OpName %Inner "Inner"
+               OpMemberName %Inner 0 "m"
+               OpName %conv_Inner "conv_Inner"
+               OpName %val "val"
+               OpName %conv_arr4_Inner "conv_arr4_Inner"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i_0 "i"
+               OpName %var_for_index_1 "var_for_index_1"
+               OpName %Outer "Outer"
+               OpMemberName %Outer 0 "a"
+               OpName %conv_Outer "conv_Outer"
+               OpName %val_1 "val"
+               OpName %conv_arr4_Outer "conv_arr4_Outer"
+               OpName %val_2 "val"
+               OpName %arr_0 "arr"
+               OpName %i_1 "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_a_inner_p0_a_p1_m "load_a_inner_p0_a_p1_m"
+               OpName %p0 "p0"
+               OpName %p1 "p1"
+               OpName %load_a_inner_p0_a_p1_m_p2 "load_a_inner_p0_a_p1_m_p2"
+               OpName %p0_0 "p0"
+               OpName %p1_0 "p1"
+               OpName %p2 "p2"
+               OpName %load_a_inner_p0_a_p1_m_p2_p3 "load_a_inner_p0_a_p1_m_p2_p3"
+               OpName %p0_1 "p0"
+               OpName %p1_1 "p1"
+               OpName %p2_0 "p2"
+               OpName %p3 "p3"
+               OpName %f "f"
+               OpDecorate %a_block_std140 Block
+               OpMemberDecorate %a_block_std140 0 Offset 0
+               OpMemberDecorate %Outer_std140 0 Offset 0
+               OpMemberDecorate %Inner_std140 0 Offset 0
+               OpMemberDecorate %Inner_std140 1 Offset 8
+               OpDecorate %_arr_Inner_std140_uint_4 ArrayStride 64
+               OpDecorate %_arr_Outer_std140_uint_4 ArrayStride 256
+               OpDecorate %a NonWritable
+               OpDecorate %a DescriptorSet 0
+               OpDecorate %a Binding 0
+               OpMemberDecorate %Inner 0 Offset 0
+               OpMemberDecorate %Inner 0 ColMajor
+               OpMemberDecorate %Inner 0 MatrixStride 8
+               OpDecorate %_arr_Inner_uint_4 ArrayStride 64
+               OpMemberDecorate %Outer 0 Offset 0
+               OpDecorate %_arr_Outer_uint_4 ArrayStride 256
+       %half = OpTypeFloat 16
+     %v4half = OpTypeVector %half 4
+%Inner_std140 = OpTypeStruct %v4half %v4half
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_Inner_std140_uint_4 = OpTypeArray %Inner_std140 %uint_4
+%Outer_std140 = OpTypeStruct %_arr_Inner_std140_uint_4
+%_arr_Outer_std140_uint_4 = OpTypeArray %Outer_std140 %uint_4
+%a_block_std140 = OpTypeStruct %_arr_Outer_std140_uint_4
+%_ptr_Uniform_a_block_std140 = OpTypePointer Uniform %a_block_std140
+          %a = OpVariable %_ptr_Uniform_a_block_std140 Uniform
+        %int = OpTypeInt 32 1
+         %13 = OpConstantNull %int
+%_ptr_Private_int = OpTypePointer Private %int
+    %counter = OpVariable %_ptr_Private_int Private %13
+         %16 = OpTypeFunction %int
+      %int_1 = OpConstant %int 1
+ %mat2v4half = OpTypeMatrix %v4half 2
+      %Inner = OpTypeStruct %mat2v4half
+         %23 = OpTypeFunction %Inner %Inner_std140
+%_arr_Inner_uint_4 = OpTypeArray %Inner %uint_4
+         %33 = OpTypeFunction %_arr_Inner_uint_4 %_arr_Inner_std140_uint_4
+%_ptr_Function__arr_Inner_uint_4 = OpTypePointer Function %_arr_Inner_uint_4
+         %40 = OpConstantNull %_arr_Inner_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %43 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_Inner_std140_uint_4 = OpTypePointer Function %_arr_Inner_std140_uint_4
+         %56 = OpConstantNull %_arr_Inner_std140_uint_4
+%_ptr_Function_Inner = OpTypePointer Function %Inner
+%_ptr_Function_Inner_std140 = OpTypePointer Function %Inner_std140
+     %uint_1 = OpConstant %uint 1
+      %Outer = OpTypeStruct %_arr_Inner_uint_4
+         %69 = OpTypeFunction %Outer %Outer_std140
+%_arr_Outer_uint_4 = OpTypeArray %Outer %uint_4
+         %77 = OpTypeFunction %_arr_Outer_uint_4 %_arr_Outer_std140_uint_4
+%_ptr_Function__arr_Outer_uint_4 = OpTypePointer Function %_arr_Outer_uint_4
+         %84 = OpConstantNull %_arr_Outer_uint_4
+%_ptr_Function__arr_Outer_std140_uint_4 = OpTypePointer Function %_arr_Outer_std140_uint_4
+         %97 = OpConstantNull %_arr_Outer_std140_uint_4
+%_ptr_Function_Outer = OpTypePointer Function %Outer
+%_ptr_Function_Outer_std140 = OpTypePointer Function %Outer_std140
+        %109 = OpTypeFunction %mat2v4half %uint %uint
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_Inner_std140 = OpTypePointer Uniform %Inner_std140
+%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half
+        %126 = OpTypeFunction %v4half %uint %uint %uint
+        %140 = OpConstantNull %v4half
+        %141 = OpTypeFunction %half %uint %uint %uint %uint
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+        %157 = OpConstantNull %half
+       %void = OpTypeVoid
+        %158 = OpTypeFunction %void
+%_ptr_Uniform__arr_Outer_std140_uint_4 = OpTypePointer Uniform %_arr_Outer_std140_uint_4
+%_ptr_Uniform_Outer_std140 = OpTypePointer Uniform %Outer_std140
+%_ptr_Uniform__arr_Inner_std140_uint_4 = OpTypePointer Uniform %_arr_Inner_std140_uint_4
+          %i = OpFunction %int None %16
+         %18 = OpLabel
+         %19 = OpLoad %int %counter
+         %21 = OpIAdd %int %19 %int_1
+               OpStore %counter %21
+         %22 = OpLoad %int %counter
+               OpReturnValue %22
+               OpFunctionEnd
+ %conv_Inner = OpFunction %Inner None %23
+        %val = OpFunctionParameter %Inner_std140
+         %28 = OpLabel
+         %29 = OpCompositeExtract %v4half %val 0
+         %30 = OpCompositeExtract %v4half %val 1
+         %31 = OpCompositeConstruct %mat2v4half %29 %30
+         %32 = OpCompositeConstruct %Inner %31
+               OpReturnValue %32
+               OpFunctionEnd
+%conv_arr4_Inner = OpFunction %_arr_Inner_uint_4 None %33
+      %val_0 = OpFunctionParameter %_arr_Inner_std140_uint_4
+         %37 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_Inner_uint_4 Function %40
+        %i_0 = OpVariable %_ptr_Function_uint Function %43
+%var_for_index_1 = OpVariable %_ptr_Function__arr_Inner_std140_uint_4 Function %56
+               OpBranch %44
+         %44 = OpLabel
+               OpLoopMerge %45 %46 None
+               OpBranch %47
+         %47 = OpLabel
+         %49 = OpLoad %uint %i_0
+         %50 = OpULessThan %bool %49 %uint_4
+         %48 = OpLogicalNot %bool %50
+               OpSelectionMerge %52 None
+               OpBranchConditional %48 %53 %52
+         %53 = OpLabel
+               OpBranch %45
+         %52 = OpLabel
+               OpStore %var_for_index_1 %val_0
+         %57 = OpLoad %uint %i_0
+         %59 = OpAccessChain %_ptr_Function_Inner %arr %57
+         %61 = OpLoad %uint %i_0
+         %63 = OpAccessChain %_ptr_Function_Inner_std140 %var_for_index_1 %61
+         %64 = OpLoad %Inner_std140 %63
+         %60 = OpFunctionCall %Inner %conv_Inner %64
+               OpStore %59 %60
+               OpBranch %46
+         %46 = OpLabel
+         %65 = OpLoad %uint %i_0
+         %67 = OpIAdd %uint %65 %uint_1
+               OpStore %i_0 %67
+               OpBranch %44
+         %45 = OpLabel
+         %68 = OpLoad %_arr_Inner_uint_4 %arr
+               OpReturnValue %68
+               OpFunctionEnd
+ %conv_Outer = OpFunction %Outer None %69
+      %val_1 = OpFunctionParameter %Outer_std140
+         %73 = OpLabel
+         %75 = OpCompositeExtract %_arr_Inner_std140_uint_4 %val_1 0
+         %74 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr4_Inner %75
+         %76 = OpCompositeConstruct %Outer %74
+               OpReturnValue %76
+               OpFunctionEnd
+%conv_arr4_Outer = OpFunction %_arr_Outer_uint_4 None %77
+      %val_2 = OpFunctionParameter %_arr_Outer_std140_uint_4
+         %81 = OpLabel
+      %arr_0 = OpVariable %_ptr_Function__arr_Outer_uint_4 Function %84
+        %i_1 = OpVariable %_ptr_Function_uint Function %43
+%var_for_index = OpVariable %_ptr_Function__arr_Outer_std140_uint_4 Function %97
+               OpBranch %86
+         %86 = OpLabel
+               OpLoopMerge %87 %88 None
+               OpBranch %89
+         %89 = OpLabel
+         %91 = OpLoad %uint %i_1
+         %92 = OpULessThan %bool %91 %uint_4
+         %90 = OpLogicalNot %bool %92
+               OpSelectionMerge %93 None
+               OpBranchConditional %90 %94 %93
+         %94 = OpLabel
+               OpBranch %87
+         %93 = OpLabel
+               OpStore %var_for_index %val_2
+         %98 = OpLoad %uint %i_1
+        %100 = OpAccessChain %_ptr_Function_Outer %arr_0 %98
+        %102 = OpLoad %uint %i_1
+        %104 = OpAccessChain %_ptr_Function_Outer_std140 %var_for_index %102
+        %105 = OpLoad %Outer_std140 %104
+        %101 = OpFunctionCall %Outer %conv_Outer %105
+               OpStore %100 %101
+               OpBranch %88
+         %88 = OpLabel
+        %106 = OpLoad %uint %i_1
+        %107 = OpIAdd %uint %106 %uint_1
+               OpStore %i_1 %107
+               OpBranch %86
+         %87 = OpLabel
+        %108 = OpLoad %_arr_Outer_uint_4 %arr_0
+               OpReturnValue %108
+               OpFunctionEnd
+%load_a_inner_p0_a_p1_m = OpFunction %mat2v4half None %109
+         %p0 = OpFunctionParameter %uint
+         %p1 = OpFunctionParameter %uint
+        %113 = OpLabel
+        %117 = OpAccessChain %_ptr_Uniform_Inner_std140 %a %uint_0 %p0 %uint_0 %p1
+        %120 = OpAccessChain %_ptr_Uniform_v4half %117 %uint_0
+        %121 = OpLoad %v4half %120
+        %123 = OpAccessChain %_ptr_Uniform_v4half %117 %uint_1
+        %124 = OpLoad %v4half %123
+        %125 = OpCompositeConstruct %mat2v4half %121 %124
+               OpReturnValue %125
+               OpFunctionEnd
+%load_a_inner_p0_a_p1_m_p2 = OpFunction %v4half None %126
+       %p0_0 = OpFunctionParameter %uint
+       %p1_0 = OpFunctionParameter %uint
+         %p2 = OpFunctionParameter %uint
+        %131 = OpLabel
+               OpSelectionMerge %132 None
+               OpSwitch %p2 %133 0 %134 1 %135
+        %134 = OpLabel
+        %136 = OpAccessChain %_ptr_Uniform_v4half %a %uint_0 %p0_0 %uint_0 %p1_0 %uint_0
+        %137 = OpLoad %v4half %136
+               OpReturnValue %137
+        %135 = OpLabel
+        %138 = OpAccessChain %_ptr_Uniform_v4half %a %uint_0 %p0_0 %uint_0 %p1_0 %uint_1
+        %139 = OpLoad %v4half %138
+               OpReturnValue %139
+        %133 = OpLabel
+               OpReturnValue %140
+        %132 = OpLabel
+               OpReturnValue %140
+               OpFunctionEnd
+%load_a_inner_p0_a_p1_m_p2_p3 = OpFunction %half None %141
+       %p0_1 = OpFunctionParameter %uint
+       %p1_1 = OpFunctionParameter %uint
+       %p2_0 = OpFunctionParameter %uint
+         %p3 = OpFunctionParameter %uint
+        %147 = OpLabel
+               OpSelectionMerge %148 None
+               OpSwitch %p2_0 %149 0 %150 1 %151
+        %150 = OpLabel
+        %153 = OpAccessChain %_ptr_Uniform_half %a %uint_0 %p0_1 %uint_0 %p1_1 %uint_0 %p3
+        %154 = OpLoad %half %153
+               OpReturnValue %154
+        %151 = OpLabel
+        %155 = OpAccessChain %_ptr_Uniform_half %a %uint_0 %p0_1 %uint_0 %p1_1 %uint_1 %p3
+        %156 = OpLoad %half %155
+               OpReturnValue %156
+        %149 = OpLabel
+               OpReturnValue %157
+        %148 = OpLabel
+               OpReturnValue %157
+               OpFunctionEnd
+          %f = OpFunction %void None %158
+        %161 = OpLabel
+        %162 = OpFunctionCall %int %i
+        %163 = OpFunctionCall %int %i
+        %164 = OpFunctionCall %int %i
+        %167 = OpAccessChain %_ptr_Uniform__arr_Outer_std140_uint_4 %a %uint_0
+        %168 = OpLoad %_arr_Outer_std140_uint_4 %167
+        %165 = OpFunctionCall %_arr_Outer_uint_4 %conv_arr4_Outer %168
+        %171 = OpAccessChain %_ptr_Uniform_Outer_std140 %a %uint_0 %162
+        %172 = OpLoad %Outer_std140 %171
+        %169 = OpFunctionCall %Outer %conv_Outer %172
+        %175 = OpAccessChain %_ptr_Uniform__arr_Inner_std140_uint_4 %a %uint_0 %162 %uint_0
+        %176 = OpLoad %_arr_Inner_std140_uint_4 %175
+        %173 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr4_Inner %176
+        %178 = OpAccessChain %_ptr_Uniform_Inner_std140 %a %uint_0 %162 %uint_0 %163
+        %179 = OpLoad %Inner_std140 %178
+        %177 = OpFunctionCall %Inner %conv_Inner %179
+        %181 = OpBitcast %uint %162
+        %182 = OpBitcast %uint %163
+        %180 = OpFunctionCall %mat2v4half %load_a_inner_p0_a_p1_m %181 %182
+        %184 = OpBitcast %uint %162
+        %185 = OpBitcast %uint %163
+        %186 = OpBitcast %uint %164
+        %183 = OpFunctionCall %v4half %load_a_inner_p0_a_p1_m_p2 %184 %185 %186
+        %187 = OpFunctionCall %int %i
+        %189 = OpBitcast %uint %162
+        %190 = OpBitcast %uint %163
+        %191 = OpBitcast %uint %164
+        %192 = OpBitcast %uint %187
+        %188 = OpFunctionCall %half %load_a_inner_p0_a_p1_m_p2_p3 %189 %190 %191 %192
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f16/dynamic_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/dynamic_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..05398c9
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/dynamic_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,36 @@
+enable f16;
+
+struct Inner {
+  @size(64)
+  m : mat2x4<f16>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+var<private> counter = 0;
+
+fn i() -> i32 {
+  counter++;
+  return counter;
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &(a);
+  let p_a_i = &((*(p_a))[i()]);
+  let p_a_i_a = &((*(p_a_i)).a);
+  let p_a_i_a_i = &((*(p_a_i_a))[i()]);
+  let p_a_i_a_i_m = &((*(p_a_i_a_i)).m);
+  let p_a_i_a_i_m_i = &((*(p_a_i_a_i_m))[i()]);
+  let l_a : array<Outer, 4> = *(p_a);
+  let l_a_i : Outer = *(p_a_i);
+  let l_a_i_a : array<Inner, 4> = *(p_a_i_a);
+  let l_a_i_a_i : Inner = *(p_a_i_a_i);
+  let l_a_i_a_i_m : mat2x4<f16> = *(p_a_i_a_i_m);
+  let l_a_i_a_i_m_i : vec4<f16> = *(p_a_i_a_i_m_i);
+  let l_a_i_a_i_m_i_i : f16 = (*(p_a_i_a_i_m_i))[i()];
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f16/static_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/static_index_via_ptr.wgsl
new file mode 100644
index 0000000..1f4f376
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/static_index_via_ptr.wgsl

@@ -0,0 +1,31 @@
+enable f16;
+
+struct Inner {
+  @size(64)
+  m : mat2x4<f16>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &a;
+  let p_a_3 = &((*p_a)[3]);
+  let p_a_3_a = &((*p_a_3).a);
+  let p_a_3_a_2 = &((*p_a_3_a)[2]);
+  let p_a_3_a_2_m = &((*p_a_3_a_2).m);
+  let p_a_3_a_2_m_1 = &((*p_a_3_a_2_m)[1]);
+
+
+  let l_a             : array<Outer, 4> = *p_a;
+  let l_a_3           : Outer           = *p_a_3;
+  let l_a_3_a         : array<Inner, 4> = *p_a_3_a;
+  let l_a_3_a_2       : Inner           = *p_a_3_a_2;
+  let l_a_3_a_2_m     : mat2x4<f16>     = *p_a_3_a_2_m;
+  let l_a_3_a_2_m_1   : vec4<f16>       = *p_a_3_a_2_m_1;
+  let l_a_3_a_2_m_1_0 : f16             = (*p_a_3_a_2_m_1)[0];
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..03120f7
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,71 @@
+struct Inner {
+  matrix<float16_t, 2, 4> m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];
+};
+
+matrix<float16_t, 2, 4> tint_symbol_4(uint4 buffer[64], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  return matrix<float16_t, 2, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]));
+}
+
+Inner tint_symbol_3(uint4 buffer[64], uint offset) {
+  const Inner tint_symbol_7 = {tint_symbol_4(buffer, (offset + 0u))};
+  return tint_symbol_7;
+}
+
+typedef Inner tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[64], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_3(buffer, (offset + (i * 64u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_1(uint4 buffer[64], uint offset) {
+  const Outer tint_symbol_8 = {tint_symbol_2(buffer, (offset + 0u))};
+  return tint_symbol_8;
+}
+
+typedef Outer tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[64], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr_1[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 256u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const Outer l_a[4] = tint_symbol(a, 0u);
+  const Outer l_a_3 = tint_symbol_1(a, 768u);
+  const Inner l_a_3_a[4] = tint_symbol_2(a, 768u);
+  const Inner l_a_3_a_2 = tint_symbol_3(a, 896u);
+  const matrix<float16_t, 2, 4> l_a_3_a_2_m = tint_symbol_4(a, 896u);
+  uint2 ubo_load_4 = a[56].zw;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  const vector<float16_t, 4> l_a_3_a_2_m_1 = vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]);
+  const float16_t l_a_3_a_2_m_1_0 = float16_t(f16tof32(((a[56].z) & 0xFFFF)));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..39aab69
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,76 @@
+SKIP: FAILED
+
+struct Inner {
+  matrix<float16_t, 2, 4> m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];
+};
+
+matrix<float16_t, 2, 4> tint_symbol_4(uint4 buffer[64], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  return matrix<float16_t, 2, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]));
+}
+
+Inner tint_symbol_3(uint4 buffer[64], uint offset) {
+  const Inner tint_symbol_7 = {tint_symbol_4(buffer, (offset + 0u))};
+  return tint_symbol_7;
+}
+
+typedef Inner tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[64], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_3(buffer, (offset + (i * 64u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_1(uint4 buffer[64], uint offset) {
+  const Outer tint_symbol_8 = {tint_symbol_2(buffer, (offset + 0u))};
+  return tint_symbol_8;
+}
+
+typedef Outer tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[64], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr_1[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 256u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const Outer l_a[4] = tint_symbol(a, 0u);
+  const Outer l_a_3 = tint_symbol_1(a, 768u);
+  const Inner l_a_3_a[4] = tint_symbol_2(a, 768u);
+  const Inner l_a_3_a_2 = tint_symbol_3(a, 896u);
+  const matrix<float16_t, 2, 4> l_a_3_a_2_m = tint_symbol_4(a, 896u);
+  uint2 ubo_load_4 = a[56].zw;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  const vector<float16_t, 4> l_a_3_a_2_m_1 = vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]);
+  const float16_t l_a_3_a_2_m_1_0 = float16_t(f16tof32(((a[56].z) & 0xFFFF)));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001CF0BABD660(2,10-18): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f16/static_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/static_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..659e661
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/static_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,101 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct Inner {
+  f16mat2x4 m;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+};
+
+struct Inner_std140 {
+  f16vec4 m_0;
+  f16vec4 m_1;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+};
+
+struct Outer {
+  Inner a[4];
+};
+
+struct Outer_std140 {
+  Inner_std140 a[4];
+};
+
+layout(binding = 0, std140) uniform a_block_std140_ubo {
+  Outer_std140 inner[4];
+} a;
+
+Inner conv_Inner(Inner_std140 val) {
+  return Inner(f16mat2x4(val.m_0, val.m_1), val.pad, val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.pad_7, val.pad_8, val.pad_9, val.pad_10, val.pad_11);
+}
+
+Inner[4] conv_arr4_Inner(Inner_std140 val[4]) {
+  Inner arr[4] = Inner[4](Inner(f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_Inner(val[i]);
+    }
+  }
+  return arr;
+}
+
+Outer conv_Outer(Outer_std140 val) {
+  return Outer(conv_arr4_Inner(val.a));
+}
+
+Outer[4] conv_arr4_Outer(Outer_std140 val[4]) {
+  Outer arr[4] = Outer[4](Outer(Inner[4](Inner(f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))), Outer(Inner[4](Inner(f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))), Outer(Inner[4](Inner(f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))), Outer(Inner[4](Inner(f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 
0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_Outer(val[i]);
+    }
+  }
+  return arr;
+}
+
+f16mat2x4 load_a_inner_3_a_2_m() {
+  return f16mat2x4(a.inner[3u].a[2u].m_0, a.inner[3u].a[2u].m_1);
+}
+
+void f() {
+  Outer p_a[4] = conv_arr4_Outer(a.inner);
+  Outer p_a_3 = conv_Outer(a.inner[3u]);
+  Inner p_a_3_a[4] = conv_arr4_Inner(a.inner[3u].a);
+  Inner p_a_3_a_2 = conv_Inner(a.inner[3u].a[2u]);
+  f16mat2x4 p_a_3_a_2_m = load_a_inner_3_a_2_m();
+  f16vec4 p_a_3_a_2_m_1 = a.inner[3u].a[2u].m_1;
+  Outer l_a[4] = conv_arr4_Outer(a.inner);
+  Outer l_a_3 = conv_Outer(a.inner[3u]);
+  Inner l_a_3_a[4] = conv_arr4_Inner(a.inner[3u].a);
+  Inner l_a_3_a_2 = conv_Inner(a.inner[3u].a[2u]);
+  f16mat2x4 l_a_3_a_2_m = load_a_inner_3_a_2_m();
+  f16vec4 l_a_3_a_2_m_1 = a.inner[3u].a[2u].m_1;
+  float16_t l_a_3_a_2_m_1_0 = a.inner[3u].a[2u].m_1[0u];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f16/static_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/static_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..937cb3a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/static_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,36 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct Inner {
+  /* 0x0000 */ half2x4 m;
+  /* 0x0010 */ tint_array<int8_t, 48> tint_pad;
+};
+
+struct Outer {
+  /* 0x0000 */ tint_array<Inner, 4> a;
+};
+
+kernel void f(const constant tint_array<Outer, 4>* tint_symbol [[buffer(0)]]) {
+  tint_array<Outer, 4> const l_a = *(tint_symbol);
+  Outer const l_a_3 = (*(tint_symbol))[3];
+  tint_array<Inner, 4> const l_a_3_a = (*(tint_symbol))[3].a;
+  Inner const l_a_3_a_2 = (*(tint_symbol))[3].a[2];
+  half2x4 const l_a_3_a_2_m = (*(tint_symbol))[3].a[2].m;
+  half4 const l_a_3_a_2_m_1 = (*(tint_symbol))[3].a[2].m[1];
+  half const l_a_3_a_2_m_1_0 = (*(tint_symbol))[3].a[2].m[1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f16/static_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/static_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..a07d9ae
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/static_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,227 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 140
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %a_block_std140 "a_block_std140"
+               OpMemberName %a_block_std140 0 "inner"
+               OpName %Outer_std140 "Outer_std140"
+               OpMemberName %Outer_std140 0 "a"
+               OpName %Inner_std140 "Inner_std140"
+               OpMemberName %Inner_std140 0 "m_0"
+               OpMemberName %Inner_std140 1 "m_1"
+               OpName %a "a"
+               OpName %Inner "Inner"
+               OpMemberName %Inner 0 "m"
+               OpName %conv_Inner "conv_Inner"
+               OpName %val "val"
+               OpName %conv_arr4_Inner "conv_arr4_Inner"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index_1 "var_for_index_1"
+               OpName %Outer "Outer"
+               OpMemberName %Outer 0 "a"
+               OpName %conv_Outer "conv_Outer"
+               OpName %val_1 "val"
+               OpName %conv_arr4_Outer "conv_arr4_Outer"
+               OpName %val_2 "val"
+               OpName %arr_0 "arr"
+               OpName %i_0 "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_a_inner_3_a_2_m "load_a_inner_3_a_2_m"
+               OpName %f "f"
+               OpDecorate %a_block_std140 Block
+               OpMemberDecorate %a_block_std140 0 Offset 0
+               OpMemberDecorate %Outer_std140 0 Offset 0
+               OpMemberDecorate %Inner_std140 0 Offset 0
+               OpMemberDecorate %Inner_std140 1 Offset 8
+               OpDecorate %_arr_Inner_std140_uint_4 ArrayStride 64
+               OpDecorate %_arr_Outer_std140_uint_4 ArrayStride 256
+               OpDecorate %a NonWritable
+               OpDecorate %a DescriptorSet 0
+               OpDecorate %a Binding 0
+               OpMemberDecorate %Inner 0 Offset 0
+               OpMemberDecorate %Inner 0 ColMajor
+               OpMemberDecorate %Inner 0 MatrixStride 8
+               OpDecorate %_arr_Inner_uint_4 ArrayStride 64
+               OpMemberDecorate %Outer 0 Offset 0
+               OpDecorate %_arr_Outer_uint_4 ArrayStride 256
+       %half = OpTypeFloat 16
+     %v4half = OpTypeVector %half 4
+%Inner_std140 = OpTypeStruct %v4half %v4half
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_Inner_std140_uint_4 = OpTypeArray %Inner_std140 %uint_4
+%Outer_std140 = OpTypeStruct %_arr_Inner_std140_uint_4
+%_arr_Outer_std140_uint_4 = OpTypeArray %Outer_std140 %uint_4
+%a_block_std140 = OpTypeStruct %_arr_Outer_std140_uint_4
+%_ptr_Uniform_a_block_std140 = OpTypePointer Uniform %a_block_std140
+          %a = OpVariable %_ptr_Uniform_a_block_std140 Uniform
+ %mat2v4half = OpTypeMatrix %v4half 2
+      %Inner = OpTypeStruct %mat2v4half
+         %12 = OpTypeFunction %Inner %Inner_std140
+%_arr_Inner_uint_4 = OpTypeArray %Inner %uint_4
+         %22 = OpTypeFunction %_arr_Inner_uint_4 %_arr_Inner_std140_uint_4
+%_ptr_Function__arr_Inner_uint_4 = OpTypePointer Function %_arr_Inner_uint_4
+         %29 = OpConstantNull %_arr_Inner_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %32 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_Inner_std140_uint_4 = OpTypePointer Function %_arr_Inner_std140_uint_4
+         %45 = OpConstantNull %_arr_Inner_std140_uint_4
+%_ptr_Function_Inner = OpTypePointer Function %Inner
+%_ptr_Function_Inner_std140 = OpTypePointer Function %Inner_std140
+     %uint_1 = OpConstant %uint 1
+      %Outer = OpTypeStruct %_arr_Inner_uint_4
+         %58 = OpTypeFunction %Outer %Outer_std140
+%_arr_Outer_uint_4 = OpTypeArray %Outer %uint_4
+         %66 = OpTypeFunction %_arr_Outer_uint_4 %_arr_Outer_std140_uint_4
+%_ptr_Function__arr_Outer_uint_4 = OpTypePointer Function %_arr_Outer_uint_4
+         %73 = OpConstantNull %_arr_Outer_uint_4
+%_ptr_Function__arr_Outer_std140_uint_4 = OpTypePointer Function %_arr_Outer_std140_uint_4
+         %86 = OpConstantNull %_arr_Outer_std140_uint_4
+%_ptr_Function_Outer = OpTypePointer Function %Outer
+%_ptr_Function_Outer_std140 = OpTypePointer Function %Outer_std140
+         %98 = OpTypeFunction %mat2v4half
+     %uint_0 = OpConstant %uint 0
+     %uint_3 = OpConstant %uint 3
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_Inner_std140 = OpTypePointer Uniform %Inner_std140
+%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half
+       %void = OpTypeVoid
+        %115 = OpTypeFunction %void
+%_ptr_Uniform__arr_Outer_std140_uint_4 = OpTypePointer Uniform %_arr_Outer_std140_uint_4
+%_ptr_Uniform_Outer_std140 = OpTypePointer Uniform %Outer_std140
+%_ptr_Uniform__arr_Inner_std140_uint_4 = OpTypePointer Uniform %_arr_Inner_std140_uint_4
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+ %conv_Inner = OpFunction %Inner None %12
+        %val = OpFunctionParameter %Inner_std140
+         %17 = OpLabel
+         %18 = OpCompositeExtract %v4half %val 0
+         %19 = OpCompositeExtract %v4half %val 1
+         %20 = OpCompositeConstruct %mat2v4half %18 %19
+         %21 = OpCompositeConstruct %Inner %20
+               OpReturnValue %21
+               OpFunctionEnd
+%conv_arr4_Inner = OpFunction %_arr_Inner_uint_4 None %22
+      %val_0 = OpFunctionParameter %_arr_Inner_std140_uint_4
+         %26 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_Inner_uint_4 Function %29
+          %i = OpVariable %_ptr_Function_uint Function %32
+%var_for_index_1 = OpVariable %_ptr_Function__arr_Inner_std140_uint_4 Function %45
+               OpBranch %33
+         %33 = OpLabel
+               OpLoopMerge %34 %35 None
+               OpBranch %36
+         %36 = OpLabel
+         %38 = OpLoad %uint %i
+         %39 = OpULessThan %bool %38 %uint_4
+         %37 = OpLogicalNot %bool %39
+               OpSelectionMerge %41 None
+               OpBranchConditional %37 %42 %41
+         %42 = OpLabel
+               OpBranch %34
+         %41 = OpLabel
+               OpStore %var_for_index_1 %val_0
+         %46 = OpLoad %uint %i
+         %48 = OpAccessChain %_ptr_Function_Inner %arr %46
+         %50 = OpLoad %uint %i
+         %52 = OpAccessChain %_ptr_Function_Inner_std140 %var_for_index_1 %50
+         %53 = OpLoad %Inner_std140 %52
+         %49 = OpFunctionCall %Inner %conv_Inner %53
+               OpStore %48 %49
+               OpBranch %35
+         %35 = OpLabel
+         %54 = OpLoad %uint %i
+         %56 = OpIAdd %uint %54 %uint_1
+               OpStore %i %56
+               OpBranch %33
+         %34 = OpLabel
+         %57 = OpLoad %_arr_Inner_uint_4 %arr
+               OpReturnValue %57
+               OpFunctionEnd
+ %conv_Outer = OpFunction %Outer None %58
+      %val_1 = OpFunctionParameter %Outer_std140
+         %62 = OpLabel
+         %64 = OpCompositeExtract %_arr_Inner_std140_uint_4 %val_1 0
+         %63 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr4_Inner %64
+         %65 = OpCompositeConstruct %Outer %63
+               OpReturnValue %65
+               OpFunctionEnd
+%conv_arr4_Outer = OpFunction %_arr_Outer_uint_4 None %66
+      %val_2 = OpFunctionParameter %_arr_Outer_std140_uint_4
+         %70 = OpLabel
+      %arr_0 = OpVariable %_ptr_Function__arr_Outer_uint_4 Function %73
+        %i_0 = OpVariable %_ptr_Function_uint Function %32
+%var_for_index = OpVariable %_ptr_Function__arr_Outer_std140_uint_4 Function %86
+               OpBranch %75
+         %75 = OpLabel
+               OpLoopMerge %76 %77 None
+               OpBranch %78
+         %78 = OpLabel
+         %80 = OpLoad %uint %i_0
+         %81 = OpULessThan %bool %80 %uint_4
+         %79 = OpLogicalNot %bool %81
+               OpSelectionMerge %82 None
+               OpBranchConditional %79 %83 %82
+         %83 = OpLabel
+               OpBranch %76
+         %82 = OpLabel
+               OpStore %var_for_index %val_2
+         %87 = OpLoad %uint %i_0
+         %89 = OpAccessChain %_ptr_Function_Outer %arr_0 %87
+         %91 = OpLoad %uint %i_0
+         %93 = OpAccessChain %_ptr_Function_Outer_std140 %var_for_index %91
+         %94 = OpLoad %Outer_std140 %93
+         %90 = OpFunctionCall %Outer %conv_Outer %94
+               OpStore %89 %90
+               OpBranch %77
+         %77 = OpLabel
+         %95 = OpLoad %uint %i_0
+         %96 = OpIAdd %uint %95 %uint_1
+               OpStore %i_0 %96
+               OpBranch %75
+         %76 = OpLabel
+         %97 = OpLoad %_arr_Outer_uint_4 %arr_0
+               OpReturnValue %97
+               OpFunctionEnd
+%load_a_inner_3_a_2_m = OpFunction %mat2v4half None %98
+        %100 = OpLabel
+        %106 = OpAccessChain %_ptr_Uniform_Inner_std140 %a %uint_0 %uint_3 %uint_0 %uint_2
+        %109 = OpAccessChain %_ptr_Uniform_v4half %106 %uint_0
+        %110 = OpLoad %v4half %109
+        %112 = OpAccessChain %_ptr_Uniform_v4half %106 %uint_1
+        %113 = OpLoad %v4half %112
+        %114 = OpCompositeConstruct %mat2v4half %110 %113
+               OpReturnValue %114
+               OpFunctionEnd
+          %f = OpFunction %void None %115
+        %118 = OpLabel
+        %121 = OpAccessChain %_ptr_Uniform__arr_Outer_std140_uint_4 %a %uint_0
+        %122 = OpLoad %_arr_Outer_std140_uint_4 %121
+        %119 = OpFunctionCall %_arr_Outer_uint_4 %conv_arr4_Outer %122
+        %125 = OpAccessChain %_ptr_Uniform_Outer_std140 %a %uint_0 %uint_3
+        %126 = OpLoad %Outer_std140 %125
+        %123 = OpFunctionCall %Outer %conv_Outer %126
+        %129 = OpAccessChain %_ptr_Uniform__arr_Inner_std140_uint_4 %a %uint_0 %uint_3 %uint_0
+        %130 = OpLoad %_arr_Inner_std140_uint_4 %129
+        %127 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr4_Inner %130
+        %132 = OpAccessChain %_ptr_Uniform_Inner_std140 %a %uint_0 %uint_3 %uint_0 %uint_2
+        %133 = OpLoad %Inner_std140 %132
+        %131 = OpFunctionCall %Inner %conv_Inner %133
+        %134 = OpFunctionCall %mat2v4half %load_a_inner_3_a_2_m
+        %135 = OpAccessChain %_ptr_Uniform_v4half %a %uint_0 %uint_3 %uint_0 %uint_2 %uint_1
+        %136 = OpLoad %v4half %135
+        %138 = OpAccessChain %_ptr_Uniform_half %a %uint_0 %uint_3 %uint_0 %uint_2 %uint_1 %32
+        %139 = OpLoad %half %138
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f16/static_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/static_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..8115149
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/static_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,29 @@
+enable f16;
+
+struct Inner {
+  @size(64)
+  m : mat2x4<f16>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &(a);
+  let p_a_3 = &((*(p_a))[3]);
+  let p_a_3_a = &((*(p_a_3)).a);
+  let p_a_3_a_2 = &((*(p_a_3_a))[2]);
+  let p_a_3_a_2_m = &((*(p_a_3_a_2)).m);
+  let p_a_3_a_2_m_1 = &((*(p_a_3_a_2_m))[1]);
+  let l_a : array<Outer, 4> = *(p_a);
+  let l_a_3 : Outer = *(p_a_3);
+  let l_a_3_a : array<Inner, 4> = *(p_a_3_a);
+  let l_a_3_a_2 : Inner = *(p_a_3_a_2);
+  let l_a_3_a_2_m : mat2x4<f16> = *(p_a_3_a_2_m);
+  let l_a_3_a_2_m_1 : vec4<f16> = *(p_a_3_a_2_m_1);
+  let l_a_3_a_2_m_1_0 : f16 = (*(p_a_3_a_2_m_1))[0];
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_builtin.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_builtin.wgsl
new file mode 100644
index 0000000..767220f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_builtin.wgsl

@@ -0,0 +1,17 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat2x4<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    let t = transpose(u[2].m);
+    let l = length(u[0].m[1].ywxz);
+    let a = abs(u[0].m[1].ywxz.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_builtin.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_builtin.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..2244be6
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_builtin.wgsl.expected.dxc.hlsl

@@ -0,0 +1,31 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+
+matrix<float16_t, 2, 4> tint_symbol(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  return matrix<float16_t, 2, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 4, 2> t = transpose(tint_symbol(u, 264u));
+  uint2 ubo_load_4 = u[1].xy;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  const float16_t l = length(vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]).ywxz);
+  uint2 ubo_load_5 = u[1].xy;
+  vector<float16_t, 2> ubo_load_5_xz = vector<float16_t, 2>(f16tof32(ubo_load_5 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_5_yw = vector<float16_t, 2>(f16tof32(ubo_load_5 >> 16));
+  const float16_t a = abs(vector<float16_t, 4>(ubo_load_5_xz[0], ubo_load_5_yw[0], ubo_load_5_xz[1], ubo_load_5_yw[1]).ywxz.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_builtin.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_builtin.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..523b5b4
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_builtin.wgsl.expected.fxc.hlsl

@@ -0,0 +1,36 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+
+matrix<float16_t, 2, 4> tint_symbol(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  return matrix<float16_t, 2, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 4, 2> t = transpose(tint_symbol(u, 264u));
+  uint2 ubo_load_4 = u[1].xy;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  const float16_t l = length(vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]).ywxz);
+  uint2 ubo_load_5 = u[1].xy;
+  vector<float16_t, 2> ubo_load_5_xz = vector<float16_t, 2>(f16tof32(ubo_load_5 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_5_yw = vector<float16_t, 2>(f16tof32(ubo_load_5 >> 16));
+  const float16_t a = abs(vector<float16_t, 4>(ubo_load_5_xz[0], ubo_load_5_yw[0], ubo_load_5_xz[1], ubo_load_5_yw[1]).ywxz.x);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000002535B2A2420(5,8-16): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_builtin.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_builtin.wgsl.expected.glsl
new file mode 100644
index 0000000..e6f6933
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_builtin.wgsl.expected.glsl

@@ -0,0 +1,87 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct S {
+  int before;
+  uint pad;
+  f16mat2x4 m;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  int after;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+};
+
+struct S_std140 {
+  int before;
+  uint pad;
+  f16vec4 m_0;
+  f16vec4 m_1;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  int after;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  S_std140 inner[4];
+} u;
+
+f16mat2x4 load_u_inner_2_m() {
+  return f16mat2x4(u.inner[2u].m_0, u.inner[2u].m_1);
+}
+
+void f() {
+  f16mat4x2 t = transpose(load_u_inner_2_m());
+  float16_t l = length(u.inner[0u].m_1.ywxz);
+  float16_t a = abs(u.inner[0u].m_1.ywxz[0u]);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_builtin.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_builtin.wgsl.expected.msl
new file mode 100644
index 0000000..4be54f8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_builtin.wgsl.expected.msl

@@ -0,0 +1,32 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
+  /* 0x0008 */ half2x4 m;
+  /* 0x0018 */ tint_array<int8_t, 40> tint_pad_1;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_2;
+};
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol [[buffer(0)]]) {
+  half4x2 const t = transpose((*(tint_symbol))[2].m);
+  half const l = length(half4((*(tint_symbol))[0].m[1]).ywxz);
+  half const a = fabs(half4((*(tint_symbol))[0].m[1]).ywxz[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_builtin.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_builtin.wgsl.expected.spvasm
new file mode 100644
index 0000000..3017e98
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_builtin.wgsl.expected.spvasm

@@ -0,0 +1,81 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 48
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+         %38 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "after"
+               OpName %u "u"
+               OpName %load_u_inner_2_m "load_u_inner_2_m"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 8
+               OpMemberDecorate %S_std140 2 Offset 16
+               OpMemberDecorate %S_std140 3 Offset 64
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+        %int = OpTypeInt 32 1
+       %half = OpTypeFloat 16
+     %v4half = OpTypeVector %half 4
+   %S_std140 = OpTypeStruct %int %v4half %v4half %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat2v4half = OpTypeMatrix %v4half 2
+         %11 = OpTypeFunction %mat2v4half
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+     %uint_1 = OpConstant %uint 1
+%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half
+       %void = OpTypeVoid
+         %29 = OpTypeFunction %void
+     %v2half = OpTypeVector %half 2
+ %mat4v2half = OpTypeMatrix %v2half 4
+         %39 = OpConstantNull %uint
+%load_u_inner_2_m = OpFunction %mat2v4half None %11
+         %14 = OpLabel
+         %19 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %23 = OpAccessChain %_ptr_Uniform_v4half %19 %uint_1
+         %24 = OpLoad %v4half %23
+         %26 = OpAccessChain %_ptr_Uniform_v4half %19 %uint_2
+         %27 = OpLoad %v4half %26
+         %28 = OpCompositeConstruct %mat2v4half %24 %27
+               OpReturnValue %28
+               OpFunctionEnd
+          %f = OpFunction %void None %29
+         %32 = OpLabel
+         %36 = OpFunctionCall %mat2v4half %load_u_inner_2_m
+         %33 = OpTranspose %mat4v2half %36
+         %40 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0 %39 %uint_2
+         %41 = OpLoad %v4half %40
+         %42 = OpVectorShuffle %v4half %41 %41 1 3 0 2
+         %37 = OpExtInst %half %38 Length %42
+         %44 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0 %39 %uint_2
+         %45 = OpLoad %v4half %44
+         %46 = OpVectorShuffle %v4half %45 %45 1 3 0 2
+         %47 = OpCompositeExtract %half %46 0
+         %43 = OpExtInst %half %38 FAbs %47
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_builtin.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_builtin.wgsl.expected.wgsl
new file mode 100644
index 0000000..e979bf8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_builtin.wgsl.expected.wgsl

@@ -0,0 +1,17 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat2x4<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let t = transpose(u[2].m);
+  let l = length(u[0].m[1].ywxz);
+  let a = abs(u[0].m[1].ywxz.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_fn.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_fn.wgsl
new file mode 100644
index 0000000..c5e4554
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_fn.wgsl

@@ -0,0 +1,25 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat2x4<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+fn a(a : array<S, 4>) {}
+fn b(s : S) {}
+fn c(m : mat2x4<f16>) {}
+fn d(v : vec4<f16>) {}
+fn e(f : f16) {}
+
+@compute @workgroup_size(1)
+fn f() {
+    a(u);
+    b(u[2]);
+    c(u[2].m);
+    d(u[0].m[1].ywxz);
+    e(u[0].m[1].ywxz.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_fn.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_fn.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..8c5270f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_fn.wgsl.expected.dxc.hlsl

@@ -0,0 +1,72 @@
+struct S {
+  int before;
+  matrix<float16_t, 2, 4> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+
+void a(S a_1[4]) {
+}
+
+void b(S s) {
+}
+
+void c(matrix<float16_t, 2, 4> m) {
+}
+
+void d(vector<float16_t, 4> v) {
+}
+
+void e(float16_t f_1) {
+}
+
+matrix<float16_t, 2, 4> tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  return matrix<float16_t, 2, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]));
+}
+
+S tint_symbol_1(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_2 = ((offset + 0u)) / 4;
+  const uint scalar_offset_3 = ((offset + 64u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4]), tint_symbol_3(buffer, (offset + 8u)), asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(tint_symbol_1(u, 256u));
+  c(tint_symbol_3(u, 264u));
+  uint2 ubo_load_4 = u[1].xy;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  d(vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]).ywxz);
+  uint2 ubo_load_5 = u[1].xy;
+  vector<float16_t, 2> ubo_load_5_xz = vector<float16_t, 2>(f16tof32(ubo_load_5 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_5_yw = vector<float16_t, 2>(f16tof32(ubo_load_5 >> 16));
+  e(vector<float16_t, 4>(ubo_load_5_xz[0], ubo_load_5_yw[0], ubo_load_5_xz[1], ubo_load_5_yw[1]).ywxz.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_fn.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_fn.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..226da64
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_fn.wgsl.expected.fxc.hlsl

@@ -0,0 +1,77 @@
+SKIP: FAILED
+
+struct S {
+  int before;
+  matrix<float16_t, 2, 4> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+
+void a(S a_1[4]) {
+}
+
+void b(S s) {
+}
+
+void c(matrix<float16_t, 2, 4> m) {
+}
+
+void d(vector<float16_t, 4> v) {
+}
+
+void e(float16_t f_1) {
+}
+
+matrix<float16_t, 2, 4> tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  return matrix<float16_t, 2, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]));
+}
+
+S tint_symbol_1(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_2 = ((offset + 0u)) / 4;
+  const uint scalar_offset_3 = ((offset + 64u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4]), tint_symbol_3(buffer, (offset + 8u)), asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(tint_symbol_1(u, 256u));
+  c(tint_symbol_3(u, 264u));
+  uint2 ubo_load_4 = u[1].xy;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  d(vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]).ywxz);
+  uint2 ubo_load_5 = u[1].xy;
+  vector<float16_t, 2> ubo_load_5_xz = vector<float16_t, 2>(f16tof32(ubo_load_5 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_5_yw = vector<float16_t, 2>(f16tof32(ubo_load_5 >> 16));
+  e(vector<float16_t, 4>(ubo_load_5_xz[0], ubo_load_5_yw[0], ubo_load_5_xz[1], ubo_load_5_yw[1]).ywxz.x);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001B118E73590(3,10-18): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_fn.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_fn.wgsl.expected.glsl
new file mode 100644
index 0000000..20e586a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_fn.wgsl.expected.glsl

@@ -0,0 +1,118 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct S {
+  int before;
+  uint pad;
+  f16mat2x4 m;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  int after;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+};
+
+struct S_std140 {
+  int before;
+  uint pad;
+  f16vec4 m_0;
+  f16vec4 m_1;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  int after;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  S_std140 inner[4];
+} u;
+
+void a(S a_1[4]) {
+}
+
+void b(S s) {
+}
+
+void c(f16mat2x4 m) {
+}
+
+void d(f16vec4 v) {
+}
+
+void e(float16_t f_1) {
+}
+
+S conv_S(S_std140 val) {
+  return S(val.before, val.pad, f16mat2x4(val.m_0, val.m_1), val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.pad_7, val.pad_8, val.pad_9, val.pad_10, val.after, val.pad_11, val.pad_12, val.pad_13, val.pad_14, val.pad_15, val.pad_16, val.pad_17, val.pad_18, val.pad_19, val.pad_20, val.pad_21, val.pad_22, val.pad_23, val.pad_24, val.pad_25);
+}
+
+S[4] conv_arr4_S(S_std140 val[4]) {
+  S arr[4] = S[4](S(0, 0u, f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_S(val[i]);
+    }
+  }
+  return arr;
+}
+
+f16mat2x4 load_u_inner_2_m() {
+  return f16mat2x4(u.inner[2u].m_0, u.inner[2u].m_1);
+}
+
+void f() {
+  a(conv_arr4_S(u.inner));
+  b(conv_S(u.inner[2u]));
+  c(load_u_inner_2_m());
+  d(u.inner[0u].m_1.ywxz);
+  e(u.inner[0u].m_1.ywxz[0u]);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_fn.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_fn.wgsl.expected.msl
new file mode 100644
index 0000000..d555b48
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_fn.wgsl.expected.msl

@@ -0,0 +1,49 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
+  /* 0x0008 */ half2x4 m;
+  /* 0x0018 */ tint_array<int8_t, 40> tint_pad_1;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_2;
+};
+
+void a(tint_array<S, 4> a_1) {
+}
+
+void b(S s) {
+}
+
+void c(half2x4 m) {
+}
+
+void d(half4 v) {
+}
+
+void e(half f_1) {
+}
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol [[buffer(0)]]) {
+  a(*(tint_symbol));
+  b((*(tint_symbol))[2]);
+  c((*(tint_symbol))[2].m);
+  d(half4((*(tint_symbol))[0].m[1]).ywxz);
+  e(half4((*(tint_symbol))[0].m[1]).ywxz[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_fn.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_fn.wgsl.expected.spvasm
new file mode 100644
index 0000000..da8c5de
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_fn.wgsl.expected.spvasm

@@ -0,0 +1,204 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 119
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "after"
+               OpName %u "u"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %a "a"
+               OpName %a_1 "a_1"
+               OpName %b "b"
+               OpName %s "s"
+               OpName %c "c"
+               OpName %m "m"
+               OpName %d "d"
+               OpName %v "v"
+               OpName %e "e"
+               OpName %f_1 "f_1"
+               OpName %conv_S "conv_S"
+               OpName %val "val"
+               OpName %conv_arr4_S "conv_arr4_S"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_u_inner_2_m "load_u_inner_2_m"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 8
+               OpMemberDecorate %S_std140 2 Offset 16
+               OpMemberDecorate %S_std140 3 Offset 64
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 8
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 8
+               OpMemberDecorate %S 2 Offset 64
+               OpDecorate %_arr_S_uint_4 ArrayStride 128
+        %int = OpTypeInt 32 1
+       %half = OpTypeFloat 16
+     %v4half = OpTypeVector %half 4
+   %S_std140 = OpTypeStruct %int %v4half %v4half %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+       %void = OpTypeVoid
+ %mat2v4half = OpTypeMatrix %v4half 2
+          %S = OpTypeStruct %int %mat2v4half %int
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+         %11 = OpTypeFunction %void %_arr_S_uint_4
+         %19 = OpTypeFunction %void %S
+         %23 = OpTypeFunction %void %mat2v4half
+         %27 = OpTypeFunction %void %v4half
+         %31 = OpTypeFunction %void %half
+         %35 = OpTypeFunction %S %S_std140
+         %45 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
+%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
+         %51 = OpConstantNull %_arr_S_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %54 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
+         %67 = OpConstantNull %_arr_S_std140_uint_4
+%_ptr_Function_S = OpTypePointer Function %S
+%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
+     %uint_1 = OpConstant %uint 1
+         %80 = OpTypeFunction %mat2v4half
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half
+         %96 = OpTypeFunction %void
+%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
+          %a = OpFunction %void None %11
+        %a_1 = OpFunctionParameter %_arr_S_uint_4
+         %18 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %b = OpFunction %void None %19
+          %s = OpFunctionParameter %S
+         %22 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %c = OpFunction %void None %23
+          %m = OpFunctionParameter %mat2v4half
+         %26 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %d = OpFunction %void None %27
+          %v = OpFunctionParameter %v4half
+         %30 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %e = OpFunction %void None %31
+        %f_1 = OpFunctionParameter %half
+         %34 = OpLabel
+               OpReturn
+               OpFunctionEnd
+     %conv_S = OpFunction %S None %35
+        %val = OpFunctionParameter %S_std140
+         %38 = OpLabel
+         %39 = OpCompositeExtract %int %val 0
+         %40 = OpCompositeExtract %v4half %val 1
+         %41 = OpCompositeExtract %v4half %val 2
+         %42 = OpCompositeConstruct %mat2v4half %40 %41
+         %43 = OpCompositeExtract %int %val 3
+         %44 = OpCompositeConstruct %S %39 %42 %43
+               OpReturnValue %44
+               OpFunctionEnd
+%conv_arr4_S = OpFunction %_arr_S_uint_4 None %45
+      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
+         %48 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %51
+          %i = OpVariable %_ptr_Function_uint Function %54
+%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %67
+               OpBranch %55
+         %55 = OpLabel
+               OpLoopMerge %56 %57 None
+               OpBranch %58
+         %58 = OpLabel
+         %60 = OpLoad %uint %i
+         %61 = OpULessThan %bool %60 %uint_4
+         %59 = OpLogicalNot %bool %61
+               OpSelectionMerge %63 None
+               OpBranchConditional %59 %64 %63
+         %64 = OpLabel
+               OpBranch %56
+         %63 = OpLabel
+               OpStore %var_for_index %val_0
+         %68 = OpLoad %uint %i
+         %70 = OpAccessChain %_ptr_Function_S %arr %68
+         %72 = OpLoad %uint %i
+         %74 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %72
+         %75 = OpLoad %S_std140 %74
+         %71 = OpFunctionCall %S %conv_S %75
+               OpStore %70 %71
+               OpBranch %57
+         %57 = OpLabel
+         %76 = OpLoad %uint %i
+         %78 = OpIAdd %uint %76 %uint_1
+               OpStore %i %78
+               OpBranch %55
+         %56 = OpLabel
+         %79 = OpLoad %_arr_S_uint_4 %arr
+               OpReturnValue %79
+               OpFunctionEnd
+%load_u_inner_2_m = OpFunction %mat2v4half None %80
+         %82 = OpLabel
+         %87 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %90 = OpAccessChain %_ptr_Uniform_v4half %87 %uint_1
+         %91 = OpLoad %v4half %90
+         %93 = OpAccessChain %_ptr_Uniform_v4half %87 %uint_2
+         %94 = OpLoad %v4half %93
+         %95 = OpCompositeConstruct %mat2v4half %91 %94
+               OpReturnValue %95
+               OpFunctionEnd
+          %f = OpFunction %void None %96
+         %98 = OpLabel
+        %102 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
+        %103 = OpLoad %_arr_S_std140_uint_4 %102
+        %100 = OpFunctionCall %_arr_S_uint_4 %conv_arr4_S %103
+         %99 = OpFunctionCall %void %a %100
+        %106 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+        %107 = OpLoad %S_std140 %106
+        %105 = OpFunctionCall %S %conv_S %107
+        %104 = OpFunctionCall %void %b %105
+        %109 = OpFunctionCall %mat2v4half %load_u_inner_2_m
+        %108 = OpFunctionCall %void %c %109
+        %111 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0 %54 %uint_2
+        %112 = OpLoad %v4half %111
+        %113 = OpVectorShuffle %v4half %112 %112 1 3 0 2
+        %110 = OpFunctionCall %void %d %113
+        %115 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0 %54 %uint_2
+        %116 = OpLoad %v4half %115
+        %117 = OpVectorShuffle %v4half %116 %116 1 3 0 2
+        %118 = OpCompositeExtract %half %117 0
+        %114 = OpFunctionCall %void %e %118
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_fn.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_fn.wgsl.expected.wgsl
new file mode 100644
index 0000000..6a3e9ac
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_fn.wgsl.expected.wgsl

@@ -0,0 +1,34 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat2x4<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+fn a(a : array<S, 4>) {
+}
+
+fn b(s : S) {
+}
+
+fn c(m : mat2x4<f16>) {
+}
+
+fn d(v : vec4<f16>) {
+}
+
+fn e(f : f16) {
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  a(u);
+  b(u[2]);
+  c(u[2].m);
+  d(u[0].m[1].ywxz);
+  e(u[0].m[1].ywxz.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_private.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_private.wgsl
new file mode 100644
index 0000000..c7e8bbe
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_private.wgsl

@@ -0,0 +1,19 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat2x4<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+var<private> p : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    p = u;
+    p[1] = u[2];
+    p[3].m = u[2].m;
+    p[1].m[0] = u[0].m[1].ywxz;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_private.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_private.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..e1907c3
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_private.wgsl.expected.dxc.hlsl

@@ -0,0 +1,54 @@
+struct S {
+  int before;
+  matrix<float16_t, 2, 4> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+static S p[4] = (S[4])0;
+
+matrix<float16_t, 2, 4> tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  return matrix<float16_t, 2, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]));
+}
+
+S tint_symbol_1(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_2 = ((offset + 0u)) / 4;
+  const uint scalar_offset_3 = ((offset + 64u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4]), tint_symbol_3(buffer, (offset + 8u)), asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = tint_symbol_1(u, 256u);
+  p[3].m = tint_symbol_3(u, 264u);
+  uint2 ubo_load_4 = u[1].xy;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  p[1].m[0] = vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]).ywxz;
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_private.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_private.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..2e389d7
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_private.wgsl.expected.fxc.hlsl

@@ -0,0 +1,59 @@
+SKIP: FAILED
+
+struct S {
+  int before;
+  matrix<float16_t, 2, 4> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+static S p[4] = (S[4])0;
+
+matrix<float16_t, 2, 4> tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  return matrix<float16_t, 2, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]));
+}
+
+S tint_symbol_1(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_2 = ((offset + 0u)) / 4;
+  const uint scalar_offset_3 = ((offset + 64u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4]), tint_symbol_3(buffer, (offset + 8u)), asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = tint_symbol_1(u, 256u);
+  p[3].m = tint_symbol_3(u, 264u);
+  uint2 ubo_load_4 = u[1].xy;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  p[1].m[0] = vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]).ywxz;
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000012E5540B3A0(3,10-18): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_private.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_private.wgsl.expected.glsl
new file mode 100644
index 0000000..24c839a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_private.wgsl.expected.glsl

@@ -0,0 +1,103 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct S {
+  int before;
+  uint pad;
+  f16mat2x4 m;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  int after;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+};
+
+struct S_std140 {
+  int before;
+  uint pad;
+  f16vec4 m_0;
+  f16vec4 m_1;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  int after;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  S_std140 inner[4];
+} u;
+
+S p[4] = S[4](S(0, 0u, f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+S conv_S(S_std140 val) {
+  return S(val.before, val.pad, f16mat2x4(val.m_0, val.m_1), val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.pad_7, val.pad_8, val.pad_9, val.pad_10, val.after, val.pad_11, val.pad_12, val.pad_13, val.pad_14, val.pad_15, val.pad_16, val.pad_17, val.pad_18, val.pad_19, val.pad_20, val.pad_21, val.pad_22, val.pad_23, val.pad_24, val.pad_25);
+}
+
+S[4] conv_arr4_S(S_std140 val[4]) {
+  S arr[4] = S[4](S(0, 0u, f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_S(val[i]);
+    }
+  }
+  return arr;
+}
+
+f16mat2x4 load_u_inner_2_m() {
+  return f16mat2x4(u.inner[2u].m_0, u.inner[2u].m_1);
+}
+
+void f() {
+  p = conv_arr4_S(u.inner);
+  p[1] = conv_S(u.inner[2u]);
+  p[3].m = load_u_inner_2_m();
+  p[1].m[0] = u.inner[0u].m_1.ywxz;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_private.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_private.wgsl.expected.msl
new file mode 100644
index 0000000..c25d4bb
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_private.wgsl.expected.msl

@@ -0,0 +1,34 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
+  /* 0x0008 */ half2x4 m;
+  /* 0x0018 */ tint_array<int8_t, 40> tint_pad_1;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_2;
+};
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol_1 [[buffer(0)]]) {
+  thread tint_array<S, 4> tint_symbol = {};
+  tint_symbol = *(tint_symbol_1);
+  tint_symbol[1] = (*(tint_symbol_1))[2];
+  tint_symbol[3].m = (*(tint_symbol_1))[2].m;
+  tint_symbol[1].m[0] = half4((*(tint_symbol_1))[0].m[1]).ywxz;
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_private.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_private.wgsl.expected.spvasm
new file mode 100644
index 0000000..0d6a9d2
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_private.wgsl.expected.spvasm

@@ -0,0 +1,171 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 101
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "after"
+               OpName %u "u"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %p "p"
+               OpName %conv_S "conv_S"
+               OpName %val "val"
+               OpName %conv_arr4_S "conv_arr4_S"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_u_inner_2_m "load_u_inner_2_m"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 8
+               OpMemberDecorate %S_std140 2 Offset 16
+               OpMemberDecorate %S_std140 3 Offset 64
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 8
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 8
+               OpMemberDecorate %S 2 Offset 64
+               OpDecorate %_arr_S_uint_4 ArrayStride 128
+        %int = OpTypeInt 32 1
+       %half = OpTypeFloat 16
+     %v4half = OpTypeVector %half 4
+   %S_std140 = OpTypeStruct %int %v4half %v4half %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat2v4half = OpTypeMatrix %v4half 2
+          %S = OpTypeStruct %int %mat2v4half %int
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+%_ptr_Private__arr_S_uint_4 = OpTypePointer Private %_arr_S_uint_4
+         %16 = OpConstantNull %_arr_S_uint_4
+          %p = OpVariable %_ptr_Private__arr_S_uint_4 Private %16
+         %17 = OpTypeFunction %S %S_std140
+         %27 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
+%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %35 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
+         %48 = OpConstantNull %_arr_S_std140_uint_4
+%_ptr_Function_S = OpTypePointer Function %S
+%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
+     %uint_1 = OpConstant %uint 1
+         %61 = OpTypeFunction %mat2v4half
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half
+       %void = OpTypeVoid
+         %77 = OpTypeFunction %void
+%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
+      %int_1 = OpConstant %int 1
+%_ptr_Private_S = OpTypePointer Private %S
+      %int_3 = OpConstant %int 3
+%_ptr_Private_mat2v4half = OpTypePointer Private %mat2v4half
+         %95 = OpConstantNull %int
+%_ptr_Private_v4half = OpTypePointer Private %v4half
+     %conv_S = OpFunction %S None %17
+        %val = OpFunctionParameter %S_std140
+         %20 = OpLabel
+         %21 = OpCompositeExtract %int %val 0
+         %22 = OpCompositeExtract %v4half %val 1
+         %23 = OpCompositeExtract %v4half %val 2
+         %24 = OpCompositeConstruct %mat2v4half %22 %23
+         %25 = OpCompositeExtract %int %val 3
+         %26 = OpCompositeConstruct %S %21 %24 %25
+               OpReturnValue %26
+               OpFunctionEnd
+%conv_arr4_S = OpFunction %_arr_S_uint_4 None %27
+      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
+         %30 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %16
+          %i = OpVariable %_ptr_Function_uint Function %35
+%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %48
+               OpBranch %36
+         %36 = OpLabel
+               OpLoopMerge %37 %38 None
+               OpBranch %39
+         %39 = OpLabel
+         %41 = OpLoad %uint %i
+         %42 = OpULessThan %bool %41 %uint_4
+         %40 = OpLogicalNot %bool %42
+               OpSelectionMerge %44 None
+               OpBranchConditional %40 %45 %44
+         %45 = OpLabel
+               OpBranch %37
+         %44 = OpLabel
+               OpStore %var_for_index %val_0
+         %49 = OpLoad %uint %i
+         %51 = OpAccessChain %_ptr_Function_S %arr %49
+         %53 = OpLoad %uint %i
+         %55 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %53
+         %56 = OpLoad %S_std140 %55
+         %52 = OpFunctionCall %S %conv_S %56
+               OpStore %51 %52
+               OpBranch %38
+         %38 = OpLabel
+         %57 = OpLoad %uint %i
+         %59 = OpIAdd %uint %57 %uint_1
+               OpStore %i %59
+               OpBranch %36
+         %37 = OpLabel
+         %60 = OpLoad %_arr_S_uint_4 %arr
+               OpReturnValue %60
+               OpFunctionEnd
+%load_u_inner_2_m = OpFunction %mat2v4half None %61
+         %63 = OpLabel
+         %68 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %71 = OpAccessChain %_ptr_Uniform_v4half %68 %uint_1
+         %72 = OpLoad %v4half %71
+         %74 = OpAccessChain %_ptr_Uniform_v4half %68 %uint_2
+         %75 = OpLoad %v4half %74
+         %76 = OpCompositeConstruct %mat2v4half %72 %75
+               OpReturnValue %76
+               OpFunctionEnd
+          %f = OpFunction %void None %77
+         %80 = OpLabel
+         %83 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
+         %84 = OpLoad %_arr_S_std140_uint_4 %83
+         %81 = OpFunctionCall %_arr_S_uint_4 %conv_arr4_S %84
+               OpStore %p %81
+         %87 = OpAccessChain %_ptr_Private_S %p %int_1
+         %89 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %90 = OpLoad %S_std140 %89
+         %88 = OpFunctionCall %S %conv_S %90
+               OpStore %87 %88
+         %93 = OpAccessChain %_ptr_Private_mat2v4half %p %int_3 %uint_1
+         %94 = OpFunctionCall %mat2v4half %load_u_inner_2_m
+               OpStore %93 %94
+         %97 = OpAccessChain %_ptr_Private_v4half %p %int_1 %uint_1 %95
+         %98 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0 %35 %uint_2
+         %99 = OpLoad %v4half %98
+        %100 = OpVectorShuffle %v4half %99 %99 1 3 0 2
+               OpStore %97 %100
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_private.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_private.wgsl.expected.wgsl
new file mode 100644
index 0000000..786eded
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_private.wgsl.expected.wgsl

@@ -0,0 +1,20 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat2x4<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+var<private> p : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  p = u;
+  p[1] = u[2];
+  p[3].m = u[2].m;
+  p[1].m[0] = u[0].m[1].ywxz;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_storage.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_storage.wgsl
new file mode 100644
index 0000000..1f8799d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_storage.wgsl

@@ -0,0 +1,19 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat2x4<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    s = u;
+    s[1] = u[2];
+    s[3].m = u[2].m;
+    s[1].m[0] = u[0].m[1].ywxz;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_storage.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_storage.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..10a3807
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_storage.wgsl.expected.dxc.hlsl

@@ -0,0 +1,74 @@
+struct S {
+  int before;
+  matrix<float16_t, 2, 4> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol_3(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 2, 4> value) {
+  buffer.Store<vector<float16_t, 4> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 8u), value[1u]);
+}
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, S value) {
+  buffer.Store((offset + 0u), asuint(value.before));
+  tint_symbol_3(buffer, (offset + 8u), value.m);
+  buffer.Store((offset + 64u), asuint(value.after));
+}
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, S value[4]) {
+  S array[4] = value;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      tint_symbol_1(buffer, (offset + (i * 128u)), array[i]);
+    }
+  }
+}
+
+matrix<float16_t, 2, 4> tint_symbol_8(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  return matrix<float16_t, 2, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]));
+}
+
+S tint_symbol_6(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_2 = ((offset + 0u)) / 4;
+  const uint scalar_offset_3 = ((offset + 64u)) / 4;
+  const S tint_symbol_10 = {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4]), tint_symbol_8(buffer, (offset + 8u)), asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])};
+  return tint_symbol_10;
+}
+
+typedef S tint_symbol_5_ret[4];
+tint_symbol_5_ret tint_symbol_5(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_6(buffer, (offset + (i_1 * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_5(u, 0u));
+  tint_symbol_1(s, 128u, tint_symbol_6(u, 256u));
+  tint_symbol_3(s, 392u, tint_symbol_8(u, 264u));
+  uint2 ubo_load_4 = u[1].xy;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  s.Store<vector<float16_t, 4> >(136u, vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]).ywxz);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_storage.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_storage.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..371bd43
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_storage.wgsl.expected.fxc.hlsl

@@ -0,0 +1,79 @@
+SKIP: FAILED
+
+struct S {
+  int before;
+  matrix<float16_t, 2, 4> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol_3(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 2, 4> value) {
+  buffer.Store<vector<float16_t, 4> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 8u), value[1u]);
+}
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, S value) {
+  buffer.Store((offset + 0u), asuint(value.before));
+  tint_symbol_3(buffer, (offset + 8u), value.m);
+  buffer.Store((offset + 64u), asuint(value.after));
+}
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, S value[4]) {
+  S array[4] = value;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      tint_symbol_1(buffer, (offset + (i * 128u)), array[i]);
+    }
+  }
+}
+
+matrix<float16_t, 2, 4> tint_symbol_8(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  return matrix<float16_t, 2, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]));
+}
+
+S tint_symbol_6(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_2 = ((offset + 0u)) / 4;
+  const uint scalar_offset_3 = ((offset + 64u)) / 4;
+  const S tint_symbol_10 = {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4]), tint_symbol_8(buffer, (offset + 8u)), asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])};
+  return tint_symbol_10;
+}
+
+typedef S tint_symbol_5_ret[4];
+tint_symbol_5_ret tint_symbol_5(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_6(buffer, (offset + (i_1 * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_5(u, 0u));
+  tint_symbol_1(s, 128u, tint_symbol_6(u, 256u));
+  tint_symbol_3(s, 392u, tint_symbol_8(u, 264u));
+  uint2 ubo_load_4 = u[1].xy;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  s.Store<vector<float16_t, 4> >(136u, vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]).ywxz);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x00000270F5E70100(3,10-18): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_storage.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_storage.wgsl.expected.glsl
new file mode 100644
index 0000000..e8dce94
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_storage.wgsl.expected.glsl

@@ -0,0 +1,106 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct S {
+  int before;
+  uint pad;
+  f16mat2x4 m;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  int after;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+};
+
+struct S_std140 {
+  int before;
+  uint pad;
+  f16vec4 m_0;
+  f16vec4 m_1;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  int after;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  S_std140 inner[4];
+} u;
+
+layout(binding = 1, std430) buffer u_block_ssbo {
+  S inner[4];
+} s;
+
+S conv_S(S_std140 val) {
+  return S(val.before, val.pad, f16mat2x4(val.m_0, val.m_1), val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.pad_7, val.pad_8, val.pad_9, val.pad_10, val.after, val.pad_11, val.pad_12, val.pad_13, val.pad_14, val.pad_15, val.pad_16, val.pad_17, val.pad_18, val.pad_19, val.pad_20, val.pad_21, val.pad_22, val.pad_23, val.pad_24, val.pad_25);
+}
+
+S[4] conv_arr4_S(S_std140 val[4]) {
+  S arr[4] = S[4](S(0, 0u, f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_S(val[i]);
+    }
+  }
+  return arr;
+}
+
+f16mat2x4 load_u_inner_2_m() {
+  return f16mat2x4(u.inner[2u].m_0, u.inner[2u].m_1);
+}
+
+void f() {
+  s.inner = conv_arr4_S(u.inner);
+  s.inner[1] = conv_S(u.inner[2u]);
+  s.inner[3].m = load_u_inner_2_m();
+  s.inner[1].m[0] = u.inner[0u].m_1.ywxz;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_storage.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_storage.wgsl.expected.msl
new file mode 100644
index 0000000..3b7a3c3
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_storage.wgsl.expected.msl

@@ -0,0 +1,33 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
+  /* 0x0008 */ half2x4 m;
+  /* 0x0018 */ tint_array<int8_t, 40> tint_pad_1;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_2;
+};
+
+kernel void f(device tint_array<S, 4>* tint_symbol [[buffer(1)]], const constant tint_array<S, 4>* tint_symbol_1 [[buffer(0)]]) {
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[2];
+  (*(tint_symbol))[3].m = (*(tint_symbol_1))[2].m;
+  (*(tint_symbol))[1].m[0] = half4((*(tint_symbol_1))[0].m[1]).ywxz;
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_storage.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_storage.wgsl.expected.spvasm
new file mode 100644
index 0000000..6f415cb
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_storage.wgsl.expected.spvasm

@@ -0,0 +1,180 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 104
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "after"
+               OpName %u "u"
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %s "s"
+               OpName %conv_S "conv_S"
+               OpName %val "val"
+               OpName %conv_arr4_S "conv_arr4_S"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_u_inner_2_m "load_u_inner_2_m"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 8
+               OpMemberDecorate %S_std140 2 Offset 16
+               OpMemberDecorate %S_std140 3 Offset 64
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 8
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 8
+               OpMemberDecorate %S 2 Offset 64
+               OpDecorate %_arr_S_uint_4 ArrayStride 128
+               OpDecorate %s DescriptorSet 0
+               OpDecorate %s Binding 1
+        %int = OpTypeInt 32 1
+       %half = OpTypeFloat 16
+     %v4half = OpTypeVector %half 4
+   %S_std140 = OpTypeStruct %int %v4half %v4half %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat2v4half = OpTypeMatrix %v4half 2
+          %S = OpTypeStruct %int %mat2v4half %int
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+    %u_block = OpTypeStruct %_arr_S_uint_4
+%_ptr_StorageBuffer_u_block = OpTypePointer StorageBuffer %u_block
+          %s = OpVariable %_ptr_StorageBuffer_u_block StorageBuffer
+         %17 = OpTypeFunction %S %S_std140
+         %27 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
+%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
+         %33 = OpConstantNull %_arr_S_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %36 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
+         %49 = OpConstantNull %_arr_S_std140_uint_4
+%_ptr_Function_S = OpTypePointer Function %S
+%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
+     %uint_1 = OpConstant %uint 1
+         %62 = OpTypeFunction %mat2v4half
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half
+       %void = OpTypeVoid
+         %78 = OpTypeFunction %void
+%_ptr_StorageBuffer__arr_S_uint_4 = OpTypePointer StorageBuffer %_arr_S_uint_4
+%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
+      %int_1 = OpConstant %int 1
+%_ptr_StorageBuffer_S = OpTypePointer StorageBuffer %S
+      %int_3 = OpConstant %int 3
+%_ptr_StorageBuffer_mat2v4half = OpTypePointer StorageBuffer %mat2v4half
+         %98 = OpConstantNull %int
+%_ptr_StorageBuffer_v4half = OpTypePointer StorageBuffer %v4half
+     %conv_S = OpFunction %S None %17
+        %val = OpFunctionParameter %S_std140
+         %20 = OpLabel
+         %21 = OpCompositeExtract %int %val 0
+         %22 = OpCompositeExtract %v4half %val 1
+         %23 = OpCompositeExtract %v4half %val 2
+         %24 = OpCompositeConstruct %mat2v4half %22 %23
+         %25 = OpCompositeExtract %int %val 3
+         %26 = OpCompositeConstruct %S %21 %24 %25
+               OpReturnValue %26
+               OpFunctionEnd
+%conv_arr4_S = OpFunction %_arr_S_uint_4 None %27
+      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
+         %30 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %33
+          %i = OpVariable %_ptr_Function_uint Function %36
+%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %49
+               OpBranch %37
+         %37 = OpLabel
+               OpLoopMerge %38 %39 None
+               OpBranch %40
+         %40 = OpLabel
+         %42 = OpLoad %uint %i
+         %43 = OpULessThan %bool %42 %uint_4
+         %41 = OpLogicalNot %bool %43
+               OpSelectionMerge %45 None
+               OpBranchConditional %41 %46 %45
+         %46 = OpLabel
+               OpBranch %38
+         %45 = OpLabel
+               OpStore %var_for_index %val_0
+         %50 = OpLoad %uint %i
+         %52 = OpAccessChain %_ptr_Function_S %arr %50
+         %54 = OpLoad %uint %i
+         %56 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %54
+         %57 = OpLoad %S_std140 %56
+         %53 = OpFunctionCall %S %conv_S %57
+               OpStore %52 %53
+               OpBranch %39
+         %39 = OpLabel
+         %58 = OpLoad %uint %i
+         %60 = OpIAdd %uint %58 %uint_1
+               OpStore %i %60
+               OpBranch %37
+         %38 = OpLabel
+         %61 = OpLoad %_arr_S_uint_4 %arr
+               OpReturnValue %61
+               OpFunctionEnd
+%load_u_inner_2_m = OpFunction %mat2v4half None %62
+         %64 = OpLabel
+         %69 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %72 = OpAccessChain %_ptr_Uniform_v4half %69 %uint_1
+         %73 = OpLoad %v4half %72
+         %75 = OpAccessChain %_ptr_Uniform_v4half %69 %uint_2
+         %76 = OpLoad %v4half %75
+         %77 = OpCompositeConstruct %mat2v4half %73 %76
+               OpReturnValue %77
+               OpFunctionEnd
+          %f = OpFunction %void None %78
+         %81 = OpLabel
+         %83 = OpAccessChain %_ptr_StorageBuffer__arr_S_uint_4 %s %uint_0
+         %86 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
+         %87 = OpLoad %_arr_S_std140_uint_4 %86
+         %84 = OpFunctionCall %_arr_S_uint_4 %conv_arr4_S %87
+               OpStore %83 %84
+         %90 = OpAccessChain %_ptr_StorageBuffer_S %s %uint_0 %int_1
+         %92 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %93 = OpLoad %S_std140 %92
+         %91 = OpFunctionCall %S %conv_S %93
+               OpStore %90 %91
+         %96 = OpAccessChain %_ptr_StorageBuffer_mat2v4half %s %uint_0 %int_3 %uint_1
+         %97 = OpFunctionCall %mat2v4half %load_u_inner_2_m
+               OpStore %96 %97
+        %100 = OpAccessChain %_ptr_StorageBuffer_v4half %s %uint_0 %int_1 %uint_1 %98
+        %101 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0 %36 %uint_2
+        %102 = OpLoad %v4half %101
+        %103 = OpVectorShuffle %v4half %102 %102 1 3 0 2
+               OpStore %100 %103
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_storage.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_storage.wgsl.expected.wgsl
new file mode 100644
index 0000000..827edf0
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_storage.wgsl.expected.wgsl

@@ -0,0 +1,20 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat2x4<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  s = u;
+  s[1] = u[2];
+  s[3].m = u[2].m;
+  s[1].m[0] = u[0].m[1].ywxz;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_workgroup.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_workgroup.wgsl
new file mode 100644
index 0000000..0af4994
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_workgroup.wgsl

@@ -0,0 +1,19 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat2x4<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+var<workgroup> w : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    w = u;
+    w[1] = u[2];
+    w[3].m = u[2].m;
+    w[1].m[0] = u[0].m[1].ywxz;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_workgroup.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_workgroup.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..e57c69b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_workgroup.wgsl.expected.dxc.hlsl

@@ -0,0 +1,70 @@
+struct S {
+  int before;
+  matrix<float16_t, 2, 4> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+groupshared S w[4];
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+matrix<float16_t, 2, 4> tint_symbol_5(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  return matrix<float16_t, 2, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]));
+}
+
+S tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_2 = ((offset + 0u)) / 4;
+  const uint scalar_offset_3 = ((offset + 64u)) / 4;
+  const S tint_symbol_8 = {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4]), tint_symbol_5(buffer, (offset + 8u)), asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])};
+  return tint_symbol_8;
+}
+
+typedef S tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 128u)));
+    }
+  }
+  return arr;
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      const uint i = idx;
+      const S tint_symbol_7 = (S)0;
+      w[i] = tint_symbol_7;
+    }
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = tint_symbol_3(u, 256u);
+  w[3].m = tint_symbol_5(u, 264u);
+  uint2 ubo_load_4 = u[1].xy;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  w[1].m[0] = vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]).ywxz;
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_workgroup.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_workgroup.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..d2b1283
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_workgroup.wgsl.expected.fxc.hlsl

@@ -0,0 +1,75 @@
+SKIP: FAILED
+
+struct S {
+  int before;
+  matrix<float16_t, 2, 4> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+groupshared S w[4];
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+matrix<float16_t, 2, 4> tint_symbol_5(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  return matrix<float16_t, 2, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]));
+}
+
+S tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_2 = ((offset + 0u)) / 4;
+  const uint scalar_offset_3 = ((offset + 64u)) / 4;
+  const S tint_symbol_8 = {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4]), tint_symbol_5(buffer, (offset + 8u)), asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])};
+  return tint_symbol_8;
+}
+
+typedef S tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 128u)));
+    }
+  }
+  return arr;
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      const uint i = idx;
+      const S tint_symbol_7 = (S)0;
+      w[i] = tint_symbol_7;
+    }
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = tint_symbol_3(u, 256u);
+  w[3].m = tint_symbol_5(u, 264u);
+  uint2 ubo_load_4 = u[1].xy;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  w[1].m[0] = vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]).ywxz;
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001B747E825C0(3,10-18): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_workgroup.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_workgroup.wgsl.expected.glsl
new file mode 100644
index 0000000..d8f581b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_workgroup.wgsl.expected.glsl

@@ -0,0 +1,111 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct S {
+  int before;
+  uint pad;
+  f16mat2x4 m;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  int after;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+};
+
+struct S_std140 {
+  int before;
+  uint pad;
+  f16vec4 m_0;
+  f16vec4 m_1;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  int after;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  S_std140 inner[4];
+} u;
+
+shared S w[4];
+S conv_S(S_std140 val) {
+  return S(val.before, val.pad, f16mat2x4(val.m_0, val.m_1), val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.pad_7, val.pad_8, val.pad_9, val.pad_10, val.after, val.pad_11, val.pad_12, val.pad_13, val.pad_14, val.pad_15, val.pad_16, val.pad_17, val.pad_18, val.pad_19, val.pad_20, val.pad_21, val.pad_22, val.pad_23, val.pad_24, val.pad_25);
+}
+
+S[4] conv_arr4_S(S_std140 val[4]) {
+  S arr[4] = S[4](S(0, 0u, f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_S(val[i]);
+    }
+  }
+  return arr;
+}
+
+f16mat2x4 load_u_inner_2_m() {
+  return f16mat2x4(u.inner[2u].m_0, u.inner[2u].m_1);
+}
+
+void f(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      uint i = idx;
+      S tint_symbol = S(0, 0u, f16mat2x4(f16vec4(0.0hf), f16vec4(0.0hf)), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u);
+      w[i] = tint_symbol;
+    }
+  }
+  barrier();
+  w = conv_arr4_S(u.inner);
+  w[1] = conv_S(u.inner[2u]);
+  w[3].m = load_u_inner_2_m();
+  w[1].m[0] = u.inner[0u].m_1.ywxz;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f(gl_LocalInvocationIndex);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_workgroup.wgsl.expected.msl
new file mode 100644
index 0000000..0e256d8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_workgroup.wgsl.expected.msl

@@ -0,0 +1,48 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
+  /* 0x0008 */ half2x4 m;
+  /* 0x0018 */ tint_array<int8_t, 40> tint_pad_1;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_2;
+};
+
+struct tint_symbol_6 {
+  tint_array<S, 4> w;
+};
+
+void f_inner(uint local_invocation_index, threadgroup tint_array<S, 4>* const tint_symbol_1, const constant tint_array<S, 4>* const tint_symbol_2) {
+  for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+    uint const i = idx;
+    S const tint_symbol = S{};
+    (*(tint_symbol_1))[i] = tint_symbol;
+  }
+  threadgroup_barrier(mem_flags::mem_threadgroup);
+  *(tint_symbol_1) = *(tint_symbol_2);
+  (*(tint_symbol_1))[1] = (*(tint_symbol_2))[2];
+  (*(tint_symbol_1))[3].m = (*(tint_symbol_2))[2].m;
+  (*(tint_symbol_1))[1].m[0] = half4((*(tint_symbol_2))[0].m[1]).ywxz;
+}
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol_5 [[buffer(0)]], threadgroup tint_symbol_6* tint_symbol_4 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
+  threadgroup tint_array<S, 4>* const tint_symbol_3 = &((*(tint_symbol_4)).w);
+  f_inner(local_invocation_index, tint_symbol_3, tint_symbol_5);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_workgroup.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_workgroup.wgsl.expected.spvasm
new file mode 100644
index 0000000..81a811f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_workgroup.wgsl.expected.spvasm

@@ -0,0 +1,214 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 126
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f" %local_invocation_index_1
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %local_invocation_index_1 "local_invocation_index_1"
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "after"
+               OpName %u "u"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %w "w"
+               OpName %conv_S "conv_S"
+               OpName %val "val"
+               OpName %conv_arr4_S "conv_arr4_S"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_u_inner_2_m "load_u_inner_2_m"
+               OpName %f_inner "f_inner"
+               OpName %local_invocation_index "local_invocation_index"
+               OpName %idx "idx"
+               OpName %f "f"
+               OpDecorate %local_invocation_index_1 BuiltIn LocalInvocationIndex
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 8
+               OpMemberDecorate %S_std140 2 Offset 16
+               OpMemberDecorate %S_std140 3 Offset 64
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 8
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 8
+               OpMemberDecorate %S 2 Offset 64
+               OpDecorate %_arr_S_uint_4 ArrayStride 128
+       %uint = OpTypeInt 32 0
+%_ptr_Input_uint = OpTypePointer Input %uint
+%local_invocation_index_1 = OpVariable %_ptr_Input_uint Input
+        %int = OpTypeInt 32 1
+       %half = OpTypeFloat 16
+     %v4half = OpTypeVector %half 4
+   %S_std140 = OpTypeStruct %int %v4half %v4half %int
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat2v4half = OpTypeMatrix %v4half 2
+          %S = OpTypeStruct %int %mat2v4half %int
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+%_ptr_Workgroup__arr_S_uint_4 = OpTypePointer Workgroup %_arr_S_uint_4
+          %w = OpVariable %_ptr_Workgroup__arr_S_uint_4 Workgroup
+         %18 = OpTypeFunction %S %S_std140
+         %28 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
+%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
+         %34 = OpConstantNull %_arr_S_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %37 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
+         %50 = OpConstantNull %_arr_S_std140_uint_4
+%_ptr_Function_S = OpTypePointer Function %S
+%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
+     %uint_1 = OpConstant %uint 1
+         %63 = OpTypeFunction %mat2v4half
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half
+       %void = OpTypeVoid
+         %79 = OpTypeFunction %void %uint
+%_ptr_Workgroup_S = OpTypePointer Workgroup %S
+         %97 = OpConstantNull %S
+   %uint_264 = OpConstant %uint 264
+%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
+      %int_1 = OpConstant %int 1
+      %int_3 = OpConstant %int 3
+%_ptr_Workgroup_mat2v4half = OpTypePointer Workgroup %mat2v4half
+        %115 = OpConstantNull %int
+%_ptr_Workgroup_v4half = OpTypePointer Workgroup %v4half
+        %121 = OpTypeFunction %void
+     %conv_S = OpFunction %S None %18
+        %val = OpFunctionParameter %S_std140
+         %21 = OpLabel
+         %22 = OpCompositeExtract %int %val 0
+         %23 = OpCompositeExtract %v4half %val 1
+         %24 = OpCompositeExtract %v4half %val 2
+         %25 = OpCompositeConstruct %mat2v4half %23 %24
+         %26 = OpCompositeExtract %int %val 3
+         %27 = OpCompositeConstruct %S %22 %25 %26
+               OpReturnValue %27
+               OpFunctionEnd
+%conv_arr4_S = OpFunction %_arr_S_uint_4 None %28
+      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
+         %31 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %34
+          %i = OpVariable %_ptr_Function_uint Function %37
+%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %50
+               OpBranch %38
+         %38 = OpLabel
+               OpLoopMerge %39 %40 None
+               OpBranch %41
+         %41 = OpLabel
+         %43 = OpLoad %uint %i
+         %44 = OpULessThan %bool %43 %uint_4
+         %42 = OpLogicalNot %bool %44
+               OpSelectionMerge %46 None
+               OpBranchConditional %42 %47 %46
+         %47 = OpLabel
+               OpBranch %39
+         %46 = OpLabel
+               OpStore %var_for_index %val_0
+         %51 = OpLoad %uint %i
+         %53 = OpAccessChain %_ptr_Function_S %arr %51
+         %55 = OpLoad %uint %i
+         %57 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %55
+         %58 = OpLoad %S_std140 %57
+         %54 = OpFunctionCall %S %conv_S %58
+               OpStore %53 %54
+               OpBranch %40
+         %40 = OpLabel
+         %59 = OpLoad %uint %i
+         %61 = OpIAdd %uint %59 %uint_1
+               OpStore %i %61
+               OpBranch %38
+         %39 = OpLabel
+         %62 = OpLoad %_arr_S_uint_4 %arr
+               OpReturnValue %62
+               OpFunctionEnd
+%load_u_inner_2_m = OpFunction %mat2v4half None %63
+         %65 = OpLabel
+         %70 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %73 = OpAccessChain %_ptr_Uniform_v4half %70 %uint_1
+         %74 = OpLoad %v4half %73
+         %76 = OpAccessChain %_ptr_Uniform_v4half %70 %uint_2
+         %77 = OpLoad %v4half %76
+         %78 = OpCompositeConstruct %mat2v4half %74 %77
+               OpReturnValue %78
+               OpFunctionEnd
+    %f_inner = OpFunction %void None %79
+%local_invocation_index = OpFunctionParameter %uint
+         %83 = OpLabel
+        %idx = OpVariable %_ptr_Function_uint Function %37
+               OpStore %idx %local_invocation_index
+               OpBranch %85
+         %85 = OpLabel
+               OpLoopMerge %86 %87 None
+               OpBranch %88
+         %88 = OpLabel
+         %90 = OpLoad %uint %idx
+         %91 = OpULessThan %bool %90 %uint_4
+         %89 = OpLogicalNot %bool %91
+               OpSelectionMerge %92 None
+               OpBranchConditional %89 %93 %92
+         %93 = OpLabel
+               OpBranch %86
+         %92 = OpLabel
+         %94 = OpLoad %uint %idx
+         %96 = OpAccessChain %_ptr_Workgroup_S %w %94
+               OpStore %96 %97
+               OpBranch %87
+         %87 = OpLabel
+         %98 = OpLoad %uint %idx
+         %99 = OpIAdd %uint %98 %uint_1
+               OpStore %idx %99
+               OpBranch %85
+         %86 = OpLabel
+               OpControlBarrier %uint_2 %uint_2 %uint_264
+        %104 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
+        %105 = OpLoad %_arr_S_std140_uint_4 %104
+        %102 = OpFunctionCall %_arr_S_uint_4 %conv_arr4_S %105
+               OpStore %w %102
+        %107 = OpAccessChain %_ptr_Workgroup_S %w %int_1
+        %109 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+        %110 = OpLoad %S_std140 %109
+        %108 = OpFunctionCall %S %conv_S %110
+               OpStore %107 %108
+        %113 = OpAccessChain %_ptr_Workgroup_mat2v4half %w %int_3 %uint_1
+        %114 = OpFunctionCall %mat2v4half %load_u_inner_2_m
+               OpStore %113 %114
+        %117 = OpAccessChain %_ptr_Workgroup_v4half %w %int_1 %uint_1 %115
+        %118 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0 %37 %uint_2
+        %119 = OpLoad %v4half %118
+        %120 = OpVectorShuffle %v4half %119 %119 1 3 0 2
+               OpStore %117 %120
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %121
+        %123 = OpLabel
+        %125 = OpLoad %uint %local_invocation_index_1
+        %124 = OpFunctionCall %void %f_inner %125
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_workgroup.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_workgroup.wgsl.expected.wgsl
new file mode 100644
index 0000000..4a6de68
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f16/to_workgroup.wgsl.expected.wgsl

@@ -0,0 +1,20 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat2x4<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+var<workgroup> w : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  w = u;
+  w[1] = u[2];
+  w[3].m = u[2].m;
+  w[1].m[0] = u[0].m[1].ywxz;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f32/dynamic_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/dynamic_index_via_ptr.wgsl
new file mode 100644
index 0000000..a36d6af
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/dynamic_index_via_ptr.wgsl

@@ -0,0 +1,32 @@
+struct Inner {
+  @size(64)
+  m : mat2x4<f32>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+var<private> counter = 0;
+fn i() -> i32 { counter++; return counter; }
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a           = &a;
+  let p_a_i         = &((*p_a)[i()]);
+  let p_a_i_a       = &((*p_a_i).a);
+  let p_a_i_a_i     = &((*p_a_i_a)[i()]);
+  let p_a_i_a_i_m   = &((*p_a_i_a_i).m);
+  let p_a_i_a_i_m_i = &((*p_a_i_a_i_m)[i()]);
+
+
+  let l_a             : array<Outer, 4> =  *p_a;
+  let l_a_i           : Outer           =  *p_a_i;
+  let l_a_i_a         : array<Inner, 4> =  *p_a_i_a;
+  let l_a_i_a_i       : Inner           =  *p_a_i_a_i;
+  let l_a_i_a_i_m     : mat2x4<f32>     =  *p_a_i_a_i_m;
+  let l_a_i_a_i_m_i   : vec4<f32>       =  *p_a_i_a_i_m_i;
+  let l_a_i_a_i_m_i_i : f32             = (*p_a_i_a_i_m_i)[i()];
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..0d53f5d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,75 @@
+struct Inner {
+  float2x4 m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+float2x4 tint_symbol_8(uint4 buffer[64], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]));
+}
+
+Inner tint_symbol_7(uint4 buffer[64], uint offset) {
+  const Inner tint_symbol_11 = {tint_symbol_8(buffer, (offset + 0u))};
+  return tint_symbol_11;
+}
+
+typedef Inner tint_symbol_6_ret[4];
+tint_symbol_6_ret tint_symbol_6(uint4 buffer[64], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_7(buffer, (offset + (i_1 * 64u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_5(uint4 buffer[64], uint offset) {
+  const Outer tint_symbol_12 = {tint_symbol_6(buffer, (offset + 0u))};
+  return tint_symbol_12;
+}
+
+typedef Outer tint_symbol_4_ret[4];
+tint_symbol_4_ret tint_symbol_4(uint4 buffer[64], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    for(uint i_2 = 0u; (i_2 < 4u); i_2 = (i_2 + 1u)) {
+      arr_1[i_2] = tint_symbol_5(buffer, (offset + (i_2 * 256u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_a_i_save = i();
+  const int p_a_i_a_i_save = i();
+  const int p_a_i_a_i_m_i_save = i();
+  const Outer l_a[4] = tint_symbol_4(a, 0u);
+  const Outer l_a_i = tint_symbol_5(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a[4] = tint_symbol_6(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a_i = tint_symbol_7(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const float2x4 l_a_i_a_i_m = tint_symbol_8(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const uint scalar_offset_2 = ((((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))) + (16u * uint(p_a_i_a_i_m_i_save)))) / 4;
+  const float4 l_a_i_a_i_m_i = asfloat(a[scalar_offset_2 / 4]);
+  const int tint_symbol = p_a_i_save;
+  const int tint_symbol_1 = p_a_i_a_i_save;
+  const int tint_symbol_2 = p_a_i_a_i_m_i_save;
+  const int tint_symbol_3 = i();
+  const uint scalar_offset_3 = (((((256u * uint(tint_symbol)) + (64u * uint(tint_symbol_1))) + (16u * uint(tint_symbol_2))) + (4u * uint(tint_symbol_3)))) / 4;
+  const float l_a_i_a_i_m_i_i = asfloat(a[scalar_offset_3 / 4][scalar_offset_3 % 4]);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..0d53f5d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,75 @@
+struct Inner {
+  float2x4 m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+float2x4 tint_symbol_8(uint4 buffer[64], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]));
+}
+
+Inner tint_symbol_7(uint4 buffer[64], uint offset) {
+  const Inner tint_symbol_11 = {tint_symbol_8(buffer, (offset + 0u))};
+  return tint_symbol_11;
+}
+
+typedef Inner tint_symbol_6_ret[4];
+tint_symbol_6_ret tint_symbol_6(uint4 buffer[64], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_7(buffer, (offset + (i_1 * 64u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_5(uint4 buffer[64], uint offset) {
+  const Outer tint_symbol_12 = {tint_symbol_6(buffer, (offset + 0u))};
+  return tint_symbol_12;
+}
+
+typedef Outer tint_symbol_4_ret[4];
+tint_symbol_4_ret tint_symbol_4(uint4 buffer[64], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    for(uint i_2 = 0u; (i_2 < 4u); i_2 = (i_2 + 1u)) {
+      arr_1[i_2] = tint_symbol_5(buffer, (offset + (i_2 * 256u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_a_i_save = i();
+  const int p_a_i_a_i_save = i();
+  const int p_a_i_a_i_m_i_save = i();
+  const Outer l_a[4] = tint_symbol_4(a, 0u);
+  const Outer l_a_i = tint_symbol_5(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a[4] = tint_symbol_6(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a_i = tint_symbol_7(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const float2x4 l_a_i_a_i_m = tint_symbol_8(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const uint scalar_offset_2 = ((((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))) + (16u * uint(p_a_i_a_i_m_i_save)))) / 4;
+  const float4 l_a_i_a_i_m_i = asfloat(a[scalar_offset_2 / 4]);
+  const int tint_symbol = p_a_i_save;
+  const int tint_symbol_1 = p_a_i_a_i_save;
+  const int tint_symbol_2 = p_a_i_a_i_m_i_save;
+  const int tint_symbol_3 = i();
+  const uint scalar_offset_3 = (((((256u * uint(tint_symbol)) + (64u * uint(tint_symbol_1))) + (16u * uint(tint_symbol_2))) + (4u * uint(tint_symbol_3)))) / 4;
+  const float l_a_i_a_i_m_i_i = asfloat(a[scalar_offset_3 / 4][scalar_offset_3 % 4]);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f32/dynamic_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/dynamic_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..5503c5a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/dynamic_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,50 @@
+#version 310 es
+
+struct Inner {
+  mat2x4 m;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+};
+
+struct Outer {
+  Inner a[4];
+};
+
+layout(binding = 0, std140) uniform a_block_ubo {
+  Outer inner[4];
+} a;
+
+int counter = 0;
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+void f() {
+  int tint_symbol = i();
+  int p_a_i_save = tint_symbol;
+  int tint_symbol_1 = i();
+  int p_a_i_a_i_save = tint_symbol_1;
+  int tint_symbol_2 = i();
+  int p_a_i_a_i_m_i_save = tint_symbol_2;
+  Outer l_a[4] = a.inner;
+  Outer l_a_i = a.inner[p_a_i_save];
+  Inner l_a_i_a[4] = a.inner[p_a_i_save].a;
+  Inner l_a_i_a_i = a.inner[p_a_i_save].a[p_a_i_a_i_save];
+  mat2x4 l_a_i_a_i_m = a.inner[p_a_i_save].a[p_a_i_a_i_save].m;
+  vec4 l_a_i_a_i_m_i = a.inner[p_a_i_save].a[p_a_i_a_i_save].m[p_a_i_a_i_m_i_save];
+  int tint_symbol_3 = i();
+  float l_a_i_a_i_m_i_i = a.inner[p_a_i_save].a[p_a_i_a_i_save].m[p_a_i_a_i_m_i_save][tint_symbol_3];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f32/dynamic_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/dynamic_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..6092bcc
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/dynamic_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,49 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct Inner {
+  /* 0x0000 */ float2x4 m;
+  /* 0x0020 */ tint_array<int8_t, 32> tint_pad;
+};
+
+struct Outer {
+  /* 0x0000 */ tint_array<Inner, 4> a;
+};
+
+int i() {
+  thread int tint_symbol_4 = 0;
+  tint_symbol_4 = as_type<int>((as_type<uint>(tint_symbol_4) + as_type<uint>(1)));
+  return tint_symbol_4;
+}
+
+kernel void f(const constant tint_array<Outer, 4>* tint_symbol_5 [[buffer(0)]]) {
+  int const tint_symbol = i();
+  int const p_a_i_save = tint_symbol;
+  int const tint_symbol_1 = i();
+  int const p_a_i_a_i_save = tint_symbol_1;
+  int const tint_symbol_2 = i();
+  int const p_a_i_a_i_m_i_save = tint_symbol_2;
+  tint_array<Outer, 4> const l_a = *(tint_symbol_5);
+  Outer const l_a_i = (*(tint_symbol_5))[p_a_i_save];
+  tint_array<Inner, 4> const l_a_i_a = (*(tint_symbol_5))[p_a_i_save].a;
+  Inner const l_a_i_a_i = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save];
+  float2x4 const l_a_i_a_i_m = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save].m;
+  float4 const l_a_i_a_i_m_i = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save].m[p_a_i_a_i_m_i_save];
+  int const tint_symbol_3 = i();
+  float const l_a_i_a_i_m_i_i = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save].m[p_a_i_a_i_m_i_save][tint_symbol_3];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f32/dynamic_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/dynamic_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..3fd6f76
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/dynamic_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,88 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 54
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %a_block "a_block"
+               OpMemberName %a_block 0 "inner"
+               OpName %Outer "Outer"
+               OpMemberName %Outer 0 "a"
+               OpName %Inner "Inner"
+               OpMemberName %Inner 0 "m"
+               OpName %a "a"
+               OpName %counter "counter"
+               OpName %i "i"
+               OpName %f "f"
+               OpDecorate %a_block Block
+               OpMemberDecorate %a_block 0 Offset 0
+               OpMemberDecorate %Outer 0 Offset 0
+               OpMemberDecorate %Inner 0 Offset 0
+               OpMemberDecorate %Inner 0 ColMajor
+               OpMemberDecorate %Inner 0 MatrixStride 16
+               OpDecorate %_arr_Inner_uint_4 ArrayStride 64
+               OpDecorate %_arr_Outer_uint_4 ArrayStride 256
+               OpDecorate %a NonWritable
+               OpDecorate %a DescriptorSet 0
+               OpDecorate %a Binding 0
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%mat2v4float = OpTypeMatrix %v4float 2
+      %Inner = OpTypeStruct %mat2v4float
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_Inner_uint_4 = OpTypeArray %Inner %uint_4
+      %Outer = OpTypeStruct %_arr_Inner_uint_4
+%_arr_Outer_uint_4 = OpTypeArray %Outer %uint_4
+    %a_block = OpTypeStruct %_arr_Outer_uint_4
+%_ptr_Uniform_a_block = OpTypePointer Uniform %a_block
+          %a = OpVariable %_ptr_Uniform_a_block Uniform
+        %int = OpTypeInt 32 1
+         %14 = OpConstantNull %int
+%_ptr_Private_int = OpTypePointer Private %int
+    %counter = OpVariable %_ptr_Private_int Private %14
+         %17 = OpTypeFunction %int
+      %int_1 = OpConstant %int 1
+       %void = OpTypeVoid
+         %24 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_Outer_uint_4 = OpTypePointer Uniform %_arr_Outer_uint_4
+%_ptr_Uniform_Outer = OpTypePointer Uniform %Outer
+%_ptr_Uniform__arr_Inner_uint_4 = OpTypePointer Uniform %_arr_Inner_uint_4
+%_ptr_Uniform_Inner = OpTypePointer Uniform %Inner
+%_ptr_Uniform_mat2v4float = OpTypePointer Uniform %mat2v4float
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+          %i = OpFunction %int None %17
+         %19 = OpLabel
+         %20 = OpLoad %int %counter
+         %22 = OpIAdd %int %20 %int_1
+               OpStore %counter %22
+         %23 = OpLoad %int %counter
+               OpReturnValue %23
+               OpFunctionEnd
+          %f = OpFunction %void None %24
+         %27 = OpLabel
+         %28 = OpFunctionCall %int %i
+         %29 = OpFunctionCall %int %i
+         %30 = OpFunctionCall %int %i
+         %33 = OpAccessChain %_ptr_Uniform__arr_Outer_uint_4 %a %uint_0
+         %34 = OpLoad %_arr_Outer_uint_4 %33
+         %36 = OpAccessChain %_ptr_Uniform_Outer %a %uint_0 %28
+         %37 = OpLoad %Outer %36
+         %39 = OpAccessChain %_ptr_Uniform__arr_Inner_uint_4 %a %uint_0 %28 %uint_0
+         %40 = OpLoad %_arr_Inner_uint_4 %39
+         %42 = OpAccessChain %_ptr_Uniform_Inner %a %uint_0 %28 %uint_0 %29
+         %43 = OpLoad %Inner %42
+         %45 = OpAccessChain %_ptr_Uniform_mat2v4float %a %uint_0 %28 %uint_0 %29 %uint_0
+         %46 = OpLoad %mat2v4float %45
+         %48 = OpAccessChain %_ptr_Uniform_v4float %a %uint_0 %28 %uint_0 %29 %uint_0 %30
+         %49 = OpLoad %v4float %48
+         %50 = OpFunctionCall %int %i
+         %52 = OpAccessChain %_ptr_Uniform_float %a %uint_0 %28 %uint_0 %29 %uint_0 %30 %50
+         %53 = OpLoad %float %52
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f32/dynamic_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/dynamic_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..6ae9702
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/dynamic_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,34 @@
+struct Inner {
+  @size(64)
+  m : mat2x4<f32>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+var<private> counter = 0;
+
+fn i() -> i32 {
+  counter++;
+  return counter;
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &(a);
+  let p_a_i = &((*(p_a))[i()]);
+  let p_a_i_a = &((*(p_a_i)).a);
+  let p_a_i_a_i = &((*(p_a_i_a))[i()]);
+  let p_a_i_a_i_m = &((*(p_a_i_a_i)).m);
+  let p_a_i_a_i_m_i = &((*(p_a_i_a_i_m))[i()]);
+  let l_a : array<Outer, 4> = *(p_a);
+  let l_a_i : Outer = *(p_a_i);
+  let l_a_i_a : array<Inner, 4> = *(p_a_i_a);
+  let l_a_i_a_i : Inner = *(p_a_i_a_i);
+  let l_a_i_a_i_m : mat2x4<f32> = *(p_a_i_a_i_m);
+  let l_a_i_a_i_m_i : vec4<f32> = *(p_a_i_a_i_m_i);
+  let l_a_i_a_i_m_i_i : f32 = (*(p_a_i_a_i_m_i))[i()];
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f32/static_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/static_index_via_ptr.wgsl
new file mode 100644
index 0000000..4ae3bd0
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/static_index_via_ptr.wgsl

@@ -0,0 +1,29 @@
+struct Inner {
+  @size(64)
+  m : mat2x4<f32>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &a;
+  let p_a_3 = &((*p_a)[3]);
+  let p_a_3_a = &((*p_a_3).a);
+  let p_a_3_a_2 = &((*p_a_3_a)[2]);
+  let p_a_3_a_2_m = &((*p_a_3_a_2).m);
+  let p_a_3_a_2_m_1 = &((*p_a_3_a_2_m)[1]);
+
+
+  let l_a             : array<Outer, 4> = *p_a;
+  let l_a_3           : Outer           = *p_a_3;
+  let l_a_3_a         : array<Inner, 4> = *p_a_3_a;
+  let l_a_3_a_2       : Inner           = *p_a_3_a_2;
+  let l_a_3_a_2_m     : mat2x4<f32>     = *p_a_3_a_2_m;
+  let l_a_3_a_2_m_1   : vec4<f32>       = *p_a_3_a_2_m_1;
+  let l_a_3_a_2_m_1_0 : f32             = (*p_a_3_a_2_m_1)[0];
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..9b489d1
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,60 @@
+struct Inner {
+  float2x4 m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];
+};
+
+float2x4 tint_symbol_4(uint4 buffer[64], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]));
+}
+
+Inner tint_symbol_3(uint4 buffer[64], uint offset) {
+  const Inner tint_symbol_7 = {tint_symbol_4(buffer, (offset + 0u))};
+  return tint_symbol_7;
+}
+
+typedef Inner tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[64], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_3(buffer, (offset + (i * 64u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_1(uint4 buffer[64], uint offset) {
+  const Outer tint_symbol_8 = {tint_symbol_2(buffer, (offset + 0u))};
+  return tint_symbol_8;
+}
+
+typedef Outer tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[64], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr_1[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 256u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const Outer l_a[4] = tint_symbol(a, 0u);
+  const Outer l_a_3 = tint_symbol_1(a, 768u);
+  const Inner l_a_3_a[4] = tint_symbol_2(a, 768u);
+  const Inner l_a_3_a_2 = tint_symbol_3(a, 896u);
+  const float2x4 l_a_3_a_2_m = tint_symbol_4(a, 896u);
+  const float4 l_a_3_a_2_m_1 = asfloat(a[57]);
+  const float l_a_3_a_2_m_1_0 = asfloat(a[57].x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..9b489d1
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,60 @@
+struct Inner {
+  float2x4 m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];
+};
+
+float2x4 tint_symbol_4(uint4 buffer[64], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]));
+}
+
+Inner tint_symbol_3(uint4 buffer[64], uint offset) {
+  const Inner tint_symbol_7 = {tint_symbol_4(buffer, (offset + 0u))};
+  return tint_symbol_7;
+}
+
+typedef Inner tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[64], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_3(buffer, (offset + (i * 64u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_1(uint4 buffer[64], uint offset) {
+  const Outer tint_symbol_8 = {tint_symbol_2(buffer, (offset + 0u))};
+  return tint_symbol_8;
+}
+
+typedef Outer tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[64], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr_1[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 256u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const Outer l_a[4] = tint_symbol(a, 0u);
+  const Outer l_a_3 = tint_symbol_1(a, 768u);
+  const Inner l_a_3_a[4] = tint_symbol_2(a, 768u);
+  const Inner l_a_3_a_2 = tint_symbol_3(a, 896u);
+  const float2x4 l_a_3_a_2_m = tint_symbol_4(a, 896u);
+  const float4 l_a_3_a_2_m_1 = asfloat(a[57]);
+  const float l_a_3_a_2_m_1_0 = asfloat(a[57].x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f32/static_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/static_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..d832c5c
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/static_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,37 @@
+#version 310 es
+
+struct Inner {
+  mat2x4 m;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+};
+
+struct Outer {
+  Inner a[4];
+};
+
+layout(binding = 0, std140) uniform a_block_ubo {
+  Outer inner[4];
+} a;
+
+void f() {
+  Outer l_a[4] = a.inner;
+  Outer l_a_3 = a.inner[3];
+  Inner l_a_3_a[4] = a.inner[3].a;
+  Inner l_a_3_a_2 = a.inner[3].a[2];
+  mat2x4 l_a_3_a_2_m = a.inner[3].a[2].m;
+  vec4 l_a_3_a_2_m_1 = a.inner[3].a[2].m[1];
+  float l_a_3_a_2_m_1_0 = a.inner[3].a[2].m[1][0];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f32/static_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/static_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..cf252cb
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/static_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,36 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct Inner {
+  /* 0x0000 */ float2x4 m;
+  /* 0x0020 */ tint_array<int8_t, 32> tint_pad;
+};
+
+struct Outer {
+  /* 0x0000 */ tint_array<Inner, 4> a;
+};
+
+kernel void f(const constant tint_array<Outer, 4>* tint_symbol [[buffer(0)]]) {
+  tint_array<Outer, 4> const l_a = *(tint_symbol);
+  Outer const l_a_3 = (*(tint_symbol))[3];
+  tint_array<Inner, 4> const l_a_3_a = (*(tint_symbol))[3].a;
+  Inner const l_a_3_a_2 = (*(tint_symbol))[3].a[2];
+  float2x4 const l_a_3_a_2_m = (*(tint_symbol))[3].a[2].m;
+  float4 const l_a_3_a_2_m_1 = (*(tint_symbol))[3].a[2].m[1];
+  float const l_a_3_a_2_m_1_0 = (*(tint_symbol))[3].a[2].m[1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f32/static_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/static_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..1d899cf
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/static_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,73 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 44
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %a_block "a_block"
+               OpMemberName %a_block 0 "inner"
+               OpName %Outer "Outer"
+               OpMemberName %Outer 0 "a"
+               OpName %Inner "Inner"
+               OpMemberName %Inner 0 "m"
+               OpName %a "a"
+               OpName %f "f"
+               OpDecorate %a_block Block
+               OpMemberDecorate %a_block 0 Offset 0
+               OpMemberDecorate %Outer 0 Offset 0
+               OpMemberDecorate %Inner 0 Offset 0
+               OpMemberDecorate %Inner 0 ColMajor
+               OpMemberDecorate %Inner 0 MatrixStride 16
+               OpDecorate %_arr_Inner_uint_4 ArrayStride 64
+               OpDecorate %_arr_Outer_uint_4 ArrayStride 256
+               OpDecorate %a NonWritable
+               OpDecorate %a DescriptorSet 0
+               OpDecorate %a Binding 0
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%mat2v4float = OpTypeMatrix %v4float 2
+      %Inner = OpTypeStruct %mat2v4float
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_Inner_uint_4 = OpTypeArray %Inner %uint_4
+      %Outer = OpTypeStruct %_arr_Inner_uint_4
+%_arr_Outer_uint_4 = OpTypeArray %Outer %uint_4
+    %a_block = OpTypeStruct %_arr_Outer_uint_4
+%_ptr_Uniform_a_block = OpTypePointer Uniform %a_block
+          %a = OpVariable %_ptr_Uniform_a_block Uniform
+       %void = OpTypeVoid
+         %13 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_Outer_uint_4 = OpTypePointer Uniform %_arr_Outer_uint_4
+        %int = OpTypeInt 32 1
+      %int_3 = OpConstant %int 3
+%_ptr_Uniform_Outer = OpTypePointer Uniform %Outer
+%_ptr_Uniform__arr_Inner_uint_4 = OpTypePointer Uniform %_arr_Inner_uint_4
+      %int_2 = OpConstant %int 2
+%_ptr_Uniform_Inner = OpTypePointer Uniform %Inner
+%_ptr_Uniform_mat2v4float = OpTypePointer Uniform %mat2v4float
+      %int_1 = OpConstant %int 1
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+         %40 = OpConstantNull %int
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+          %f = OpFunction %void None %13
+         %16 = OpLabel
+         %19 = OpAccessChain %_ptr_Uniform__arr_Outer_uint_4 %a %uint_0
+         %20 = OpLoad %_arr_Outer_uint_4 %19
+         %24 = OpAccessChain %_ptr_Uniform_Outer %a %uint_0 %int_3
+         %25 = OpLoad %Outer %24
+         %27 = OpAccessChain %_ptr_Uniform__arr_Inner_uint_4 %a %uint_0 %int_3 %uint_0
+         %28 = OpLoad %_arr_Inner_uint_4 %27
+         %31 = OpAccessChain %_ptr_Uniform_Inner %a %uint_0 %int_3 %uint_0 %int_2
+         %32 = OpLoad %Inner %31
+         %34 = OpAccessChain %_ptr_Uniform_mat2v4float %a %uint_0 %int_3 %uint_0 %int_2 %uint_0
+         %35 = OpLoad %mat2v4float %34
+         %38 = OpAccessChain %_ptr_Uniform_v4float %a %uint_0 %int_3 %uint_0 %int_2 %uint_0 %int_1
+         %39 = OpLoad %v4float %38
+         %42 = OpAccessChain %_ptr_Uniform_float %a %uint_0 %int_3 %uint_0 %int_2 %uint_0 %int_1 %40
+         %43 = OpLoad %float %42
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f32/static_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/static_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..c29901f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/static_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,27 @@
+struct Inner {
+  @size(64)
+  m : mat2x4<f32>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &(a);
+  let p_a_3 = &((*(p_a))[3]);
+  let p_a_3_a = &((*(p_a_3)).a);
+  let p_a_3_a_2 = &((*(p_a_3_a))[2]);
+  let p_a_3_a_2_m = &((*(p_a_3_a_2)).m);
+  let p_a_3_a_2_m_1 = &((*(p_a_3_a_2_m))[1]);
+  let l_a : array<Outer, 4> = *(p_a);
+  let l_a_3 : Outer = *(p_a_3);
+  let l_a_3_a : array<Inner, 4> = *(p_a_3_a);
+  let l_a_3_a_2 : Inner = *(p_a_3_a_2);
+  let l_a_3_a_2_m : mat2x4<f32> = *(p_a_3_a_2_m);
+  let l_a_3_a_2_m_1 : vec4<f32> = *(p_a_3_a_2_m_1);
+  let l_a_3_a_2_m_1_0 : f32 = (*(p_a_3_a_2_m_1))[0];
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_builtin.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_builtin.wgsl
new file mode 100644
index 0000000..10fd2f2
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_builtin.wgsl

@@ -0,0 +1,15 @@
+struct S {
+  before : i32,
+  m : mat2x4<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    let t = transpose(u[2].m);
+    let l = length(u[0].m[1].ywxz);
+    let a = abs(u[0].m[1].ywxz.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_builtin.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_builtin.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..c7ea341
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_builtin.wgsl.expected.dxc.hlsl

@@ -0,0 +1,17 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+
+float2x4 tint_symbol(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float4x2 t = transpose(tint_symbol(u, 272u));
+  const float l = length(asfloat(u[2]).ywxz);
+  const float a = abs(asfloat(u[2]).ywxz.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_builtin.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_builtin.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..c7ea341
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_builtin.wgsl.expected.fxc.hlsl

@@ -0,0 +1,17 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+
+float2x4 tint_symbol(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float4x2 t = transpose(tint_symbol(u, 272u));
+  const float l = length(asfloat(u[2]).ywxz);
+  const float a = abs(asfloat(u[2]).ywxz.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_builtin.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_builtin.wgsl.expected.glsl
new file mode 100644
index 0000000..21be319
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_builtin.wgsl.expected.glsl

@@ -0,0 +1,45 @@
+#version 310 es
+
+struct S {
+  int before;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  mat2x4 m;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  int after;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+};
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  S inner[4];
+} u;
+
+void f() {
+  mat4x2 t = transpose(u.inner[2].m);
+  float l = length(u.inner[0].m[1].ywxz);
+  float a = abs(u.inner[0].m[1].ywxz.x);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_builtin.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_builtin.wgsl.expected.msl
new file mode 100644
index 0000000..5b9dd4c
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_builtin.wgsl.expected.msl

@@ -0,0 +1,32 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 12> tint_pad;
+  /* 0x0010 */ float2x4 m;
+  /* 0x0030 */ tint_array<int8_t, 16> tint_pad_1;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_2;
+};
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol [[buffer(0)]]) {
+  float4x2 const t = transpose((*(tint_symbol))[2].m);
+  float const l = length(float4((*(tint_symbol))[0].m[1]).ywxz);
+  float const a = fabs(float4((*(tint_symbol))[0].m[1]).ywxz[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_builtin.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_builtin.wgsl.expected.spvasm
new file mode 100644
index 0000000..e9d901a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_builtin.wgsl.expected.spvasm

@@ -0,0 +1,67 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 38
+; Schema: 0
+               OpCapability Shader
+         %26 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %u "u"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 16
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 16
+               OpMemberDecorate %S 2 Offset 64
+               OpDecorate %_arr_S_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+        %int = OpTypeInt 32 1
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%mat2v4float = OpTypeMatrix %v4float 2
+          %S = OpTypeStruct %int %mat2v4float %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+    %u_block = OpTypeStruct %_arr_S_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+       %void = OpTypeVoid
+         %12 = OpTypeFunction %void
+    %v2float = OpTypeVector %float 2
+%mat4v2float = OpTypeMatrix %v2float 4
+     %uint_0 = OpConstant %uint 0
+      %int_2 = OpConstant %int 2
+     %uint_1 = OpConstant %uint 1
+%_ptr_Uniform_mat2v4float = OpTypePointer Uniform %mat2v4float
+         %27 = OpConstantNull %int
+      %int_1 = OpConstant %int 1
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+          %f = OpFunction %void None %12
+         %15 = OpLabel
+         %23 = OpAccessChain %_ptr_Uniform_mat2v4float %u %uint_0 %int_2 %uint_1
+         %24 = OpLoad %mat2v4float %23
+         %16 = OpTranspose %mat4v2float %24
+         %30 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %27 %uint_1 %int_1
+         %31 = OpLoad %v4float %30
+         %32 = OpVectorShuffle %v4float %31 %31 1 3 0 2
+         %25 = OpExtInst %float %26 Length %32
+         %34 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %27 %uint_1 %int_1
+         %35 = OpLoad %v4float %34
+         %36 = OpVectorShuffle %v4float %35 %35 1 3 0 2
+         %37 = OpCompositeExtract %float %36 0
+         %33 = OpExtInst %float %26 FAbs %37
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_builtin.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_builtin.wgsl.expected.wgsl
new file mode 100644
index 0000000..d6255d0
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_builtin.wgsl.expected.wgsl

@@ -0,0 +1,15 @@
+struct S {
+  before : i32,
+  m : mat2x4<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let t = transpose(u[2].m);
+  let l = length(u[0].m[1].ywxz);
+  let a = abs(u[0].m[1].ywxz.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_fn.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_fn.wgsl
new file mode 100644
index 0000000..7a8c874
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_fn.wgsl

@@ -0,0 +1,23 @@
+struct S {
+  before : i32,
+  m : mat2x4<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+fn a(a : array<S, 4>) {}
+fn b(s : S) {}
+fn c(m : mat2x4<f32>) {}
+fn d(v : vec4<f32>) {}
+fn e(f : f32) {}
+
+@compute @workgroup_size(1)
+fn f() {
+    a(u);
+    b(u[2]);
+    c(u[2].m);
+    d(u[0].m[1].ywxz);
+    e(u[0].m[1].ywxz.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_fn.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_fn.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..c3c6996
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_fn.wgsl.expected.dxc.hlsl

@@ -0,0 +1,58 @@
+struct S {
+  int before;
+  float2x4 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+
+void a(S a_1[4]) {
+}
+
+void b(S s) {
+}
+
+void c(float2x4 m) {
+}
+
+void d(float4 v) {
+}
+
+void e(float f_1) {
+}
+
+float2x4 tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]));
+}
+
+S tint_symbol_1(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_2 = ((offset + 0u)) / 4;
+  const uint scalar_offset_3 = ((offset + 64u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4]), tint_symbol_3(buffer, (offset + 16u)), asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(tint_symbol_1(u, 256u));
+  c(tint_symbol_3(u, 272u));
+  d(asfloat(u[2]).ywxz);
+  e(asfloat(u[2]).ywxz.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_fn.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_fn.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..c3c6996
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_fn.wgsl.expected.fxc.hlsl

@@ -0,0 +1,58 @@
+struct S {
+  int before;
+  float2x4 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+
+void a(S a_1[4]) {
+}
+
+void b(S s) {
+}
+
+void c(float2x4 m) {
+}
+
+void d(float4 v) {
+}
+
+void e(float f_1) {
+}
+
+float2x4 tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]));
+}
+
+S tint_symbol_1(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_2 = ((offset + 0u)) / 4;
+  const uint scalar_offset_3 = ((offset + 64u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4]), tint_symbol_3(buffer, (offset + 16u)), asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(tint_symbol_1(u, 256u));
+  c(tint_symbol_3(u, 272u));
+  d(asfloat(u[2]).ywxz);
+  e(asfloat(u[2]).ywxz.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_fn.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_fn.wgsl.expected.glsl
new file mode 100644
index 0000000..4085677
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_fn.wgsl.expected.glsl

@@ -0,0 +1,62 @@
+#version 310 es
+
+struct S {
+  int before;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  mat2x4 m;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  int after;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+};
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  S inner[4];
+} u;
+
+void a(S a_1[4]) {
+}
+
+void b(S s) {
+}
+
+void c(mat2x4 m) {
+}
+
+void d(vec4 v) {
+}
+
+void e(float f_1) {
+}
+
+void f() {
+  a(u.inner);
+  b(u.inner[2]);
+  c(u.inner[2].m);
+  d(u.inner[0].m[1].ywxz);
+  e(u.inner[0].m[1].ywxz.x);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_fn.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_fn.wgsl.expected.msl
new file mode 100644
index 0000000..1914a9d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_fn.wgsl.expected.msl

@@ -0,0 +1,49 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 12> tint_pad;
+  /* 0x0010 */ float2x4 m;
+  /* 0x0030 */ tint_array<int8_t, 16> tint_pad_1;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_2;
+};
+
+void a(tint_array<S, 4> a_1) {
+}
+
+void b(S s) {
+}
+
+void c(float2x4 m) {
+}
+
+void d(float4 v) {
+}
+
+void e(float f_1) {
+}
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol [[buffer(0)]]) {
+  a(*(tint_symbol));
+  b((*(tint_symbol))[2]);
+  c((*(tint_symbol))[2].m);
+  d(float4((*(tint_symbol))[0].m[1]).ywxz);
+  e(float4((*(tint_symbol))[0].m[1]).ywxz[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_fn.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_fn.wgsl.expected.spvasm
new file mode 100644
index 0000000..5904072
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_fn.wgsl.expected.spvasm

@@ -0,0 +1,112 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 63
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %u "u"
+               OpName %a "a"
+               OpName %a_1 "a_1"
+               OpName %b "b"
+               OpName %s "s"
+               OpName %c "c"
+               OpName %m "m"
+               OpName %d "d"
+               OpName %v "v"
+               OpName %e "e"
+               OpName %f_1 "f_1"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 16
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 16
+               OpMemberDecorate %S 2 Offset 64
+               OpDecorate %_arr_S_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+        %int = OpTypeInt 32 1
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%mat2v4float = OpTypeMatrix %v4float 2
+          %S = OpTypeStruct %int %mat2v4float %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+    %u_block = OpTypeStruct %_arr_S_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+       %void = OpTypeVoid
+         %12 = OpTypeFunction %void %_arr_S_uint_4
+         %17 = OpTypeFunction %void %S
+         %21 = OpTypeFunction %void %mat2v4float
+         %25 = OpTypeFunction %void %v4float
+         %29 = OpTypeFunction %void %float
+         %33 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_S_uint_4 = OpTypePointer Uniform %_arr_S_uint_4
+      %int_2 = OpConstant %int 2
+%_ptr_Uniform_S = OpTypePointer Uniform %S
+     %uint_1 = OpConstant %uint 1
+%_ptr_Uniform_mat2v4float = OpTypePointer Uniform %mat2v4float
+         %52 = OpConstantNull %int
+      %int_1 = OpConstant %int 1
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+          %a = OpFunction %void None %12
+        %a_1 = OpFunctionParameter %_arr_S_uint_4
+         %16 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %b = OpFunction %void None %17
+          %s = OpFunctionParameter %S
+         %20 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %c = OpFunction %void None %21
+          %m = OpFunctionParameter %mat2v4float
+         %24 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %d = OpFunction %void None %25
+          %v = OpFunctionParameter %v4float
+         %28 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %e = OpFunction %void None %29
+        %f_1 = OpFunctionParameter %float
+         %32 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %33
+         %35 = OpLabel
+         %39 = OpAccessChain %_ptr_Uniform__arr_S_uint_4 %u %uint_0
+         %40 = OpLoad %_arr_S_uint_4 %39
+         %36 = OpFunctionCall %void %a %40
+         %44 = OpAccessChain %_ptr_Uniform_S %u %uint_0 %int_2
+         %45 = OpLoad %S %44
+         %41 = OpFunctionCall %void %b %45
+         %49 = OpAccessChain %_ptr_Uniform_mat2v4float %u %uint_0 %int_2 %uint_1
+         %50 = OpLoad %mat2v4float %49
+         %46 = OpFunctionCall %void %c %50
+         %55 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %52 %uint_1 %int_1
+         %56 = OpLoad %v4float %55
+         %57 = OpVectorShuffle %v4float %56 %56 1 3 0 2
+         %51 = OpFunctionCall %void %d %57
+         %59 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %52 %uint_1 %int_1
+         %60 = OpLoad %v4float %59
+         %61 = OpVectorShuffle %v4float %60 %60 1 3 0 2
+         %62 = OpCompositeExtract %float %61 0
+         %58 = OpFunctionCall %void %e %62
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_fn.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_fn.wgsl.expected.wgsl
new file mode 100644
index 0000000..1422e88
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_fn.wgsl.expected.wgsl

@@ -0,0 +1,32 @@
+struct S {
+  before : i32,
+  m : mat2x4<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+fn a(a : array<S, 4>) {
+}
+
+fn b(s : S) {
+}
+
+fn c(m : mat2x4<f32>) {
+}
+
+fn d(v : vec4<f32>) {
+}
+
+fn e(f : f32) {
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  a(u);
+  b(u[2]);
+  c(u[2].m);
+  d(u[0].m[1].ywxz);
+  e(u[0].m[1].ywxz.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_private.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_private.wgsl
new file mode 100644
index 0000000..c8add56
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_private.wgsl

@@ -0,0 +1,17 @@
+struct S {
+  before : i32,
+  m : mat2x4<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+var<private> p : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    p = u;
+    p[1] = u[2];
+    p[3].m = u[2].m;
+    p[1].m[0] = u[0].m[1].ywxz;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_private.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_private.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..55acd61
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_private.wgsl.expected.dxc.hlsl

@@ -0,0 +1,43 @@
+struct S {
+  int before;
+  float2x4 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+static S p[4] = (S[4])0;
+
+float2x4 tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]));
+}
+
+S tint_symbol_1(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_2 = ((offset + 0u)) / 4;
+  const uint scalar_offset_3 = ((offset + 64u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4]), tint_symbol_3(buffer, (offset + 16u)), asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = tint_symbol_1(u, 256u);
+  p[3].m = tint_symbol_3(u, 272u);
+  p[1].m[0] = asfloat(u[2]).ywxz;
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_private.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_private.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..55acd61
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_private.wgsl.expected.fxc.hlsl

@@ -0,0 +1,43 @@
+struct S {
+  int before;
+  float2x4 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+static S p[4] = (S[4])0;
+
+float2x4 tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]));
+}
+
+S tint_symbol_1(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_2 = ((offset + 0u)) / 4;
+  const uint scalar_offset_3 = ((offset + 64u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4]), tint_symbol_3(buffer, (offset + 16u)), asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = tint_symbol_1(u, 256u);
+  p[3].m = tint_symbol_3(u, 272u);
+  p[1].m[0] = asfloat(u[2]).ywxz;
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_private.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_private.wgsl.expected.glsl
new file mode 100644
index 0000000..4ac335c
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_private.wgsl.expected.glsl

@@ -0,0 +1,47 @@
+#version 310 es
+
+struct S {
+  int before;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  mat2x4 m;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  int after;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+};
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  S inner[4];
+} u;
+
+S p[4] = S[4](S(0, 0u, 0u, 0u, mat2x4(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, 0u, 0u, mat2x4(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, 0u, 0u, mat2x4(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, 0u, 0u, mat2x4(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+void f() {
+  p = u.inner;
+  p[1] = u.inner[2];
+  p[3].m = u.inner[2].m;
+  p[1].m[0] = u.inner[0].m[1].ywxz;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_private.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_private.wgsl.expected.msl
new file mode 100644
index 0000000..4732140
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_private.wgsl.expected.msl

@@ -0,0 +1,34 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 12> tint_pad;
+  /* 0x0010 */ float2x4 m;
+  /* 0x0030 */ tint_array<int8_t, 16> tint_pad_1;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_2;
+};
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol_1 [[buffer(0)]]) {
+  thread tint_array<S, 4> tint_symbol = {};
+  tint_symbol = *(tint_symbol_1);
+  tint_symbol[1] = (*(tint_symbol_1))[2];
+  tint_symbol[3].m = (*(tint_symbol_1))[2].m;
+  tint_symbol[1].m[0] = float4((*(tint_symbol_1))[0].m[1]).ywxz;
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_private.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_private.wgsl.expected.spvasm
new file mode 100644
index 0000000..43b3c76
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_private.wgsl.expected.spvasm

@@ -0,0 +1,78 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 44
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %u "u"
+               OpName %p "p"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 16
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 16
+               OpMemberDecorate %S 2 Offset 64
+               OpDecorate %_arr_S_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+        %int = OpTypeInt 32 1
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%mat2v4float = OpTypeMatrix %v4float 2
+          %S = OpTypeStruct %int %mat2v4float %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+    %u_block = OpTypeStruct %_arr_S_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%_ptr_Private__arr_S_uint_4 = OpTypePointer Private %_arr_S_uint_4
+         %14 = OpConstantNull %_arr_S_uint_4
+          %p = OpVariable %_ptr_Private__arr_S_uint_4 Private %14
+       %void = OpTypeVoid
+         %15 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_S_uint_4 = OpTypePointer Uniform %_arr_S_uint_4
+      %int_1 = OpConstant %int 1
+%_ptr_Private_S = OpTypePointer Private %S
+      %int_2 = OpConstant %int 2
+%_ptr_Uniform_S = OpTypePointer Uniform %S
+      %int_3 = OpConstant %int 3
+     %uint_1 = OpConstant %uint 1
+%_ptr_Private_mat2v4float = OpTypePointer Private %mat2v4float
+%_ptr_Uniform_mat2v4float = OpTypePointer Uniform %mat2v4float
+         %37 = OpConstantNull %int
+%_ptr_Private_v4float = OpTypePointer Private %v4float
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+          %f = OpFunction %void None %15
+         %18 = OpLabel
+         %21 = OpAccessChain %_ptr_Uniform__arr_S_uint_4 %u %uint_0
+         %22 = OpLoad %_arr_S_uint_4 %21
+               OpStore %p %22
+         %25 = OpAccessChain %_ptr_Private_S %p %int_1
+         %28 = OpAccessChain %_ptr_Uniform_S %u %uint_0 %int_2
+         %29 = OpLoad %S %28
+               OpStore %25 %29
+         %33 = OpAccessChain %_ptr_Private_mat2v4float %p %int_3 %uint_1
+         %35 = OpAccessChain %_ptr_Uniform_mat2v4float %u %uint_0 %int_2 %uint_1
+         %36 = OpLoad %mat2v4float %35
+               OpStore %33 %36
+         %39 = OpAccessChain %_ptr_Private_v4float %p %int_1 %uint_1 %37
+         %41 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %37 %uint_1 %int_1
+         %42 = OpLoad %v4float %41
+         %43 = OpVectorShuffle %v4float %42 %42 1 3 0 2
+               OpStore %39 %43
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_private.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_private.wgsl.expected.wgsl
new file mode 100644
index 0000000..34d2f9a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_private.wgsl.expected.wgsl

@@ -0,0 +1,18 @@
+struct S {
+  before : i32,
+  m : mat2x4<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+var<private> p : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  p = u;
+  p[1] = u[2];
+  p[3].m = u[2].m;
+  p[1].m[0] = u[0].m[1].ywxz;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_storage.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_storage.wgsl
new file mode 100644
index 0000000..075fc8a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_storage.wgsl

@@ -0,0 +1,17 @@
+struct S {
+  before : i32,
+  m : mat2x4<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    s = u;
+    s[1] = u[2];
+    s[3].m = u[2].m;
+    s[1].m[0] = u[0].m[1].ywxz;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_storage.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_storage.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..2146713
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_storage.wgsl.expected.dxc.hlsl

@@ -0,0 +1,63 @@
+struct S {
+  int before;
+  float2x4 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol_3(RWByteAddressBuffer buffer, uint offset, float2x4 value) {
+  buffer.Store4((offset + 0u), asuint(value[0u]));
+  buffer.Store4((offset + 16u), asuint(value[1u]));
+}
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, S value) {
+  buffer.Store((offset + 0u), asuint(value.before));
+  tint_symbol_3(buffer, (offset + 16u), value.m);
+  buffer.Store((offset + 64u), asuint(value.after));
+}
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, S value[4]) {
+  S array[4] = value;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      tint_symbol_1(buffer, (offset + (i * 128u)), array[i]);
+    }
+  }
+}
+
+float2x4 tint_symbol_8(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]));
+}
+
+S tint_symbol_6(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_2 = ((offset + 0u)) / 4;
+  const uint scalar_offset_3 = ((offset + 64u)) / 4;
+  const S tint_symbol_10 = {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4]), tint_symbol_8(buffer, (offset + 16u)), asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])};
+  return tint_symbol_10;
+}
+
+typedef S tint_symbol_5_ret[4];
+tint_symbol_5_ret tint_symbol_5(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_6(buffer, (offset + (i_1 * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_5(u, 0u));
+  tint_symbol_1(s, 128u, tint_symbol_6(u, 256u));
+  tint_symbol_3(s, 400u, tint_symbol_8(u, 272u));
+  s.Store4(144u, asuint(asfloat(u[2]).ywxz));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_storage.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_storage.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..2146713
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_storage.wgsl.expected.fxc.hlsl

@@ -0,0 +1,63 @@
+struct S {
+  int before;
+  float2x4 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol_3(RWByteAddressBuffer buffer, uint offset, float2x4 value) {
+  buffer.Store4((offset + 0u), asuint(value[0u]));
+  buffer.Store4((offset + 16u), asuint(value[1u]));
+}
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, S value) {
+  buffer.Store((offset + 0u), asuint(value.before));
+  tint_symbol_3(buffer, (offset + 16u), value.m);
+  buffer.Store((offset + 64u), asuint(value.after));
+}
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, S value[4]) {
+  S array[4] = value;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      tint_symbol_1(buffer, (offset + (i * 128u)), array[i]);
+    }
+  }
+}
+
+float2x4 tint_symbol_8(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]));
+}
+
+S tint_symbol_6(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_2 = ((offset + 0u)) / 4;
+  const uint scalar_offset_3 = ((offset + 64u)) / 4;
+  const S tint_symbol_10 = {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4]), tint_symbol_8(buffer, (offset + 16u)), asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])};
+  return tint_symbol_10;
+}
+
+typedef S tint_symbol_5_ret[4];
+tint_symbol_5_ret tint_symbol_5(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_6(buffer, (offset + (i_1 * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_5(u, 0u));
+  tint_symbol_1(s, 128u, tint_symbol_6(u, 256u));
+  tint_symbol_3(s, 400u, tint_symbol_8(u, 272u));
+  s.Store4(144u, asuint(asfloat(u[2]).ywxz));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_storage.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_storage.wgsl.expected.glsl
new file mode 100644
index 0000000..d3082c7
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_storage.wgsl.expected.glsl

@@ -0,0 +1,50 @@
+#version 310 es
+
+struct S {
+  int before;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  mat2x4 m;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  int after;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+};
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  S inner[4];
+} u;
+
+layout(binding = 1, std430) buffer u_block_ssbo {
+  S inner[4];
+} s;
+
+void f() {
+  s.inner = u.inner;
+  s.inner[1] = u.inner[2];
+  s.inner[3].m = u.inner[2].m;
+  s.inner[1].m[0] = u.inner[0].m[1].ywxz;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_storage.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_storage.wgsl.expected.msl
new file mode 100644
index 0000000..b3cd658
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_storage.wgsl.expected.msl

@@ -0,0 +1,33 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 12> tint_pad;
+  /* 0x0010 */ float2x4 m;
+  /* 0x0030 */ tint_array<int8_t, 16> tint_pad_1;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_2;
+};
+
+kernel void f(device tint_array<S, 4>* tint_symbol [[buffer(1)]], const constant tint_array<S, 4>* tint_symbol_1 [[buffer(0)]]) {
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[2];
+  (*(tint_symbol))[3].m = (*(tint_symbol_1))[2].m;
+  (*(tint_symbol))[1].m[0] = float4((*(tint_symbol_1))[0].m[1]).ywxz;
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_storage.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_storage.wgsl.expected.spvasm
new file mode 100644
index 0000000..5ba1ccb
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_storage.wgsl.expected.spvasm

@@ -0,0 +1,81 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 45
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %u "u"
+               OpName %s "s"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 16
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 16
+               OpMemberDecorate %S 2 Offset 64
+               OpDecorate %_arr_S_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpDecorate %s DescriptorSet 0
+               OpDecorate %s Binding 1
+        %int = OpTypeInt 32 1
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%mat2v4float = OpTypeMatrix %v4float 2
+          %S = OpTypeStruct %int %mat2v4float %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+    %u_block = OpTypeStruct %_arr_S_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%_ptr_StorageBuffer_u_block = OpTypePointer StorageBuffer %u_block
+          %s = OpVariable %_ptr_StorageBuffer_u_block StorageBuffer
+       %void = OpTypeVoid
+         %14 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_StorageBuffer__arr_S_uint_4 = OpTypePointer StorageBuffer %_arr_S_uint_4
+%_ptr_Uniform__arr_S_uint_4 = OpTypePointer Uniform %_arr_S_uint_4
+      %int_1 = OpConstant %int 1
+%_ptr_StorageBuffer_S = OpTypePointer StorageBuffer %S
+      %int_2 = OpConstant %int 2
+%_ptr_Uniform_S = OpTypePointer Uniform %S
+      %int_3 = OpConstant %int 3
+     %uint_1 = OpConstant %uint 1
+%_ptr_StorageBuffer_mat2v4float = OpTypePointer StorageBuffer %mat2v4float
+%_ptr_Uniform_mat2v4float = OpTypePointer Uniform %mat2v4float
+         %38 = OpConstantNull %int
+%_ptr_StorageBuffer_v4float = OpTypePointer StorageBuffer %v4float
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+          %f = OpFunction %void None %14
+         %17 = OpLabel
+         %20 = OpAccessChain %_ptr_StorageBuffer__arr_S_uint_4 %s %uint_0
+         %22 = OpAccessChain %_ptr_Uniform__arr_S_uint_4 %u %uint_0
+         %23 = OpLoad %_arr_S_uint_4 %22
+               OpStore %20 %23
+         %26 = OpAccessChain %_ptr_StorageBuffer_S %s %uint_0 %int_1
+         %29 = OpAccessChain %_ptr_Uniform_S %u %uint_0 %int_2
+         %30 = OpLoad %S %29
+               OpStore %26 %30
+         %34 = OpAccessChain %_ptr_StorageBuffer_mat2v4float %s %uint_0 %int_3 %uint_1
+         %36 = OpAccessChain %_ptr_Uniform_mat2v4float %u %uint_0 %int_2 %uint_1
+         %37 = OpLoad %mat2v4float %36
+               OpStore %34 %37
+         %40 = OpAccessChain %_ptr_StorageBuffer_v4float %s %uint_0 %int_1 %uint_1 %38
+         %42 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %38 %uint_1 %int_1
+         %43 = OpLoad %v4float %42
+         %44 = OpVectorShuffle %v4float %43 %43 1 3 0 2
+               OpStore %40 %44
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_storage.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_storage.wgsl.expected.wgsl
new file mode 100644
index 0000000..ff82416
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_storage.wgsl.expected.wgsl

@@ -0,0 +1,18 @@
+struct S {
+  before : i32,
+  m : mat2x4<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  s = u;
+  s[1] = u[2];
+  s[3].m = u[2].m;
+  s[1].m[0] = u[0].m[1].ywxz;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_workgroup.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_workgroup.wgsl
new file mode 100644
index 0000000..93675a8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_workgroup.wgsl

@@ -0,0 +1,17 @@
+struct S {
+  before : i32,
+  m : mat2x4<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+var<workgroup> w : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    w = u;
+    w[1] = u[2];
+    w[3].m = u[2].m;
+    w[1].m[0] = u[0].m[1].ywxz;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_workgroup.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_workgroup.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..9f23bf3
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_workgroup.wgsl.expected.dxc.hlsl

@@ -0,0 +1,59 @@
+struct S {
+  int before;
+  float2x4 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+groupshared S w[4];
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+float2x4 tint_symbol_5(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]));
+}
+
+S tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_2 = ((offset + 0u)) / 4;
+  const uint scalar_offset_3 = ((offset + 64u)) / 4;
+  const S tint_symbol_8 = {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4]), tint_symbol_5(buffer, (offset + 16u)), asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])};
+  return tint_symbol_8;
+}
+
+typedef S tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 128u)));
+    }
+  }
+  return arr;
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      const uint i = idx;
+      const S tint_symbol_7 = (S)0;
+      w[i] = tint_symbol_7;
+    }
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = tint_symbol_3(u, 256u);
+  w[3].m = tint_symbol_5(u, 272u);
+  w[1].m[0] = asfloat(u[2]).ywxz;
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_workgroup.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_workgroup.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..9f23bf3
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_workgroup.wgsl.expected.fxc.hlsl

@@ -0,0 +1,59 @@
+struct S {
+  int before;
+  float2x4 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+groupshared S w[4];
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+float2x4 tint_symbol_5(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]));
+}
+
+S tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_2 = ((offset + 0u)) / 4;
+  const uint scalar_offset_3 = ((offset + 64u)) / 4;
+  const S tint_symbol_8 = {asint(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4]), tint_symbol_5(buffer, (offset + 16u)), asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4])};
+  return tint_symbol_8;
+}
+
+typedef S tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 128u)));
+    }
+  }
+  return arr;
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      const uint i = idx;
+      const S tint_symbol_7 = (S)0;
+      w[i] = tint_symbol_7;
+    }
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = tint_symbol_3(u, 256u);
+  w[3].m = tint_symbol_5(u, 272u);
+  w[1].m[0] = asfloat(u[2]).ywxz;
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_workgroup.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_workgroup.wgsl.expected.glsl
new file mode 100644
index 0000000..c16c8ff
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_workgroup.wgsl.expected.glsl

@@ -0,0 +1,55 @@
+#version 310 es
+
+struct S {
+  int before;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  mat2x4 m;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  int after;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+};
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  S inner[4];
+} u;
+
+shared S w[4];
+void f(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      uint i = idx;
+      S tint_symbol = S(0, 0u, 0u, 0u, mat2x4(vec4(0.0f), vec4(0.0f)), 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u);
+      w[i] = tint_symbol;
+    }
+  }
+  barrier();
+  w = u.inner;
+  w[1] = u.inner[2];
+  w[3].m = u.inner[2].m;
+  w[1].m[0] = u.inner[0].m[1].ywxz;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f(gl_LocalInvocationIndex);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_workgroup.wgsl.expected.msl
new file mode 100644
index 0000000..d3138fa
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_workgroup.wgsl.expected.msl

@@ -0,0 +1,48 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 12> tint_pad;
+  /* 0x0010 */ float2x4 m;
+  /* 0x0030 */ tint_array<int8_t, 16> tint_pad_1;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_2;
+};
+
+struct tint_symbol_6 {
+  tint_array<S, 4> w;
+};
+
+void f_inner(uint local_invocation_index, threadgroup tint_array<S, 4>* const tint_symbol_1, const constant tint_array<S, 4>* const tint_symbol_2) {
+  for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+    uint const i = idx;
+    S const tint_symbol = S{};
+    (*(tint_symbol_1))[i] = tint_symbol;
+  }
+  threadgroup_barrier(mem_flags::mem_threadgroup);
+  *(tint_symbol_1) = *(tint_symbol_2);
+  (*(tint_symbol_1))[1] = (*(tint_symbol_2))[2];
+  (*(tint_symbol_1))[3].m = (*(tint_symbol_2))[2].m;
+  (*(tint_symbol_1))[1].m[0] = float4((*(tint_symbol_2))[0].m[1]).ywxz;
+}
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol_5 [[buffer(0)]], threadgroup tint_symbol_6* tint_symbol_4 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
+  threadgroup tint_array<S, 4>* const tint_symbol_3 = &((*(tint_symbol_4)).w);
+  f_inner(local_invocation_index, tint_symbol_3, tint_symbol_5);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_workgroup.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_workgroup.wgsl.expected.spvasm
new file mode 100644
index 0000000..ddf908d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_workgroup.wgsl.expected.spvasm

@@ -0,0 +1,124 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 72
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f" %local_invocation_index_1
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %local_invocation_index_1 "local_invocation_index_1"
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %u "u"
+               OpName %w "w"
+               OpName %f_inner "f_inner"
+               OpName %local_invocation_index "local_invocation_index"
+               OpName %idx "idx"
+               OpName %f "f"
+               OpDecorate %local_invocation_index_1 BuiltIn LocalInvocationIndex
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 16
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 16
+               OpMemberDecorate %S 2 Offset 64
+               OpDecorate %_arr_S_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %uint = OpTypeInt 32 0
+%_ptr_Input_uint = OpTypePointer Input %uint
+%local_invocation_index_1 = OpVariable %_ptr_Input_uint Input
+        %int = OpTypeInt 32 1
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%mat2v4float = OpTypeMatrix %v4float 2
+          %S = OpTypeStruct %int %mat2v4float %int
+     %uint_4 = OpConstant %uint 4
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+    %u_block = OpTypeStruct %_arr_S_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%_ptr_Workgroup__arr_S_uint_4 = OpTypePointer Workgroup %_arr_S_uint_4
+          %w = OpVariable %_ptr_Workgroup__arr_S_uint_4 Workgroup
+       %void = OpTypeVoid
+         %16 = OpTypeFunction %void %uint
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %23 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Workgroup_S = OpTypePointer Workgroup %S
+         %37 = OpConstantNull %S
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+   %uint_264 = OpConstant %uint 264
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_S_uint_4 = OpTypePointer Uniform %_arr_S_uint_4
+      %int_1 = OpConstant %int 1
+      %int_2 = OpConstant %int 2
+%_ptr_Uniform_S = OpTypePointer Uniform %S
+      %int_3 = OpConstant %int 3
+%_ptr_Workgroup_mat2v4float = OpTypePointer Workgroup %mat2v4float
+%_ptr_Uniform_mat2v4float = OpTypePointer Uniform %mat2v4float
+         %60 = OpConstantNull %int
+%_ptr_Workgroup_v4float = OpTypePointer Workgroup %v4float
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+         %67 = OpTypeFunction %void
+    %f_inner = OpFunction %void None %16
+%local_invocation_index = OpFunctionParameter %uint
+         %20 = OpLabel
+        %idx = OpVariable %_ptr_Function_uint Function %23
+               OpStore %idx %local_invocation_index
+               OpBranch %24
+         %24 = OpLabel
+               OpLoopMerge %25 %26 None
+               OpBranch %27
+         %27 = OpLabel
+         %29 = OpLoad %uint %idx
+         %30 = OpULessThan %bool %29 %uint_4
+         %28 = OpLogicalNot %bool %30
+               OpSelectionMerge %32 None
+               OpBranchConditional %28 %33 %32
+         %33 = OpLabel
+               OpBranch %25
+         %32 = OpLabel
+         %34 = OpLoad %uint %idx
+         %36 = OpAccessChain %_ptr_Workgroup_S %w %34
+               OpStore %36 %37
+               OpBranch %26
+         %26 = OpLabel
+         %38 = OpLoad %uint %idx
+         %40 = OpIAdd %uint %38 %uint_1
+               OpStore %idx %40
+               OpBranch %24
+         %25 = OpLabel
+               OpControlBarrier %uint_2 %uint_2 %uint_264
+         %46 = OpAccessChain %_ptr_Uniform__arr_S_uint_4 %u %uint_0
+         %47 = OpLoad %_arr_S_uint_4 %46
+               OpStore %w %47
+         %49 = OpAccessChain %_ptr_Workgroup_S %w %int_1
+         %52 = OpAccessChain %_ptr_Uniform_S %u %uint_0 %int_2
+         %53 = OpLoad %S %52
+               OpStore %49 %53
+         %56 = OpAccessChain %_ptr_Workgroup_mat2v4float %w %int_3 %uint_1
+         %58 = OpAccessChain %_ptr_Uniform_mat2v4float %u %uint_0 %int_2 %uint_1
+         %59 = OpLoad %mat2v4float %58
+               OpStore %56 %59
+         %62 = OpAccessChain %_ptr_Workgroup_v4float %w %int_1 %uint_1 %60
+         %64 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %60 %uint_1 %int_1
+         %65 = OpLoad %v4float %64
+         %66 = OpVectorShuffle %v4float %65 %65 1 3 0 2
+               OpStore %62 %66
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %67
+         %69 = OpLabel
+         %71 = OpLoad %uint %local_invocation_index_1
+         %70 = OpFunctionCall %void %f_inner %71
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_workgroup.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_workgroup.wgsl.expected.wgsl
new file mode 100644
index 0000000..7f37f49
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat2x4_f32/to_workgroup.wgsl.expected.wgsl

@@ -0,0 +1,18 @@
+struct S {
+  before : i32,
+  m : mat2x4<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+var<workgroup> w : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  w = u;
+  w[1] = u[2];
+  w[3].m = u[2].m;
+  w[1].m[0] = u[0].m[1].ywxz;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2/dynamic_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x2/dynamic_index_via_ptr.wgsl
deleted file mode 100644
index 92c91a8..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat3x2/dynamic_index_via_ptr.wgsl
+++ /dev/null

@@ -1,31 +0,0 @@
-struct Inner {
-  @size(64) m : mat3x2<f32>,
-}
-
-struct Outer {
-  a : array<Inner, 4>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
-
-var<private> counter = 0;
-fn i() -> i32 { counter++; return counter; }
-
-@compute @workgroup_size(1)
-fn f() {
-  let p_a           = &a;
-  let p_a_i         = &((*p_a)[i()]);
-  let p_a_i_a       = &((*p_a_i).a);
-  let p_a_i_a_i     = &((*p_a_i_a)[i()]);
-  let p_a_i_a_i_m   = &((*p_a_i_a_i).m);
-  let p_a_i_a_i_m_i = &((*p_a_i_a_i_m)[i()]);
-
-
-  let l_a             : array<Outer, 4> =  *p_a;
-  let l_a_i           : Outer           =  *p_a_i;
-  let l_a_i_a         : array<Inner, 4> =  *p_a_i_a;
-  let l_a_i_a_i       : Inner           =  *p_a_i_a_i;
-  let l_a_i_a_i_m     : mat3x2<f32>     =  *p_a_i_a_i_m;
-  let l_a_i_a_i_m_i   : vec2<f32>       =  *p_a_i_a_i_m_i;
-  let l_a_i_a_i_m_i_i : f32             = (*p_a_i_a_i_m_i)[i()];
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2/static_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x2/static_index_via_ptr.wgsl
deleted file mode 100644
index d6e6f10..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat3x2/static_index_via_ptr.wgsl
+++ /dev/null

@@ -1,28 +0,0 @@
-struct Inner {
-  @size(64) m : mat3x2<f32>,
-}
-
-struct Outer {
-  a : array<Inner, 4>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
-
-@compute @workgroup_size(1)
-fn f() {
-  let p_a = &a;
-  let p_a_3 = &((*p_a)[3]);
-  let p_a_3_a = &((*p_a_3).a);
-  let p_a_3_a_2 = &((*p_a_3_a)[2]);
-  let p_a_3_a_2_m = &((*p_a_3_a_2).m);
-  let p_a_3_a_2_m_1 = &((*p_a_3_a_2_m)[1]);
-
-
-  let l_a             : array<Outer, 4> = *p_a;
-  let l_a_3           : Outer           = *p_a_3;
-  let l_a_3_a         : array<Inner, 4> = *p_a_3_a;
-  let l_a_3_a_2       : Inner           = *p_a_3_a_2;
-  let l_a_3_a_2_m     : mat3x2<f32>     = *p_a_3_a_2_m;
-  let l_a_3_a_2_m_1   : vec2<f32>       = *p_a_3_a_2_m_1;
-  let l_a_3_a_2_m_1_0 : f32             = (*p_a_3_a_2_m_1)[0];
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2/to_builtin.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x2/to_builtin.wgsl
deleted file mode 100644
index 48cde20..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat3x2/to_builtin.wgsl
+++ /dev/null

@@ -1,14 +0,0 @@
-struct S {
-  before : i32,
-  @size(32) m : mat3x2<f32>,
-  after : i32,
-}
-
-@group(0) @binding(0) var<uniform> u : array<S, 4>;
-
-@compute @workgroup_size(1)
-fn f() {
-    let t = transpose(u[2].m);
-    let l = length(u[0].m[1].yx);
-    let a = abs(u[0].m[1].yx.x);
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2/to_builtin.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x2/to_builtin.wgsl.expected.dxc.hlsl
deleted file mode 100644
index 9f083f6..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat3x2/to_builtin.wgsl.expected.dxc.hlsl
+++ /dev/null

@@ -1,21 +0,0 @@
-cbuffer cbuffer_u : register(b0, space0) {
-  uint4 u[12];
-};
-
-float3x2 tint_symbol(uint4 buffer[12], uint offset) {
-  const uint scalar_offset = ((offset + 0u)) / 4;
-  uint4 ubo_load = buffer[scalar_offset / 4];
-  const uint scalar_offset_1 = ((offset + 8u)) / 4;
-  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
-  const uint scalar_offset_2 = ((offset + 16u)) / 4;
-  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
-  return float3x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)));
-}
-
-[numthreads(1, 1, 1)]
-void f() {
-  const float2x3 t = transpose(tint_symbol(u, 104u));
-  const float l = length(asfloat(u[1].xy).yx);
-  const float a = abs(asfloat(u[1].xy).yx.x);
-  return;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2/to_builtin.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x2/to_builtin.wgsl.expected.fxc.hlsl
deleted file mode 100644
index 9f083f6..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat3x2/to_builtin.wgsl.expected.fxc.hlsl
+++ /dev/null

@@ -1,21 +0,0 @@
-cbuffer cbuffer_u : register(b0, space0) {
-  uint4 u[12];
-};
-
-float3x2 tint_symbol(uint4 buffer[12], uint offset) {
-  const uint scalar_offset = ((offset + 0u)) / 4;
-  uint4 ubo_load = buffer[scalar_offset / 4];
-  const uint scalar_offset_1 = ((offset + 8u)) / 4;
-  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
-  const uint scalar_offset_2 = ((offset + 16u)) / 4;
-  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
-  return float3x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)));
-}
-
-[numthreads(1, 1, 1)]
-void f() {
-  const float2x3 t = transpose(tint_symbol(u, 104u));
-  const float l = length(asfloat(u[1].xy).yx);
-  const float a = abs(asfloat(u[1].xy).yx.x);
-  return;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2/to_builtin.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat3x2/to_builtin.wgsl.expected.glsl
deleted file mode 100644
index 93a683e..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat3x2/to_builtin.wgsl.expected.glsl
+++ /dev/null

@@ -1,43 +0,0 @@
-#version 310 es
-
-struct S {
-  int before;
-  uint pad;
-  mat3x2 m;
-  uint pad_1;
-  uint pad_2;
-  int after;
-  uint pad_3;
-};
-
-struct S_std140 {
-  int before;
-  uint pad;
-  vec2 m_0;
-  vec2 m_1;
-  vec2 m_2;
-  uint pad_1;
-  uint pad_2;
-  int after;
-  uint pad_3;
-};
-
-layout(binding = 0, std140) uniform u_block_std140_ubo {
-  S_std140 inner[4];
-} u;
-
-mat3x2 load_u_inner_2_m() {
-  return mat3x2(u.inner[2u].m_0, u.inner[2u].m_1, u.inner[2u].m_2);
-}
-
-void f() {
-  mat2x3 t = transpose(load_u_inner_2_m());
-  float l = length(u.inner[0u].m_1.yx);
-  float a = abs(u.inner[0u].m_1.yx[0u]);
-}
-
-layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
-void main() {
-  f();
-  return;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2/to_builtin.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat3x2/to_builtin.wgsl.expected.msl
deleted file mode 100644
index a65b7f0..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat3x2/to_builtin.wgsl.expected.msl
+++ /dev/null

@@ -1,32 +0,0 @@
-#include <metal_stdlib>
-
-using namespace metal;
-
-template<typename T, size_t N>
-struct tint_array {
-    const constant T& operator[](size_t i) const constant { return elements[i]; }
-    device T& operator[](size_t i) device { return elements[i]; }
-    const device T& operator[](size_t i) const device { return elements[i]; }
-    thread T& operator[](size_t i) thread { return elements[i]; }
-    const thread T& operator[](size_t i) const thread { return elements[i]; }
-    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
-    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
-    T elements[N];
-};
-
-struct S {
-  /* 0x0000 */ int before;
-  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
-  /* 0x0008 */ float3x2 m;
-  /* 0x0020 */ tint_array<int8_t, 8> tint_pad_1;
-  /* 0x0028 */ int after;
-  /* 0x002c */ tint_array<int8_t, 4> tint_pad_2;
-};
-
-kernel void f(const constant tint_array<S, 4>* tint_symbol [[buffer(0)]]) {
-  float2x3 const t = transpose((*(tint_symbol))[2].m);
-  float const l = length(float2((*(tint_symbol))[0].m[1]).yx);
-  float const a = fabs(float2((*(tint_symbol))[0].m[1]).yx[0]);
-  return;
-}
-

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2/to_builtin.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat3x2/to_builtin.wgsl.expected.spvasm
deleted file mode 100644
index ab0ed96..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat3x2/to_builtin.wgsl.expected.spvasm
+++ /dev/null

@@ -1,82 +0,0 @@
-; SPIR-V
-; Version: 1.3
-; Generator: Google Tint Compiler; 0
-; Bound: 52
-; Schema: 0
-               OpCapability Shader
-         %42 = OpExtInstImport "GLSL.std.450"
-               OpMemoryModel Logical GLSL450
-               OpEntryPoint GLCompute %f "f"
-               OpExecutionMode %f LocalSize 1 1 1
-               OpName %u_block_std140 "u_block_std140"
-               OpMemberName %u_block_std140 0 "inner"
-               OpName %S_std140 "S_std140"
-               OpMemberName %S_std140 0 "before"
-               OpMemberName %S_std140 1 "m_0"
-               OpMemberName %S_std140 2 "m_1"
-               OpMemberName %S_std140 3 "m_2"
-               OpMemberName %S_std140 4 "after"
-               OpName %u "u"
-               OpName %load_u_inner_2_m "load_u_inner_2_m"
-               OpName %f "f"
-               OpDecorate %u_block_std140 Block
-               OpMemberDecorate %u_block_std140 0 Offset 0
-               OpMemberDecorate %S_std140 0 Offset 0
-               OpMemberDecorate %S_std140 1 Offset 8
-               OpMemberDecorate %S_std140 2 Offset 16
-               OpMemberDecorate %S_std140 3 Offset 24
-               OpMemberDecorate %S_std140 4 Offset 40
-               OpDecorate %_arr_S_std140_uint_4 ArrayStride 48
-               OpDecorate %u NonWritable
-               OpDecorate %u DescriptorSet 0
-               OpDecorate %u Binding 0
-        %int = OpTypeInt 32 1
-      %float = OpTypeFloat 32
-    %v2float = OpTypeVector %float 2
-   %S_std140 = OpTypeStruct %int %v2float %v2float %v2float %int
-       %uint = OpTypeInt 32 0
-     %uint_4 = OpConstant %uint 4
-%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
-%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
-%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
-          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
-%mat3v2float = OpTypeMatrix %v2float 3
-         %11 = OpTypeFunction %mat3v2float
-     %uint_0 = OpConstant %uint 0
-     %uint_2 = OpConstant %uint 2
-%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
-     %uint_1 = OpConstant %uint 1
-%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
-     %uint_3 = OpConstant %uint 3
-       %void = OpTypeVoid
-         %33 = OpTypeFunction %void
-    %v3float = OpTypeVector %float 3
-%mat2v3float = OpTypeMatrix %v3float 2
-         %43 = OpConstantNull %uint
-%load_u_inner_2_m = OpFunction %mat3v2float None %11
-         %14 = OpLabel
-         %19 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
-         %23 = OpAccessChain %_ptr_Uniform_v2float %19 %uint_1
-         %24 = OpLoad %v2float %23
-         %26 = OpAccessChain %_ptr_Uniform_v2float %19 %uint_2
-         %27 = OpLoad %v2float %26
-         %30 = OpAccessChain %_ptr_Uniform_v2float %19 %uint_3
-         %31 = OpLoad %v2float %30
-         %32 = OpCompositeConstruct %mat3v2float %24 %27 %31
-               OpReturnValue %32
-               OpFunctionEnd
-          %f = OpFunction %void None %33
-         %36 = OpLabel
-         %40 = OpFunctionCall %mat3v2float %load_u_inner_2_m
-         %37 = OpTranspose %mat2v3float %40
-         %44 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %43 %uint_2
-         %45 = OpLoad %v2float %44
-         %46 = OpVectorShuffle %v2float %45 %45 1 0
-         %41 = OpExtInst %float %42 Length %46
-         %48 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %43 %uint_2
-         %49 = OpLoad %v2float %48
-         %50 = OpVectorShuffle %v2float %49 %49 1 0
-         %51 = OpCompositeExtract %float %50 0
-         %47 = OpExtInst %float %42 FAbs %51
-               OpReturn
-               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2/to_builtin.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x2/to_builtin.wgsl.expected.wgsl
deleted file mode 100644
index 5f718b3..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat3x2/to_builtin.wgsl.expected.wgsl
+++ /dev/null

@@ -1,15 +0,0 @@
-struct S {
-  before : i32,
-  @size(32)
-  m : mat3x2<f32>,
-  after : i32,
-}
-
-@group(0) @binding(0) var<uniform> u : array<S, 4>;
-
-@compute @workgroup_size(1)
-fn f() {
-  let t = transpose(u[2].m);
-  let l = length(u[0].m[1].yx);
-  let a = abs(u[0].m[1].yx.x);
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2/to_fn.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x2/to_fn.wgsl
deleted file mode 100644
index 2248b39..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat3x2/to_fn.wgsl
+++ /dev/null

@@ -1,22 +0,0 @@
-struct S {
-  before : i32,
-  @size(64) m : mat3x2<f32>,
-  after : i32,
-}
-
-@group(0) @binding(0) var<uniform> u : array<S, 4>;
-
-fn a(a : array<S, 4>) {}
-fn b(s : S) {}
-fn c(m : mat3x2<f32>) {}
-fn d(v : vec2<f32>) {}
-fn e(f : f32) {}
-
-@compute @workgroup_size(1)
-fn f() {
-    a(u);
-    b(u[2]);
-    c(u[2].m);
-    d(u[0].m[1].yx);
-    e(u[0].m[1].yx.x);
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2/to_fn.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x2/to_fn.wgsl.expected.dxc.hlsl
deleted file mode 100644
index 8474d05..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat3x2/to_fn.wgsl.expected.dxc.hlsl
+++ /dev/null

@@ -1,62 +0,0 @@
-struct S {
-  int before;
-  float3x2 m;
-  int after;
-};
-
-cbuffer cbuffer_u : register(b0, space0) {
-  uint4 u[20];
-};
-
-void a(S a_1[4]) {
-}
-
-void b(S s) {
-}
-
-void c(float3x2 m) {
-}
-
-void d(float2 v) {
-}
-
-void e(float f_1) {
-}
-
-float3x2 tint_symbol_3(uint4 buffer[20], uint offset) {
-  const uint scalar_offset = ((offset + 0u)) / 4;
-  uint4 ubo_load = buffer[scalar_offset / 4];
-  const uint scalar_offset_1 = ((offset + 8u)) / 4;
-  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
-  const uint scalar_offset_2 = ((offset + 16u)) / 4;
-  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
-  return float3x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)));
-}
-
-S tint_symbol_1(uint4 buffer[20], uint offset) {
-  const uint scalar_offset_3 = ((offset + 0u)) / 4;
-  const uint scalar_offset_4 = ((offset + 72u)) / 4;
-  const S tint_symbol_5 = {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4]), tint_symbol_3(buffer, (offset + 8u)), asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4])};
-  return tint_symbol_5;
-}
-
-typedef S tint_symbol_ret[4];
-tint_symbol_ret tint_symbol(uint4 buffer[20], uint offset) {
-  S arr[4] = (S[4])0;
-  {
-    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
-      arr[i] = tint_symbol_1(buffer, (offset + (i * 80u)));
-    }
-  }
-  return arr;
-}
-
-[numthreads(1, 1, 1)]
-void f() {
-  a(tint_symbol(u, 0u));
-  b(tint_symbol_1(u, 160u));
-  c(tint_symbol_3(u, 168u));
-  d(asfloat(u[1].xy).yx);
-  e(asfloat(u[1].xy).yx.x);
-  return;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2/to_fn.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x2/to_fn.wgsl.expected.fxc.hlsl
deleted file mode 100644
index 8474d05..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat3x2/to_fn.wgsl.expected.fxc.hlsl
+++ /dev/null

@@ -1,62 +0,0 @@
-struct S {
-  int before;
-  float3x2 m;
-  int after;
-};
-
-cbuffer cbuffer_u : register(b0, space0) {
-  uint4 u[20];
-};
-
-void a(S a_1[4]) {
-}
-
-void b(S s) {
-}
-
-void c(float3x2 m) {
-}
-
-void d(float2 v) {
-}
-
-void e(float f_1) {
-}
-
-float3x2 tint_symbol_3(uint4 buffer[20], uint offset) {
-  const uint scalar_offset = ((offset + 0u)) / 4;
-  uint4 ubo_load = buffer[scalar_offset / 4];
-  const uint scalar_offset_1 = ((offset + 8u)) / 4;
-  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
-  const uint scalar_offset_2 = ((offset + 16u)) / 4;
-  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
-  return float3x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)));
-}
-
-S tint_symbol_1(uint4 buffer[20], uint offset) {
-  const uint scalar_offset_3 = ((offset + 0u)) / 4;
-  const uint scalar_offset_4 = ((offset + 72u)) / 4;
-  const S tint_symbol_5 = {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4]), tint_symbol_3(buffer, (offset + 8u)), asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4])};
-  return tint_symbol_5;
-}
-
-typedef S tint_symbol_ret[4];
-tint_symbol_ret tint_symbol(uint4 buffer[20], uint offset) {
-  S arr[4] = (S[4])0;
-  {
-    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
-      arr[i] = tint_symbol_1(buffer, (offset + (i * 80u)));
-    }
-  }
-  return arr;
-}
-
-[numthreads(1, 1, 1)]
-void f() {
-  a(tint_symbol(u, 0u));
-  b(tint_symbol_1(u, 160u));
-  c(tint_symbol_3(u, 168u));
-  d(asfloat(u[1].xy).yx);
-  e(asfloat(u[1].xy).yx.x);
-  return;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2/to_fn.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat3x2/to_fn.wgsl.expected.glsl
deleted file mode 100644
index 41ff2a8..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat3x2/to_fn.wgsl.expected.glsl
+++ /dev/null

@@ -1,90 +0,0 @@
-#version 310 es
-
-struct S {
-  int before;
-  uint pad;
-  mat3x2 m;
-  uint pad_1;
-  uint pad_2;
-  uint pad_3;
-  uint pad_4;
-  uint pad_5;
-  uint pad_6;
-  uint pad_7;
-  uint pad_8;
-  uint pad_9;
-  uint pad_10;
-  int after;
-  uint pad_11;
-};
-
-struct S_std140 {
-  int before;
-  uint pad;
-  vec2 m_0;
-  vec2 m_1;
-  vec2 m_2;
-  uint pad_1;
-  uint pad_2;
-  uint pad_3;
-  uint pad_4;
-  uint pad_5;
-  uint pad_6;
-  uint pad_7;
-  uint pad_8;
-  uint pad_9;
-  uint pad_10;
-  int after;
-  uint pad_11;
-};
-
-layout(binding = 0, std140) uniform u_block_std140_ubo {
-  S_std140 inner[4];
-} u;
-
-void a(S a_1[4]) {
-}
-
-void b(S s) {
-}
-
-void c(mat3x2 m) {
-}
-
-void d(vec2 v) {
-}
-
-void e(float f_1) {
-}
-
-S conv_S(S_std140 val) {
-  return S(val.before, val.pad, mat3x2(val.m_0, val.m_1, val.m_2), val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.pad_7, val.pad_8, val.pad_9, val.pad_10, val.after, val.pad_11);
-}
-
-S[4] conv_arr4_S(S_std140 val[4]) {
-  S arr[4] = S[4](S(0, 0u, mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u), S(0, 0u, mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u), S(0, 0u, mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u), S(0, 0u, mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u));
-  {
-    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
-      arr[i] = conv_S(val[i]);
-    }
-  }
-  return arr;
-}
-
-mat3x2 load_u_inner_2_m() {
-  return mat3x2(u.inner[2u].m_0, u.inner[2u].m_1, u.inner[2u].m_2);
-}
-
-void f() {
-  a(conv_arr4_S(u.inner));
-  b(conv_S(u.inner[2u]));
-  c(load_u_inner_2_m());
-  d(u.inner[0u].m_1.yx);
-  e(u.inner[0u].m_1.yx[0u]);
-}
-
-layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
-void main() {
-  f();
-  return;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2/to_fn.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat3x2/to_fn.wgsl.expected.msl
deleted file mode 100644
index 9247913..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat3x2/to_fn.wgsl.expected.msl
+++ /dev/null

@@ -1,49 +0,0 @@
-#include <metal_stdlib>
-
-using namespace metal;
-
-template<typename T, size_t N>
-struct tint_array {
-    const constant T& operator[](size_t i) const constant { return elements[i]; }
-    device T& operator[](size_t i) device { return elements[i]; }
-    const device T& operator[](size_t i) const device { return elements[i]; }
-    thread T& operator[](size_t i) thread { return elements[i]; }
-    const thread T& operator[](size_t i) const thread { return elements[i]; }
-    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
-    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
-    T elements[N];
-};
-
-struct S {
-  /* 0x0000 */ int before;
-  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
-  /* 0x0008 */ float3x2 m;
-  /* 0x0020 */ tint_array<int8_t, 40> tint_pad_1;
-  /* 0x0048 */ int after;
-  /* 0x004c */ tint_array<int8_t, 4> tint_pad_2;
-};
-
-void a(tint_array<S, 4> a_1) {
-}
-
-void b(S s) {
-}
-
-void c(float3x2 m) {
-}
-
-void d(float2 v) {
-}
-
-void e(float f_1) {
-}
-
-kernel void f(const constant tint_array<S, 4>* tint_symbol [[buffer(0)]]) {
-  a(*(tint_symbol));
-  b((*(tint_symbol))[2]);
-  c((*(tint_symbol))[2].m);
-  d(float2((*(tint_symbol))[0].m[1]).yx);
-  e(float2((*(tint_symbol))[0].m[1]).yx[0]);
-  return;
-}
-

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2/to_fn.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat3x2/to_fn.wgsl.expected.spvasm
deleted file mode 100644
index e7483d2..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat3x2/to_fn.wgsl.expected.spvasm
+++ /dev/null

@@ -1,206 +0,0 @@
-; SPIR-V
-; Version: 1.3
-; Generator: Google Tint Compiler; 0
-; Bound: 124
-; Schema: 0
-               OpCapability Shader
-               OpMemoryModel Logical GLSL450
-               OpEntryPoint GLCompute %f "f"
-               OpExecutionMode %f LocalSize 1 1 1
-               OpName %u_block_std140 "u_block_std140"
-               OpMemberName %u_block_std140 0 "inner"
-               OpName %S_std140 "S_std140"
-               OpMemberName %S_std140 0 "before"
-               OpMemberName %S_std140 1 "m_0"
-               OpMemberName %S_std140 2 "m_1"
-               OpMemberName %S_std140 3 "m_2"
-               OpMemberName %S_std140 4 "after"
-               OpName %u "u"
-               OpName %S "S"
-               OpMemberName %S 0 "before"
-               OpMemberName %S 1 "m"
-               OpMemberName %S 2 "after"
-               OpName %a "a"
-               OpName %a_1 "a_1"
-               OpName %b "b"
-               OpName %s "s"
-               OpName %c "c"
-               OpName %m "m"
-               OpName %d "d"
-               OpName %v "v"
-               OpName %e "e"
-               OpName %f_1 "f_1"
-               OpName %conv_S "conv_S"
-               OpName %val "val"
-               OpName %conv_arr4_S "conv_arr4_S"
-               OpName %val_0 "val"
-               OpName %arr "arr"
-               OpName %i "i"
-               OpName %var_for_index "var_for_index"
-               OpName %load_u_inner_2_m "load_u_inner_2_m"
-               OpName %f "f"
-               OpDecorate %u_block_std140 Block
-               OpMemberDecorate %u_block_std140 0 Offset 0
-               OpMemberDecorate %S_std140 0 Offset 0
-               OpMemberDecorate %S_std140 1 Offset 8
-               OpMemberDecorate %S_std140 2 Offset 16
-               OpMemberDecorate %S_std140 3 Offset 24
-               OpMemberDecorate %S_std140 4 Offset 72
-               OpDecorate %_arr_S_std140_uint_4 ArrayStride 80
-               OpDecorate %u NonWritable
-               OpDecorate %u DescriptorSet 0
-               OpDecorate %u Binding 0
-               OpMemberDecorate %S 0 Offset 0
-               OpMemberDecorate %S 1 Offset 8
-               OpMemberDecorate %S 1 ColMajor
-               OpMemberDecorate %S 1 MatrixStride 8
-               OpMemberDecorate %S 2 Offset 72
-               OpDecorate %_arr_S_uint_4 ArrayStride 80
-        %int = OpTypeInt 32 1
-      %float = OpTypeFloat 32
-    %v2float = OpTypeVector %float 2
-   %S_std140 = OpTypeStruct %int %v2float %v2float %v2float %int
-       %uint = OpTypeInt 32 0
-     %uint_4 = OpConstant %uint 4
-%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
-%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
-%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
-          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
-       %void = OpTypeVoid
-%mat3v2float = OpTypeMatrix %v2float 3
-          %S = OpTypeStruct %int %mat3v2float %int
-%_arr_S_uint_4 = OpTypeArray %S %uint_4
-         %11 = OpTypeFunction %void %_arr_S_uint_4
-         %19 = OpTypeFunction %void %S
-         %23 = OpTypeFunction %void %mat3v2float
-         %27 = OpTypeFunction %void %v2float
-         %31 = OpTypeFunction %void %float
-         %35 = OpTypeFunction %S %S_std140
-         %46 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
-%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
-         %52 = OpConstantNull %_arr_S_uint_4
-%_ptr_Function_uint = OpTypePointer Function %uint
-         %55 = OpConstantNull %uint
-       %bool = OpTypeBool
-%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
-         %68 = OpConstantNull %_arr_S_std140_uint_4
-%_ptr_Function_S = OpTypePointer Function %S
-%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
-     %uint_1 = OpConstant %uint 1
-         %81 = OpTypeFunction %mat3v2float
-     %uint_0 = OpConstant %uint 0
-     %uint_2 = OpConstant %uint 2
-%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
-%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
-     %uint_3 = OpConstant %uint 3
-        %101 = OpTypeFunction %void
-%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
-          %a = OpFunction %void None %11
-        %a_1 = OpFunctionParameter %_arr_S_uint_4
-         %18 = OpLabel
-               OpReturn
-               OpFunctionEnd
-          %b = OpFunction %void None %19
-          %s = OpFunctionParameter %S
-         %22 = OpLabel
-               OpReturn
-               OpFunctionEnd
-          %c = OpFunction %void None %23
-          %m = OpFunctionParameter %mat3v2float
-         %26 = OpLabel
-               OpReturn
-               OpFunctionEnd
-          %d = OpFunction %void None %27
-          %v = OpFunctionParameter %v2float
-         %30 = OpLabel
-               OpReturn
-               OpFunctionEnd
-          %e = OpFunction %void None %31
-        %f_1 = OpFunctionParameter %float
-         %34 = OpLabel
-               OpReturn
-               OpFunctionEnd
-     %conv_S = OpFunction %S None %35
-        %val = OpFunctionParameter %S_std140
-         %38 = OpLabel
-         %39 = OpCompositeExtract %int %val 0
-         %40 = OpCompositeExtract %v2float %val 1
-         %41 = OpCompositeExtract %v2float %val 2
-         %42 = OpCompositeExtract %v2float %val 3
-         %43 = OpCompositeConstruct %mat3v2float %40 %41 %42
-         %44 = OpCompositeExtract %int %val 4
-         %45 = OpCompositeConstruct %S %39 %43 %44
-               OpReturnValue %45
-               OpFunctionEnd
-%conv_arr4_S = OpFunction %_arr_S_uint_4 None %46
-      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
-         %49 = OpLabel
-        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %52
-          %i = OpVariable %_ptr_Function_uint Function %55
-%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %68
-               OpBranch %56
-         %56 = OpLabel
-               OpLoopMerge %57 %58 None
-               OpBranch %59
-         %59 = OpLabel
-         %61 = OpLoad %uint %i
-         %62 = OpULessThan %bool %61 %uint_4
-         %60 = OpLogicalNot %bool %62
-               OpSelectionMerge %64 None
-               OpBranchConditional %60 %65 %64
-         %65 = OpLabel
-               OpBranch %57
-         %64 = OpLabel
-               OpStore %var_for_index %val_0
-         %69 = OpLoad %uint %i
-         %71 = OpAccessChain %_ptr_Function_S %arr %69
-         %73 = OpLoad %uint %i
-         %75 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %73
-         %76 = OpLoad %S_std140 %75
-         %72 = OpFunctionCall %S %conv_S %76
-               OpStore %71 %72
-               OpBranch %58
-         %58 = OpLabel
-         %77 = OpLoad %uint %i
-         %79 = OpIAdd %uint %77 %uint_1
-               OpStore %i %79
-               OpBranch %56
-         %57 = OpLabel
-         %80 = OpLoad %_arr_S_uint_4 %arr
-               OpReturnValue %80
-               OpFunctionEnd
-%load_u_inner_2_m = OpFunction %mat3v2float None %81
-         %83 = OpLabel
-         %88 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
-         %91 = OpAccessChain %_ptr_Uniform_v2float %88 %uint_1
-         %92 = OpLoad %v2float %91
-         %94 = OpAccessChain %_ptr_Uniform_v2float %88 %uint_2
-         %95 = OpLoad %v2float %94
-         %98 = OpAccessChain %_ptr_Uniform_v2float %88 %uint_3
-         %99 = OpLoad %v2float %98
-        %100 = OpCompositeConstruct %mat3v2float %92 %95 %99
-               OpReturnValue %100
-               OpFunctionEnd
-          %f = OpFunction %void None %101
-        %103 = OpLabel
-        %107 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
-        %108 = OpLoad %_arr_S_std140_uint_4 %107
-        %105 = OpFunctionCall %_arr_S_uint_4 %conv_arr4_S %108
-        %104 = OpFunctionCall %void %a %105
-        %111 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
-        %112 = OpLoad %S_std140 %111
-        %110 = OpFunctionCall %S %conv_S %112
-        %109 = OpFunctionCall %void %b %110
-        %114 = OpFunctionCall %mat3v2float %load_u_inner_2_m
-        %113 = OpFunctionCall %void %c %114
-        %116 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %55 %uint_2
-        %117 = OpLoad %v2float %116
-        %118 = OpVectorShuffle %v2float %117 %117 1 0
-        %115 = OpFunctionCall %void %d %118
-        %120 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %55 %uint_2
-        %121 = OpLoad %v2float %120
-        %122 = OpVectorShuffle %v2float %121 %121 1 0
-        %123 = OpCompositeExtract %float %122 0
-        %119 = OpFunctionCall %void %e %123
-               OpReturn
-               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2/to_fn.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x2/to_fn.wgsl.expected.wgsl
deleted file mode 100644
index b7ca757..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat3x2/to_fn.wgsl.expected.wgsl
+++ /dev/null

@@ -1,32 +0,0 @@
-struct S {
-  before : i32,
-  @size(64)
-  m : mat3x2<f32>,
-  after : i32,
-}
-
-@group(0) @binding(0) var<uniform> u : array<S, 4>;
-
-fn a(a : array<S, 4>) {
-}
-
-fn b(s : S) {
-}
-
-fn c(m : mat3x2<f32>) {
-}
-
-fn d(v : vec2<f32>) {
-}
-
-fn e(f : f32) {
-}
-
-@compute @workgroup_size(1)
-fn f() {
-  a(u);
-  b(u[2]);
-  c(u[2].m);
-  d(u[0].m[1].yx);
-  e(u[0].m[1].yx.x);
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2/to_private.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x2/to_private.wgsl
deleted file mode 100644
index be13ffc..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat3x2/to_private.wgsl
+++ /dev/null

@@ -1,16 +0,0 @@
-struct S {
-  before : i32,
-  @size(64) m : mat3x2<f32>,
-  after : i32,
-}
-
-@group(0) @binding(0) var<uniform> u : array<S, 4>;
-var<private> p : array<S, 4>;
-
-@compute @workgroup_size(1)
-fn f() {
-    p = u;
-    p[1] = u[2];
-    p[3].m = u[2].m;
-    p[1].m[0] = u[0].m[1].yx;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2/to_private.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x2/to_private.wgsl.expected.dxc.hlsl
deleted file mode 100644
index e1c3960..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat3x2/to_private.wgsl.expected.dxc.hlsl
+++ /dev/null

@@ -1,47 +0,0 @@
-struct S {
-  int before;
-  float3x2 m;
-  int after;
-};
-
-cbuffer cbuffer_u : register(b0, space0) {
-  uint4 u[20];
-};
-static S p[4] = (S[4])0;
-
-float3x2 tint_symbol_3(uint4 buffer[20], uint offset) {
-  const uint scalar_offset = ((offset + 0u)) / 4;
-  uint4 ubo_load = buffer[scalar_offset / 4];
-  const uint scalar_offset_1 = ((offset + 8u)) / 4;
-  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
-  const uint scalar_offset_2 = ((offset + 16u)) / 4;
-  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
-  return float3x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)));
-}
-
-S tint_symbol_1(uint4 buffer[20], uint offset) {
-  const uint scalar_offset_3 = ((offset + 0u)) / 4;
-  const uint scalar_offset_4 = ((offset + 72u)) / 4;
-  const S tint_symbol_5 = {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4]), tint_symbol_3(buffer, (offset + 8u)), asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4])};
-  return tint_symbol_5;
-}
-
-typedef S tint_symbol_ret[4];
-tint_symbol_ret tint_symbol(uint4 buffer[20], uint offset) {
-  S arr[4] = (S[4])0;
-  {
-    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
-      arr[i] = tint_symbol_1(buffer, (offset + (i * 80u)));
-    }
-  }
-  return arr;
-}
-
-[numthreads(1, 1, 1)]
-void f() {
-  p = tint_symbol(u, 0u);
-  p[1] = tint_symbol_1(u, 160u);
-  p[3].m = tint_symbol_3(u, 168u);
-  p[1].m[0] = asfloat(u[1].xy).yx;
-  return;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2/to_private.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x2/to_private.wgsl.expected.fxc.hlsl
deleted file mode 100644
index e1c3960..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat3x2/to_private.wgsl.expected.fxc.hlsl
+++ /dev/null

@@ -1,47 +0,0 @@
-struct S {
-  int before;
-  float3x2 m;
-  int after;
-};
-
-cbuffer cbuffer_u : register(b0, space0) {
-  uint4 u[20];
-};
-static S p[4] = (S[4])0;
-
-float3x2 tint_symbol_3(uint4 buffer[20], uint offset) {
-  const uint scalar_offset = ((offset + 0u)) / 4;
-  uint4 ubo_load = buffer[scalar_offset / 4];
-  const uint scalar_offset_1 = ((offset + 8u)) / 4;
-  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
-  const uint scalar_offset_2 = ((offset + 16u)) / 4;
-  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
-  return float3x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)));
-}
-
-S tint_symbol_1(uint4 buffer[20], uint offset) {
-  const uint scalar_offset_3 = ((offset + 0u)) / 4;
-  const uint scalar_offset_4 = ((offset + 72u)) / 4;
-  const S tint_symbol_5 = {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4]), tint_symbol_3(buffer, (offset + 8u)), asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4])};
-  return tint_symbol_5;
-}
-
-typedef S tint_symbol_ret[4];
-tint_symbol_ret tint_symbol(uint4 buffer[20], uint offset) {
-  S arr[4] = (S[4])0;
-  {
-    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
-      arr[i] = tint_symbol_1(buffer, (offset + (i * 80u)));
-    }
-  }
-  return arr;
-}
-
-[numthreads(1, 1, 1)]
-void f() {
-  p = tint_symbol(u, 0u);
-  p[1] = tint_symbol_1(u, 160u);
-  p[3].m = tint_symbol_3(u, 168u);
-  p[1].m[0] = asfloat(u[1].xy).yx;
-  return;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2/to_private.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat3x2/to_private.wgsl.expected.glsl
deleted file mode 100644
index 7c90023..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat3x2/to_private.wgsl.expected.glsl
+++ /dev/null

@@ -1,75 +0,0 @@
-#version 310 es
-
-struct S {
-  int before;
-  uint pad;
-  mat3x2 m;
-  uint pad_1;
-  uint pad_2;
-  uint pad_3;
-  uint pad_4;
-  uint pad_5;
-  uint pad_6;
-  uint pad_7;
-  uint pad_8;
-  uint pad_9;
-  uint pad_10;
-  int after;
-  uint pad_11;
-};
-
-struct S_std140 {
-  int before;
-  uint pad;
-  vec2 m_0;
-  vec2 m_1;
-  vec2 m_2;
-  uint pad_1;
-  uint pad_2;
-  uint pad_3;
-  uint pad_4;
-  uint pad_5;
-  uint pad_6;
-  uint pad_7;
-  uint pad_8;
-  uint pad_9;
-  uint pad_10;
-  int after;
-  uint pad_11;
-};
-
-layout(binding = 0, std140) uniform u_block_std140_ubo {
-  S_std140 inner[4];
-} u;
-
-S p[4] = S[4](S(0, 0u, mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u), S(0, 0u, mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u), S(0, 0u, mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u), S(0, 0u, mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u));
-S conv_S(S_std140 val) {
-  return S(val.before, val.pad, mat3x2(val.m_0, val.m_1, val.m_2), val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.pad_7, val.pad_8, val.pad_9, val.pad_10, val.after, val.pad_11);
-}
-
-S[4] conv_arr4_S(S_std140 val[4]) {
-  S arr[4] = S[4](S(0, 0u, mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u), S(0, 0u, mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u), S(0, 0u, mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u), S(0, 0u, mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u));
-  {
-    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
-      arr[i] = conv_S(val[i]);
-    }
-  }
-  return arr;
-}
-
-mat3x2 load_u_inner_2_m() {
-  return mat3x2(u.inner[2u].m_0, u.inner[2u].m_1, u.inner[2u].m_2);
-}
-
-void f() {
-  p = conv_arr4_S(u.inner);
-  p[1] = conv_S(u.inner[2u]);
-  p[3].m = load_u_inner_2_m();
-  p[1].m[0] = u.inner[0u].m_1.yx;
-}
-
-layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
-void main() {
-  f();
-  return;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2/to_private.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat3x2/to_private.wgsl.expected.msl
deleted file mode 100644
index 2e60d0f..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat3x2/to_private.wgsl.expected.msl
+++ /dev/null

@@ -1,34 +0,0 @@
-#include <metal_stdlib>
-
-using namespace metal;
-
-template<typename T, size_t N>
-struct tint_array {
-    const constant T& operator[](size_t i) const constant { return elements[i]; }
-    device T& operator[](size_t i) device { return elements[i]; }
-    const device T& operator[](size_t i) const device { return elements[i]; }
-    thread T& operator[](size_t i) thread { return elements[i]; }
-    const thread T& operator[](size_t i) const thread { return elements[i]; }
-    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
-    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
-    T elements[N];
-};
-
-struct S {
-  /* 0x0000 */ int before;
-  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
-  /* 0x0008 */ float3x2 m;
-  /* 0x0020 */ tint_array<int8_t, 40> tint_pad_1;
-  /* 0x0048 */ int after;
-  /* 0x004c */ tint_array<int8_t, 4> tint_pad_2;
-};
-
-kernel void f(const constant tint_array<S, 4>* tint_symbol_1 [[buffer(0)]]) {
-  thread tint_array<S, 4> tint_symbol = {};
-  tint_symbol = *(tint_symbol_1);
-  tint_symbol[1] = (*(tint_symbol_1))[2];
-  tint_symbol[3].m = (*(tint_symbol_1))[2].m;
-  tint_symbol[1].m[0] = float2((*(tint_symbol_1))[0].m[1]).yx;
-  return;
-}
-

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2/to_private.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat3x2/to_private.wgsl.expected.spvasm
deleted file mode 100644
index 4cadc2f..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat3x2/to_private.wgsl.expected.spvasm
+++ /dev/null

@@ -1,173 +0,0 @@
-; SPIR-V
-; Version: 1.3
-; Generator: Google Tint Compiler; 0
-; Bound: 106
-; Schema: 0
-               OpCapability Shader
-               OpMemoryModel Logical GLSL450
-               OpEntryPoint GLCompute %f "f"
-               OpExecutionMode %f LocalSize 1 1 1
-               OpName %u_block_std140 "u_block_std140"
-               OpMemberName %u_block_std140 0 "inner"
-               OpName %S_std140 "S_std140"
-               OpMemberName %S_std140 0 "before"
-               OpMemberName %S_std140 1 "m_0"
-               OpMemberName %S_std140 2 "m_1"
-               OpMemberName %S_std140 3 "m_2"
-               OpMemberName %S_std140 4 "after"
-               OpName %u "u"
-               OpName %S "S"
-               OpMemberName %S 0 "before"
-               OpMemberName %S 1 "m"
-               OpMemberName %S 2 "after"
-               OpName %p "p"
-               OpName %conv_S "conv_S"
-               OpName %val "val"
-               OpName %conv_arr4_S "conv_arr4_S"
-               OpName %val_0 "val"
-               OpName %arr "arr"
-               OpName %i "i"
-               OpName %var_for_index "var_for_index"
-               OpName %load_u_inner_2_m "load_u_inner_2_m"
-               OpName %f "f"
-               OpDecorate %u_block_std140 Block
-               OpMemberDecorate %u_block_std140 0 Offset 0
-               OpMemberDecorate %S_std140 0 Offset 0
-               OpMemberDecorate %S_std140 1 Offset 8
-               OpMemberDecorate %S_std140 2 Offset 16
-               OpMemberDecorate %S_std140 3 Offset 24
-               OpMemberDecorate %S_std140 4 Offset 72
-               OpDecorate %_arr_S_std140_uint_4 ArrayStride 80
-               OpDecorate %u NonWritable
-               OpDecorate %u DescriptorSet 0
-               OpDecorate %u Binding 0
-               OpMemberDecorate %S 0 Offset 0
-               OpMemberDecorate %S 1 Offset 8
-               OpMemberDecorate %S 1 ColMajor
-               OpMemberDecorate %S 1 MatrixStride 8
-               OpMemberDecorate %S 2 Offset 72
-               OpDecorate %_arr_S_uint_4 ArrayStride 80
-        %int = OpTypeInt 32 1
-      %float = OpTypeFloat 32
-    %v2float = OpTypeVector %float 2
-   %S_std140 = OpTypeStruct %int %v2float %v2float %v2float %int
-       %uint = OpTypeInt 32 0
-     %uint_4 = OpConstant %uint 4
-%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
-%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
-%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
-          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
-%mat3v2float = OpTypeMatrix %v2float 3
-          %S = OpTypeStruct %int %mat3v2float %int
-%_arr_S_uint_4 = OpTypeArray %S %uint_4
-%_ptr_Private__arr_S_uint_4 = OpTypePointer Private %_arr_S_uint_4
-         %16 = OpConstantNull %_arr_S_uint_4
-          %p = OpVariable %_ptr_Private__arr_S_uint_4 Private %16
-         %17 = OpTypeFunction %S %S_std140
-         %28 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
-%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
-%_ptr_Function_uint = OpTypePointer Function %uint
-         %36 = OpConstantNull %uint
-       %bool = OpTypeBool
-%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
-         %49 = OpConstantNull %_arr_S_std140_uint_4
-%_ptr_Function_S = OpTypePointer Function %S
-%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
-     %uint_1 = OpConstant %uint 1
-         %62 = OpTypeFunction %mat3v2float
-     %uint_0 = OpConstant %uint 0
-     %uint_2 = OpConstant %uint 2
-%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
-%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
-     %uint_3 = OpConstant %uint 3
-       %void = OpTypeVoid
-         %82 = OpTypeFunction %void
-%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
-      %int_1 = OpConstant %int 1
-%_ptr_Private_S = OpTypePointer Private %S
-      %int_3 = OpConstant %int 3
-%_ptr_Private_mat3v2float = OpTypePointer Private %mat3v2float
-        %100 = OpConstantNull %int
-%_ptr_Private_v2float = OpTypePointer Private %v2float
-     %conv_S = OpFunction %S None %17
-        %val = OpFunctionParameter %S_std140
-         %20 = OpLabel
-         %21 = OpCompositeExtract %int %val 0
-         %22 = OpCompositeExtract %v2float %val 1
-         %23 = OpCompositeExtract %v2float %val 2
-         %24 = OpCompositeExtract %v2float %val 3
-         %25 = OpCompositeConstruct %mat3v2float %22 %23 %24
-         %26 = OpCompositeExtract %int %val 4
-         %27 = OpCompositeConstruct %S %21 %25 %26
-               OpReturnValue %27
-               OpFunctionEnd
-%conv_arr4_S = OpFunction %_arr_S_uint_4 None %28
-      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
-         %31 = OpLabel
-        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %16
-          %i = OpVariable %_ptr_Function_uint Function %36
-%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %49
-               OpBranch %37
-         %37 = OpLabel
-               OpLoopMerge %38 %39 None
-               OpBranch %40
-         %40 = OpLabel
-         %42 = OpLoad %uint %i
-         %43 = OpULessThan %bool %42 %uint_4
-         %41 = OpLogicalNot %bool %43
-               OpSelectionMerge %45 None
-               OpBranchConditional %41 %46 %45
-         %46 = OpLabel
-               OpBranch %38
-         %45 = OpLabel
-               OpStore %var_for_index %val_0
-         %50 = OpLoad %uint %i
-         %52 = OpAccessChain %_ptr_Function_S %arr %50
-         %54 = OpLoad %uint %i
-         %56 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %54
-         %57 = OpLoad %S_std140 %56
-         %53 = OpFunctionCall %S %conv_S %57
-               OpStore %52 %53
-               OpBranch %39
-         %39 = OpLabel
-         %58 = OpLoad %uint %i
-         %60 = OpIAdd %uint %58 %uint_1
-               OpStore %i %60
-               OpBranch %37
-         %38 = OpLabel
-         %61 = OpLoad %_arr_S_uint_4 %arr
-               OpReturnValue %61
-               OpFunctionEnd
-%load_u_inner_2_m = OpFunction %mat3v2float None %62
-         %64 = OpLabel
-         %69 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
-         %72 = OpAccessChain %_ptr_Uniform_v2float %69 %uint_1
-         %73 = OpLoad %v2float %72
-         %75 = OpAccessChain %_ptr_Uniform_v2float %69 %uint_2
-         %76 = OpLoad %v2float %75
-         %79 = OpAccessChain %_ptr_Uniform_v2float %69 %uint_3
-         %80 = OpLoad %v2float %79
-         %81 = OpCompositeConstruct %mat3v2float %73 %76 %80
-               OpReturnValue %81
-               OpFunctionEnd
-          %f = OpFunction %void None %82
-         %85 = OpLabel
-         %88 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
-         %89 = OpLoad %_arr_S_std140_uint_4 %88
-         %86 = OpFunctionCall %_arr_S_uint_4 %conv_arr4_S %89
-               OpStore %p %86
-         %92 = OpAccessChain %_ptr_Private_S %p %int_1
-         %94 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
-         %95 = OpLoad %S_std140 %94
-         %93 = OpFunctionCall %S %conv_S %95
-               OpStore %92 %93
-         %98 = OpAccessChain %_ptr_Private_mat3v2float %p %int_3 %uint_1
-         %99 = OpFunctionCall %mat3v2float %load_u_inner_2_m
-               OpStore %98 %99
-        %102 = OpAccessChain %_ptr_Private_v2float %p %int_1 %uint_1 %100
-        %103 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %36 %uint_2
-        %104 = OpLoad %v2float %103
-        %105 = OpVectorShuffle %v2float %104 %104 1 0
-               OpStore %102 %105
-               OpReturn
-               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2/to_private.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x2/to_private.wgsl.expected.wgsl
deleted file mode 100644
index b0ff478..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat3x2/to_private.wgsl.expected.wgsl
+++ /dev/null

@@ -1,18 +0,0 @@
-struct S {
-  before : i32,
-  @size(64)
-  m : mat3x2<f32>,
-  after : i32,
-}
-
-@group(0) @binding(0) var<uniform> u : array<S, 4>;
-
-var<private> p : array<S, 4>;
-
-@compute @workgroup_size(1)
-fn f() {
-  p = u;
-  p[1] = u[2];
-  p[3].m = u[2].m;
-  p[1].m[0] = u[0].m[1].yx;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2/to_storage.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x2/to_storage.wgsl
deleted file mode 100644
index 4848e4a..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat3x2/to_storage.wgsl
+++ /dev/null

@@ -1,16 +0,0 @@
-struct S {
-  before : i32,
-  @size(64) m : mat3x2<f32>,
-  after : i32,
-}
-
-@group(0) @binding(0) var<uniform> u : array<S, 4>;
-@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
-
-@compute @workgroup_size(1)
-fn f() {
-    s = u;
-    s[1] = u[2];
-    s[3].m = u[2].m;
-    s[1].m[0] = u[0].m[1].yx;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2/to_storage.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x2/to_storage.wgsl.expected.dxc.hlsl
deleted file mode 100644
index 4cf6e38..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat3x2/to_storage.wgsl.expected.dxc.hlsl
+++ /dev/null

@@ -1,68 +0,0 @@
-struct S {
-  int before;
-  float3x2 m;
-  int after;
-};
-
-cbuffer cbuffer_u : register(b0, space0) {
-  uint4 u[20];
-};
-RWByteAddressBuffer s : register(u1, space0);
-
-void tint_symbol_3(RWByteAddressBuffer buffer, uint offset, float3x2 value) {
-  buffer.Store2((offset + 0u), asuint(value[0u]));
-  buffer.Store2((offset + 8u), asuint(value[1u]));
-  buffer.Store2((offset + 16u), asuint(value[2u]));
-}
-
-void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, S value) {
-  buffer.Store((offset + 0u), asuint(value.before));
-  tint_symbol_3(buffer, (offset + 8u), value.m);
-  buffer.Store((offset + 72u), asuint(value.after));
-}
-
-void tint_symbol(RWByteAddressBuffer buffer, uint offset, S value[4]) {
-  S array[4] = value;
-  {
-    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
-      tint_symbol_1(buffer, (offset + (i * 80u)), array[i]);
-    }
-  }
-}
-
-float3x2 tint_symbol_8(uint4 buffer[20], uint offset) {
-  const uint scalar_offset = ((offset + 0u)) / 4;
-  uint4 ubo_load = buffer[scalar_offset / 4];
-  const uint scalar_offset_1 = ((offset + 8u)) / 4;
-  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
-  const uint scalar_offset_2 = ((offset + 16u)) / 4;
-  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
-  return float3x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)));
-}
-
-S tint_symbol_6(uint4 buffer[20], uint offset) {
-  const uint scalar_offset_3 = ((offset + 0u)) / 4;
-  const uint scalar_offset_4 = ((offset + 72u)) / 4;
-  const S tint_symbol_10 = {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4]), tint_symbol_8(buffer, (offset + 8u)), asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4])};
-  return tint_symbol_10;
-}
-
-typedef S tint_symbol_5_ret[4];
-tint_symbol_5_ret tint_symbol_5(uint4 buffer[20], uint offset) {
-  S arr[4] = (S[4])0;
-  {
-    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
-      arr[i_1] = tint_symbol_6(buffer, (offset + (i_1 * 80u)));
-    }
-  }
-  return arr;
-}
-
-[numthreads(1, 1, 1)]
-void f() {
-  tint_symbol(s, 0u, tint_symbol_5(u, 0u));
-  tint_symbol_1(s, 80u, tint_symbol_6(u, 160u));
-  tint_symbol_3(s, 248u, tint_symbol_8(u, 168u));
-  s.Store2(88u, asuint(asfloat(u[1].xy).yx));
-  return;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2/to_storage.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x2/to_storage.wgsl.expected.fxc.hlsl
deleted file mode 100644
index 4cf6e38..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat3x2/to_storage.wgsl.expected.fxc.hlsl
+++ /dev/null

@@ -1,68 +0,0 @@
-struct S {
-  int before;
-  float3x2 m;
-  int after;
-};
-
-cbuffer cbuffer_u : register(b0, space0) {
-  uint4 u[20];
-};
-RWByteAddressBuffer s : register(u1, space0);
-
-void tint_symbol_3(RWByteAddressBuffer buffer, uint offset, float3x2 value) {
-  buffer.Store2((offset + 0u), asuint(value[0u]));
-  buffer.Store2((offset + 8u), asuint(value[1u]));
-  buffer.Store2((offset + 16u), asuint(value[2u]));
-}
-
-void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, S value) {
-  buffer.Store((offset + 0u), asuint(value.before));
-  tint_symbol_3(buffer, (offset + 8u), value.m);
-  buffer.Store((offset + 72u), asuint(value.after));
-}
-
-void tint_symbol(RWByteAddressBuffer buffer, uint offset, S value[4]) {
-  S array[4] = value;
-  {
-    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
-      tint_symbol_1(buffer, (offset + (i * 80u)), array[i]);
-    }
-  }
-}
-
-float3x2 tint_symbol_8(uint4 buffer[20], uint offset) {
-  const uint scalar_offset = ((offset + 0u)) / 4;
-  uint4 ubo_load = buffer[scalar_offset / 4];
-  const uint scalar_offset_1 = ((offset + 8u)) / 4;
-  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
-  const uint scalar_offset_2 = ((offset + 16u)) / 4;
-  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
-  return float3x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)));
-}
-
-S tint_symbol_6(uint4 buffer[20], uint offset) {
-  const uint scalar_offset_3 = ((offset + 0u)) / 4;
-  const uint scalar_offset_4 = ((offset + 72u)) / 4;
-  const S tint_symbol_10 = {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4]), tint_symbol_8(buffer, (offset + 8u)), asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4])};
-  return tint_symbol_10;
-}
-
-typedef S tint_symbol_5_ret[4];
-tint_symbol_5_ret tint_symbol_5(uint4 buffer[20], uint offset) {
-  S arr[4] = (S[4])0;
-  {
-    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
-      arr[i_1] = tint_symbol_6(buffer, (offset + (i_1 * 80u)));
-    }
-  }
-  return arr;
-}
-
-[numthreads(1, 1, 1)]
-void f() {
-  tint_symbol(s, 0u, tint_symbol_5(u, 0u));
-  tint_symbol_1(s, 80u, tint_symbol_6(u, 160u));
-  tint_symbol_3(s, 248u, tint_symbol_8(u, 168u));
-  s.Store2(88u, asuint(asfloat(u[1].xy).yx));
-  return;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2/to_storage.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat3x2/to_storage.wgsl.expected.glsl
deleted file mode 100644
index 4013cf5..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat3x2/to_storage.wgsl.expected.glsl
+++ /dev/null

@@ -1,78 +0,0 @@
-#version 310 es
-
-struct S {
-  int before;
-  uint pad;
-  mat3x2 m;
-  uint pad_1;
-  uint pad_2;
-  uint pad_3;
-  uint pad_4;
-  uint pad_5;
-  uint pad_6;
-  uint pad_7;
-  uint pad_8;
-  uint pad_9;
-  uint pad_10;
-  int after;
-  uint pad_11;
-};
-
-struct S_std140 {
-  int before;
-  uint pad;
-  vec2 m_0;
-  vec2 m_1;
-  vec2 m_2;
-  uint pad_1;
-  uint pad_2;
-  uint pad_3;
-  uint pad_4;
-  uint pad_5;
-  uint pad_6;
-  uint pad_7;
-  uint pad_8;
-  uint pad_9;
-  uint pad_10;
-  int after;
-  uint pad_11;
-};
-
-layout(binding = 0, std140) uniform u_block_std140_ubo {
-  S_std140 inner[4];
-} u;
-
-layout(binding = 1, std430) buffer u_block_ssbo {
-  S inner[4];
-} s;
-
-S conv_S(S_std140 val) {
-  return S(val.before, val.pad, mat3x2(val.m_0, val.m_1, val.m_2), val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.pad_7, val.pad_8, val.pad_9, val.pad_10, val.after, val.pad_11);
-}
-
-S[4] conv_arr4_S(S_std140 val[4]) {
-  S arr[4] = S[4](S(0, 0u, mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u), S(0, 0u, mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u), S(0, 0u, mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u), S(0, 0u, mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u));
-  {
-    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
-      arr[i] = conv_S(val[i]);
-    }
-  }
-  return arr;
-}
-
-mat3x2 load_u_inner_2_m() {
-  return mat3x2(u.inner[2u].m_0, u.inner[2u].m_1, u.inner[2u].m_2);
-}
-
-void f() {
-  s.inner = conv_arr4_S(u.inner);
-  s.inner[1] = conv_S(u.inner[2u]);
-  s.inner[3].m = load_u_inner_2_m();
-  s.inner[1].m[0] = u.inner[0u].m_1.yx;
-}
-
-layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
-void main() {
-  f();
-  return;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2/to_storage.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat3x2/to_storage.wgsl.expected.msl
deleted file mode 100644
index 0ee4e4c..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat3x2/to_storage.wgsl.expected.msl
+++ /dev/null

@@ -1,33 +0,0 @@
-#include <metal_stdlib>
-
-using namespace metal;
-
-template<typename T, size_t N>
-struct tint_array {
-    const constant T& operator[](size_t i) const constant { return elements[i]; }
-    device T& operator[](size_t i) device { return elements[i]; }
-    const device T& operator[](size_t i) const device { return elements[i]; }
-    thread T& operator[](size_t i) thread { return elements[i]; }
-    const thread T& operator[](size_t i) const thread { return elements[i]; }
-    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
-    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
-    T elements[N];
-};
-
-struct S {
-  /* 0x0000 */ int before;
-  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
-  /* 0x0008 */ float3x2 m;
-  /* 0x0020 */ tint_array<int8_t, 40> tint_pad_1;
-  /* 0x0048 */ int after;
-  /* 0x004c */ tint_array<int8_t, 4> tint_pad_2;
-};
-
-kernel void f(device tint_array<S, 4>* tint_symbol [[buffer(1)]], const constant tint_array<S, 4>* tint_symbol_1 [[buffer(0)]]) {
-  *(tint_symbol) = *(tint_symbol_1);
-  (*(tint_symbol))[1] = (*(tint_symbol_1))[2];
-  (*(tint_symbol))[3].m = (*(tint_symbol_1))[2].m;
-  (*(tint_symbol))[1].m[0] = float2((*(tint_symbol_1))[0].m[1]).yx;
-  return;
-}
-

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2/to_storage.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat3x2/to_storage.wgsl.expected.spvasm
deleted file mode 100644
index 93ef04f..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat3x2/to_storage.wgsl.expected.spvasm
+++ /dev/null

@@ -1,182 +0,0 @@
-; SPIR-V
-; Version: 1.3
-; Generator: Google Tint Compiler; 0
-; Bound: 109
-; Schema: 0
-               OpCapability Shader
-               OpMemoryModel Logical GLSL450
-               OpEntryPoint GLCompute %f "f"
-               OpExecutionMode %f LocalSize 1 1 1
-               OpName %u_block_std140 "u_block_std140"
-               OpMemberName %u_block_std140 0 "inner"
-               OpName %S_std140 "S_std140"
-               OpMemberName %S_std140 0 "before"
-               OpMemberName %S_std140 1 "m_0"
-               OpMemberName %S_std140 2 "m_1"
-               OpMemberName %S_std140 3 "m_2"
-               OpMemberName %S_std140 4 "after"
-               OpName %u "u"
-               OpName %u_block "u_block"
-               OpMemberName %u_block 0 "inner"
-               OpName %S "S"
-               OpMemberName %S 0 "before"
-               OpMemberName %S 1 "m"
-               OpMemberName %S 2 "after"
-               OpName %s "s"
-               OpName %conv_S "conv_S"
-               OpName %val "val"
-               OpName %conv_arr4_S "conv_arr4_S"
-               OpName %val_0 "val"
-               OpName %arr "arr"
-               OpName %i "i"
-               OpName %var_for_index "var_for_index"
-               OpName %load_u_inner_2_m "load_u_inner_2_m"
-               OpName %f "f"
-               OpDecorate %u_block_std140 Block
-               OpMemberDecorate %u_block_std140 0 Offset 0
-               OpMemberDecorate %S_std140 0 Offset 0
-               OpMemberDecorate %S_std140 1 Offset 8
-               OpMemberDecorate %S_std140 2 Offset 16
-               OpMemberDecorate %S_std140 3 Offset 24
-               OpMemberDecorate %S_std140 4 Offset 72
-               OpDecorate %_arr_S_std140_uint_4 ArrayStride 80
-               OpDecorate %u NonWritable
-               OpDecorate %u DescriptorSet 0
-               OpDecorate %u Binding 0
-               OpDecorate %u_block Block
-               OpMemberDecorate %u_block 0 Offset 0
-               OpMemberDecorate %S 0 Offset 0
-               OpMemberDecorate %S 1 Offset 8
-               OpMemberDecorate %S 1 ColMajor
-               OpMemberDecorate %S 1 MatrixStride 8
-               OpMemberDecorate %S 2 Offset 72
-               OpDecorate %_arr_S_uint_4 ArrayStride 80
-               OpDecorate %s DescriptorSet 0
-               OpDecorate %s Binding 1
-        %int = OpTypeInt 32 1
-      %float = OpTypeFloat 32
-    %v2float = OpTypeVector %float 2
-   %S_std140 = OpTypeStruct %int %v2float %v2float %v2float %int
-       %uint = OpTypeInt 32 0
-     %uint_4 = OpConstant %uint 4
-%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
-%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
-%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
-          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
-%mat3v2float = OpTypeMatrix %v2float 3
-          %S = OpTypeStruct %int %mat3v2float %int
-%_arr_S_uint_4 = OpTypeArray %S %uint_4
-    %u_block = OpTypeStruct %_arr_S_uint_4
-%_ptr_StorageBuffer_u_block = OpTypePointer StorageBuffer %u_block
-          %s = OpVariable %_ptr_StorageBuffer_u_block StorageBuffer
-         %17 = OpTypeFunction %S %S_std140
-         %28 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
-%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
-         %34 = OpConstantNull %_arr_S_uint_4
-%_ptr_Function_uint = OpTypePointer Function %uint
-         %37 = OpConstantNull %uint
-       %bool = OpTypeBool
-%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
-         %50 = OpConstantNull %_arr_S_std140_uint_4
-%_ptr_Function_S = OpTypePointer Function %S
-%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
-     %uint_1 = OpConstant %uint 1
-         %63 = OpTypeFunction %mat3v2float
-     %uint_0 = OpConstant %uint 0
-     %uint_2 = OpConstant %uint 2
-%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
-%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
-     %uint_3 = OpConstant %uint 3
-       %void = OpTypeVoid
-         %83 = OpTypeFunction %void
-%_ptr_StorageBuffer__arr_S_uint_4 = OpTypePointer StorageBuffer %_arr_S_uint_4
-%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
-      %int_1 = OpConstant %int 1
-%_ptr_StorageBuffer_S = OpTypePointer StorageBuffer %S
-      %int_3 = OpConstant %int 3
-%_ptr_StorageBuffer_mat3v2float = OpTypePointer StorageBuffer %mat3v2float
-        %103 = OpConstantNull %int
-%_ptr_StorageBuffer_v2float = OpTypePointer StorageBuffer %v2float
-     %conv_S = OpFunction %S None %17
-        %val = OpFunctionParameter %S_std140
-         %20 = OpLabel
-         %21 = OpCompositeExtract %int %val 0
-         %22 = OpCompositeExtract %v2float %val 1
-         %23 = OpCompositeExtract %v2float %val 2
-         %24 = OpCompositeExtract %v2float %val 3
-         %25 = OpCompositeConstruct %mat3v2float %22 %23 %24
-         %26 = OpCompositeExtract %int %val 4
-         %27 = OpCompositeConstruct %S %21 %25 %26
-               OpReturnValue %27
-               OpFunctionEnd
-%conv_arr4_S = OpFunction %_arr_S_uint_4 None %28
-      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
-         %31 = OpLabel
-        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %34
-          %i = OpVariable %_ptr_Function_uint Function %37
-%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %50
-               OpBranch %38
-         %38 = OpLabel
-               OpLoopMerge %39 %40 None
-               OpBranch %41
-         %41 = OpLabel
-         %43 = OpLoad %uint %i
-         %44 = OpULessThan %bool %43 %uint_4
-         %42 = OpLogicalNot %bool %44
-               OpSelectionMerge %46 None
-               OpBranchConditional %42 %47 %46
-         %47 = OpLabel
-               OpBranch %39
-         %46 = OpLabel
-               OpStore %var_for_index %val_0
-         %51 = OpLoad %uint %i
-         %53 = OpAccessChain %_ptr_Function_S %arr %51
-         %55 = OpLoad %uint %i
-         %57 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %55
-         %58 = OpLoad %S_std140 %57
-         %54 = OpFunctionCall %S %conv_S %58
-               OpStore %53 %54
-               OpBranch %40
-         %40 = OpLabel
-         %59 = OpLoad %uint %i
-         %61 = OpIAdd %uint %59 %uint_1
-               OpStore %i %61
-               OpBranch %38
-         %39 = OpLabel
-         %62 = OpLoad %_arr_S_uint_4 %arr
-               OpReturnValue %62
-               OpFunctionEnd
-%load_u_inner_2_m = OpFunction %mat3v2float None %63
-         %65 = OpLabel
-         %70 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
-         %73 = OpAccessChain %_ptr_Uniform_v2float %70 %uint_1
-         %74 = OpLoad %v2float %73
-         %76 = OpAccessChain %_ptr_Uniform_v2float %70 %uint_2
-         %77 = OpLoad %v2float %76
-         %80 = OpAccessChain %_ptr_Uniform_v2float %70 %uint_3
-         %81 = OpLoad %v2float %80
-         %82 = OpCompositeConstruct %mat3v2float %74 %77 %81
-               OpReturnValue %82
-               OpFunctionEnd
-          %f = OpFunction %void None %83
-         %86 = OpLabel
-         %88 = OpAccessChain %_ptr_StorageBuffer__arr_S_uint_4 %s %uint_0
-         %91 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
-         %92 = OpLoad %_arr_S_std140_uint_4 %91
-         %89 = OpFunctionCall %_arr_S_uint_4 %conv_arr4_S %92
-               OpStore %88 %89
-         %95 = OpAccessChain %_ptr_StorageBuffer_S %s %uint_0 %int_1
-         %97 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
-         %98 = OpLoad %S_std140 %97
-         %96 = OpFunctionCall %S %conv_S %98
-               OpStore %95 %96
-        %101 = OpAccessChain %_ptr_StorageBuffer_mat3v2float %s %uint_0 %int_3 %uint_1
-        %102 = OpFunctionCall %mat3v2float %load_u_inner_2_m
-               OpStore %101 %102
-        %105 = OpAccessChain %_ptr_StorageBuffer_v2float %s %uint_0 %int_1 %uint_1 %103
-        %106 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %37 %uint_2
-        %107 = OpLoad %v2float %106
-        %108 = OpVectorShuffle %v2float %107 %107 1 0
-               OpStore %105 %108
-               OpReturn
-               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2/to_storage.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x2/to_storage.wgsl.expected.wgsl
deleted file mode 100644
index 56b0f84..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat3x2/to_storage.wgsl.expected.wgsl
+++ /dev/null

@@ -1,18 +0,0 @@
-struct S {
-  before : i32,
-  @size(64)
-  m : mat3x2<f32>,
-  after : i32,
-}
-
-@group(0) @binding(0) var<uniform> u : array<S, 4>;
-
-@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
-
-@compute @workgroup_size(1)
-fn f() {
-  s = u;
-  s[1] = u[2];
-  s[3].m = u[2].m;
-  s[1].m[0] = u[0].m[1].yx;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2/to_workgroup.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x2/to_workgroup.wgsl
deleted file mode 100644
index 282f12d..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat3x2/to_workgroup.wgsl
+++ /dev/null

@@ -1,16 +0,0 @@
-struct S {
-  before : i32,
-  @size(64) m : mat3x2<f32>,
-  after : i32,
-}
-
-@group(0) @binding(0) var<uniform> u : array<S, 4>;
-var<workgroup> w : array<S, 4>;
-
-@compute @workgroup_size(1)
-fn f() {
-    w = u;
-    w[1] = u[2];
-    w[3].m = u[2].m;
-    w[1].m[0] = u[0].m[1].yx;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2/to_workgroup.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x2/to_workgroup.wgsl.expected.dxc.hlsl
deleted file mode 100644
index 1da5b98..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat3x2/to_workgroup.wgsl.expected.dxc.hlsl
+++ /dev/null

@@ -1,63 +0,0 @@
-struct S {
-  int before;
-  float3x2 m;
-  int after;
-};
-
-cbuffer cbuffer_u : register(b0, space0) {
-  uint4 u[20];
-};
-groupshared S w[4];
-
-struct tint_symbol_1 {
-  uint local_invocation_index : SV_GroupIndex;
-};
-
-float3x2 tint_symbol_5(uint4 buffer[20], uint offset) {
-  const uint scalar_offset = ((offset + 0u)) / 4;
-  uint4 ubo_load = buffer[scalar_offset / 4];
-  const uint scalar_offset_1 = ((offset + 8u)) / 4;
-  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
-  const uint scalar_offset_2 = ((offset + 16u)) / 4;
-  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
-  return float3x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)));
-}
-
-S tint_symbol_3(uint4 buffer[20], uint offset) {
-  const uint scalar_offset_3 = ((offset + 0u)) / 4;
-  const uint scalar_offset_4 = ((offset + 72u)) / 4;
-  const S tint_symbol_8 = {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4]), tint_symbol_5(buffer, (offset + 8u)), asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4])};
-  return tint_symbol_8;
-}
-
-typedef S tint_symbol_2_ret[4];
-tint_symbol_2_ret tint_symbol_2(uint4 buffer[20], uint offset) {
-  S arr[4] = (S[4])0;
-  {
-    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
-      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 80u)));
-    }
-  }
-  return arr;
-}
-
-void f_inner(uint local_invocation_index) {
-  {
-    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
-      const uint i = idx;
-      const S tint_symbol_7 = (S)0;
-      w[i] = tint_symbol_7;
-    }
-  }
-  GroupMemoryBarrierWithGroupSync();
-  w = tint_symbol_2(u, 0u);
-  w[1] = tint_symbol_3(u, 160u);
-  w[3].m = tint_symbol_5(u, 168u);
-  w[1].m[0] = asfloat(u[1].xy).yx;
-}
-
-[numthreads(1, 1, 1)]
-void f(tint_symbol_1 tint_symbol) {
-  f_inner(tint_symbol.local_invocation_index);
-  return;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2/to_workgroup.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x2/to_workgroup.wgsl.expected.fxc.hlsl
deleted file mode 100644
index 1da5b98..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat3x2/to_workgroup.wgsl.expected.fxc.hlsl
+++ /dev/null

@@ -1,63 +0,0 @@
-struct S {
-  int before;
-  float3x2 m;
-  int after;
-};
-
-cbuffer cbuffer_u : register(b0, space0) {
-  uint4 u[20];
-};
-groupshared S w[4];
-
-struct tint_symbol_1 {
-  uint local_invocation_index : SV_GroupIndex;
-};
-
-float3x2 tint_symbol_5(uint4 buffer[20], uint offset) {
-  const uint scalar_offset = ((offset + 0u)) / 4;
-  uint4 ubo_load = buffer[scalar_offset / 4];
-  const uint scalar_offset_1 = ((offset + 8u)) / 4;
-  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
-  const uint scalar_offset_2 = ((offset + 16u)) / 4;
-  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
-  return float3x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)));
-}
-
-S tint_symbol_3(uint4 buffer[20], uint offset) {
-  const uint scalar_offset_3 = ((offset + 0u)) / 4;
-  const uint scalar_offset_4 = ((offset + 72u)) / 4;
-  const S tint_symbol_8 = {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4]), tint_symbol_5(buffer, (offset + 8u)), asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4])};
-  return tint_symbol_8;
-}
-
-typedef S tint_symbol_2_ret[4];
-tint_symbol_2_ret tint_symbol_2(uint4 buffer[20], uint offset) {
-  S arr[4] = (S[4])0;
-  {
-    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
-      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 80u)));
-    }
-  }
-  return arr;
-}
-
-void f_inner(uint local_invocation_index) {
-  {
-    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
-      const uint i = idx;
-      const S tint_symbol_7 = (S)0;
-      w[i] = tint_symbol_7;
-    }
-  }
-  GroupMemoryBarrierWithGroupSync();
-  w = tint_symbol_2(u, 0u);
-  w[1] = tint_symbol_3(u, 160u);
-  w[3].m = tint_symbol_5(u, 168u);
-  w[1].m[0] = asfloat(u[1].xy).yx;
-}
-
-[numthreads(1, 1, 1)]
-void f(tint_symbol_1 tint_symbol) {
-  f_inner(tint_symbol.local_invocation_index);
-  return;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2/to_workgroup.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat3x2/to_workgroup.wgsl.expected.glsl
deleted file mode 100644
index 9c72f93..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat3x2/to_workgroup.wgsl.expected.glsl
+++ /dev/null

@@ -1,83 +0,0 @@
-#version 310 es
-
-struct S {
-  int before;
-  uint pad;
-  mat3x2 m;
-  uint pad_1;
-  uint pad_2;
-  uint pad_3;
-  uint pad_4;
-  uint pad_5;
-  uint pad_6;
-  uint pad_7;
-  uint pad_8;
-  uint pad_9;
-  uint pad_10;
-  int after;
-  uint pad_11;
-};
-
-struct S_std140 {
-  int before;
-  uint pad;
-  vec2 m_0;
-  vec2 m_1;
-  vec2 m_2;
-  uint pad_1;
-  uint pad_2;
-  uint pad_3;
-  uint pad_4;
-  uint pad_5;
-  uint pad_6;
-  uint pad_7;
-  uint pad_8;
-  uint pad_9;
-  uint pad_10;
-  int after;
-  uint pad_11;
-};
-
-layout(binding = 0, std140) uniform u_block_std140_ubo {
-  S_std140 inner[4];
-} u;
-
-shared S w[4];
-S conv_S(S_std140 val) {
-  return S(val.before, val.pad, mat3x2(val.m_0, val.m_1, val.m_2), val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.pad_7, val.pad_8, val.pad_9, val.pad_10, val.after, val.pad_11);
-}
-
-S[4] conv_arr4_S(S_std140 val[4]) {
-  S arr[4] = S[4](S(0, 0u, mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u), S(0, 0u, mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u), S(0, 0u, mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u), S(0, 0u, mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u));
-  {
-    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
-      arr[i] = conv_S(val[i]);
-    }
-  }
-  return arr;
-}
-
-mat3x2 load_u_inner_2_m() {
-  return mat3x2(u.inner[2u].m_0, u.inner[2u].m_1, u.inner[2u].m_2);
-}
-
-void f(uint local_invocation_index) {
-  {
-    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
-      uint i = idx;
-      S tint_symbol = S(0, 0u, mat3x2(vec2(0.0f), vec2(0.0f), vec2(0.0f)), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u);
-      w[i] = tint_symbol;
-    }
-  }
-  barrier();
-  w = conv_arr4_S(u.inner);
-  w[1] = conv_S(u.inner[2u]);
-  w[3].m = load_u_inner_2_m();
-  w[1].m[0] = u.inner[0u].m_1.yx;
-}
-
-layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
-void main() {
-  f(gl_LocalInvocationIndex);
-  return;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat3x2/to_workgroup.wgsl.expected.msl
deleted file mode 100644
index 97de8a8..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat3x2/to_workgroup.wgsl.expected.msl
+++ /dev/null

@@ -1,48 +0,0 @@
-#include <metal_stdlib>
-
-using namespace metal;
-
-template<typename T, size_t N>
-struct tint_array {
-    const constant T& operator[](size_t i) const constant { return elements[i]; }
-    device T& operator[](size_t i) device { return elements[i]; }
-    const device T& operator[](size_t i) const device { return elements[i]; }
-    thread T& operator[](size_t i) thread { return elements[i]; }
-    const thread T& operator[](size_t i) const thread { return elements[i]; }
-    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
-    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
-    T elements[N];
-};
-
-struct S {
-  /* 0x0000 */ int before;
-  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
-  /* 0x0008 */ float3x2 m;
-  /* 0x0020 */ tint_array<int8_t, 40> tint_pad_1;
-  /* 0x0048 */ int after;
-  /* 0x004c */ tint_array<int8_t, 4> tint_pad_2;
-};
-
-struct tint_symbol_6 {
-  tint_array<S, 4> w;
-};
-
-void f_inner(uint local_invocation_index, threadgroup tint_array<S, 4>* const tint_symbol_1, const constant tint_array<S, 4>* const tint_symbol_2) {
-  for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
-    uint const i = idx;
-    S const tint_symbol = S{};
-    (*(tint_symbol_1))[i] = tint_symbol;
-  }
-  threadgroup_barrier(mem_flags::mem_threadgroup);
-  *(tint_symbol_1) = *(tint_symbol_2);
-  (*(tint_symbol_1))[1] = (*(tint_symbol_2))[2];
-  (*(tint_symbol_1))[3].m = (*(tint_symbol_2))[2].m;
-  (*(tint_symbol_1))[1].m[0] = float2((*(tint_symbol_2))[0].m[1]).yx;
-}
-
-kernel void f(const constant tint_array<S, 4>* tint_symbol_5 [[buffer(0)]], threadgroup tint_symbol_6* tint_symbol_4 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
-  threadgroup tint_array<S, 4>* const tint_symbol_3 = &((*(tint_symbol_4)).w);
-  f_inner(local_invocation_index, tint_symbol_3, tint_symbol_5);
-  return;
-}
-

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2/to_workgroup.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat3x2/to_workgroup.wgsl.expected.spvasm
deleted file mode 100644
index be09c71..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat3x2/to_workgroup.wgsl.expected.spvasm
+++ /dev/null

@@ -1,216 +0,0 @@
-; SPIR-V
-; Version: 1.3
-; Generator: Google Tint Compiler; 0
-; Bound: 131
-; Schema: 0
-               OpCapability Shader
-               OpMemoryModel Logical GLSL450
-               OpEntryPoint GLCompute %f "f" %local_invocation_index_1
-               OpExecutionMode %f LocalSize 1 1 1
-               OpName %local_invocation_index_1 "local_invocation_index_1"
-               OpName %u_block_std140 "u_block_std140"
-               OpMemberName %u_block_std140 0 "inner"
-               OpName %S_std140 "S_std140"
-               OpMemberName %S_std140 0 "before"
-               OpMemberName %S_std140 1 "m_0"
-               OpMemberName %S_std140 2 "m_1"
-               OpMemberName %S_std140 3 "m_2"
-               OpMemberName %S_std140 4 "after"
-               OpName %u "u"
-               OpName %S "S"
-               OpMemberName %S 0 "before"
-               OpMemberName %S 1 "m"
-               OpMemberName %S 2 "after"
-               OpName %w "w"
-               OpName %conv_S "conv_S"
-               OpName %val "val"
-               OpName %conv_arr4_S "conv_arr4_S"
-               OpName %val_0 "val"
-               OpName %arr "arr"
-               OpName %i "i"
-               OpName %var_for_index "var_for_index"
-               OpName %load_u_inner_2_m "load_u_inner_2_m"
-               OpName %f_inner "f_inner"
-               OpName %local_invocation_index "local_invocation_index"
-               OpName %idx "idx"
-               OpName %f "f"
-               OpDecorate %local_invocation_index_1 BuiltIn LocalInvocationIndex
-               OpDecorate %u_block_std140 Block
-               OpMemberDecorate %u_block_std140 0 Offset 0
-               OpMemberDecorate %S_std140 0 Offset 0
-               OpMemberDecorate %S_std140 1 Offset 8
-               OpMemberDecorate %S_std140 2 Offset 16
-               OpMemberDecorate %S_std140 3 Offset 24
-               OpMemberDecorate %S_std140 4 Offset 72
-               OpDecorate %_arr_S_std140_uint_4 ArrayStride 80
-               OpDecorate %u NonWritable
-               OpDecorate %u DescriptorSet 0
-               OpDecorate %u Binding 0
-               OpMemberDecorate %S 0 Offset 0
-               OpMemberDecorate %S 1 Offset 8
-               OpMemberDecorate %S 1 ColMajor
-               OpMemberDecorate %S 1 MatrixStride 8
-               OpMemberDecorate %S 2 Offset 72
-               OpDecorate %_arr_S_uint_4 ArrayStride 80
-       %uint = OpTypeInt 32 0
-%_ptr_Input_uint = OpTypePointer Input %uint
-%local_invocation_index_1 = OpVariable %_ptr_Input_uint Input
-        %int = OpTypeInt 32 1
-      %float = OpTypeFloat 32
-    %v2float = OpTypeVector %float 2
-   %S_std140 = OpTypeStruct %int %v2float %v2float %v2float %int
-     %uint_4 = OpConstant %uint 4
-%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
-%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
-%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
-          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
-%mat3v2float = OpTypeMatrix %v2float 3
-          %S = OpTypeStruct %int %mat3v2float %int
-%_arr_S_uint_4 = OpTypeArray %S %uint_4
-%_ptr_Workgroup__arr_S_uint_4 = OpTypePointer Workgroup %_arr_S_uint_4
-          %w = OpVariable %_ptr_Workgroup__arr_S_uint_4 Workgroup
-         %18 = OpTypeFunction %S %S_std140
-         %29 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
-%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
-         %35 = OpConstantNull %_arr_S_uint_4
-%_ptr_Function_uint = OpTypePointer Function %uint
-         %38 = OpConstantNull %uint
-       %bool = OpTypeBool
-%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
-         %51 = OpConstantNull %_arr_S_std140_uint_4
-%_ptr_Function_S = OpTypePointer Function %S
-%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
-     %uint_1 = OpConstant %uint 1
-         %64 = OpTypeFunction %mat3v2float
-     %uint_0 = OpConstant %uint 0
-     %uint_2 = OpConstant %uint 2
-%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
-%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
-     %uint_3 = OpConstant %uint 3
-       %void = OpTypeVoid
-         %84 = OpTypeFunction %void %uint
-%_ptr_Workgroup_S = OpTypePointer Workgroup %S
-        %102 = OpConstantNull %S
-   %uint_264 = OpConstant %uint 264
-%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
-      %int_1 = OpConstant %int 1
-      %int_3 = OpConstant %int 3
-%_ptr_Workgroup_mat3v2float = OpTypePointer Workgroup %mat3v2float
-        %120 = OpConstantNull %int
-%_ptr_Workgroup_v2float = OpTypePointer Workgroup %v2float
-        %126 = OpTypeFunction %void
-     %conv_S = OpFunction %S None %18
-        %val = OpFunctionParameter %S_std140
-         %21 = OpLabel
-         %22 = OpCompositeExtract %int %val 0
-         %23 = OpCompositeExtract %v2float %val 1
-         %24 = OpCompositeExtract %v2float %val 2
-         %25 = OpCompositeExtract %v2float %val 3
-         %26 = OpCompositeConstruct %mat3v2float %23 %24 %25
-         %27 = OpCompositeExtract %int %val 4
-         %28 = OpCompositeConstruct %S %22 %26 %27
-               OpReturnValue %28
-               OpFunctionEnd
-%conv_arr4_S = OpFunction %_arr_S_uint_4 None %29
-      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
-         %32 = OpLabel
-        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %35
-          %i = OpVariable %_ptr_Function_uint Function %38
-%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %51
-               OpBranch %39
-         %39 = OpLabel
-               OpLoopMerge %40 %41 None
-               OpBranch %42
-         %42 = OpLabel
-         %44 = OpLoad %uint %i
-         %45 = OpULessThan %bool %44 %uint_4
-         %43 = OpLogicalNot %bool %45
-               OpSelectionMerge %47 None
-               OpBranchConditional %43 %48 %47
-         %48 = OpLabel
-               OpBranch %40
-         %47 = OpLabel
-               OpStore %var_for_index %val_0
-         %52 = OpLoad %uint %i
-         %54 = OpAccessChain %_ptr_Function_S %arr %52
-         %56 = OpLoad %uint %i
-         %58 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %56
-         %59 = OpLoad %S_std140 %58
-         %55 = OpFunctionCall %S %conv_S %59
-               OpStore %54 %55
-               OpBranch %41
-         %41 = OpLabel
-         %60 = OpLoad %uint %i
-         %62 = OpIAdd %uint %60 %uint_1
-               OpStore %i %62
-               OpBranch %39
-         %40 = OpLabel
-         %63 = OpLoad %_arr_S_uint_4 %arr
-               OpReturnValue %63
-               OpFunctionEnd
-%load_u_inner_2_m = OpFunction %mat3v2float None %64
-         %66 = OpLabel
-         %71 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
-         %74 = OpAccessChain %_ptr_Uniform_v2float %71 %uint_1
-         %75 = OpLoad %v2float %74
-         %77 = OpAccessChain %_ptr_Uniform_v2float %71 %uint_2
-         %78 = OpLoad %v2float %77
-         %81 = OpAccessChain %_ptr_Uniform_v2float %71 %uint_3
-         %82 = OpLoad %v2float %81
-         %83 = OpCompositeConstruct %mat3v2float %75 %78 %82
-               OpReturnValue %83
-               OpFunctionEnd
-    %f_inner = OpFunction %void None %84
-%local_invocation_index = OpFunctionParameter %uint
-         %88 = OpLabel
-        %idx = OpVariable %_ptr_Function_uint Function %38
-               OpStore %idx %local_invocation_index
-               OpBranch %90
-         %90 = OpLabel
-               OpLoopMerge %91 %92 None
-               OpBranch %93
-         %93 = OpLabel
-         %95 = OpLoad %uint %idx
-         %96 = OpULessThan %bool %95 %uint_4
-         %94 = OpLogicalNot %bool %96
-               OpSelectionMerge %97 None
-               OpBranchConditional %94 %98 %97
-         %98 = OpLabel
-               OpBranch %91
-         %97 = OpLabel
-         %99 = OpLoad %uint %idx
-        %101 = OpAccessChain %_ptr_Workgroup_S %w %99
-               OpStore %101 %102
-               OpBranch %92
-         %92 = OpLabel
-        %103 = OpLoad %uint %idx
-        %104 = OpIAdd %uint %103 %uint_1
-               OpStore %idx %104
-               OpBranch %90
-         %91 = OpLabel
-               OpControlBarrier %uint_2 %uint_2 %uint_264
-        %109 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
-        %110 = OpLoad %_arr_S_std140_uint_4 %109
-        %107 = OpFunctionCall %_arr_S_uint_4 %conv_arr4_S %110
-               OpStore %w %107
-        %112 = OpAccessChain %_ptr_Workgroup_S %w %int_1
-        %114 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
-        %115 = OpLoad %S_std140 %114
-        %113 = OpFunctionCall %S %conv_S %115
-               OpStore %112 %113
-        %118 = OpAccessChain %_ptr_Workgroup_mat3v2float %w %int_3 %uint_1
-        %119 = OpFunctionCall %mat3v2float %load_u_inner_2_m
-               OpStore %118 %119
-        %122 = OpAccessChain %_ptr_Workgroup_v2float %w %int_1 %uint_1 %120
-        %123 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %38 %uint_2
-        %124 = OpLoad %v2float %123
-        %125 = OpVectorShuffle %v2float %124 %124 1 0
-               OpStore %122 %125
-               OpReturn
-               OpFunctionEnd
-          %f = OpFunction %void None %126
-        %128 = OpLabel
-        %130 = OpLoad %uint %local_invocation_index_1
-        %129 = OpFunctionCall %void %f_inner %130
-               OpReturn
-               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2/to_workgroup.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x2/to_workgroup.wgsl.expected.wgsl
deleted file mode 100644
index 286d523..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat3x2/to_workgroup.wgsl.expected.wgsl
+++ /dev/null

@@ -1,18 +0,0 @@
-struct S {
-  before : i32,
-  @size(64)
-  m : mat3x2<f32>,
-  after : i32,
-}
-
-@group(0) @binding(0) var<uniform> u : array<S, 4>;
-
-var<workgroup> w : array<S, 4>;
-
-@compute @workgroup_size(1)
-fn f() {
-  w = u;
-  w[1] = u[2];
-  w[3].m = u[2].m;
-  w[1].m[0] = u[0].m[1].yx;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f16/dynamic_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/dynamic_index_via_ptr.wgsl
new file mode 100644
index 0000000..8404ee4
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/dynamic_index_via_ptr.wgsl

@@ -0,0 +1,34 @@
+enable f16;
+
+struct Inner {
+  @size(64)
+  m : mat3x2<f16>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+var<private> counter = 0;
+fn i() -> i32 { counter++; return counter; }
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a           = &a;
+  let p_a_i         = &((*p_a)[i()]);
+  let p_a_i_a       = &((*p_a_i).a);
+  let p_a_i_a_i     = &((*p_a_i_a)[i()]);
+  let p_a_i_a_i_m   = &((*p_a_i_a_i).m);
+  let p_a_i_a_i_m_i = &((*p_a_i_a_i_m)[i()]);
+
+
+  let l_a             : array<Outer, 4> =  *p_a;
+  let l_a_i           : Outer           =  *p_a_i;
+  let l_a_i_a         : array<Inner, 4> =  *p_a_i_a;
+  let l_a_i_a_i       : Inner           =  *p_a_i_a_i;
+  let l_a_i_a_i_m     : mat3x2<f16>     =  *p_a_i_a_i_m;
+  let l_a_i_a_i_m_i   : vec2<f16>       =  *p_a_i_a_i_m_i;
+  let l_a_i_a_i_m_i_i : f16             = (*p_a_i_a_i_m_i)[i()];
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..33c5e7a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,81 @@
+struct Inner {
+  matrix<float16_t, 3, 2> m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+matrix<float16_t, 3, 2> tint_symbol_8(uint4 buffer[64], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  return matrix<float16_t, 3, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))));
+}
+
+Inner tint_symbol_7(uint4 buffer[64], uint offset) {
+  const Inner tint_symbol_11 = {tint_symbol_8(buffer, (offset + 0u))};
+  return tint_symbol_11;
+}
+
+typedef Inner tint_symbol_6_ret[4];
+tint_symbol_6_ret tint_symbol_6(uint4 buffer[64], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_7(buffer, (offset + (i_1 * 64u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_5(uint4 buffer[64], uint offset) {
+  const Outer tint_symbol_12 = {tint_symbol_6(buffer, (offset + 0u))};
+  return tint_symbol_12;
+}
+
+typedef Outer tint_symbol_4_ret[4];
+tint_symbol_4_ret tint_symbol_4(uint4 buffer[64], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    for(uint i_2 = 0u; (i_2 < 4u); i_2 = (i_2 + 1u)) {
+      arr_1[i_2] = tint_symbol_5(buffer, (offset + (i_2 * 256u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_a_i_save = i();
+  const int p_a_i_a_i_save = i();
+  const int p_a_i_a_i_m_i_save = i();
+  const Outer l_a[4] = tint_symbol_4(a, 0u);
+  const Outer l_a_i = tint_symbol_5(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a[4] = tint_symbol_6(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a_i = tint_symbol_7(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const matrix<float16_t, 3, 2> l_a_i_a_i_m = tint_symbol_8(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const uint scalar_offset_3 = ((((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))) + (4u * uint(p_a_i_a_i_m_i_save)))) / 4;
+  uint ubo_load_3 = a[scalar_offset_3 / 4][scalar_offset_3 % 4];
+  const vector<float16_t, 2> l_a_i_a_i_m_i = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16)));
+  const int tint_symbol = p_a_i_save;
+  const int tint_symbol_1 = p_a_i_a_i_save;
+  const int tint_symbol_2 = p_a_i_a_i_m_i_save;
+  const int tint_symbol_3 = i();
+  const uint scalar_offset_bytes = (((((256u * uint(tint_symbol)) + (64u * uint(tint_symbol_1))) + (4u * uint(tint_symbol_2))) + (2u * uint(tint_symbol_3))));
+  const uint scalar_offset_index = scalar_offset_bytes / 4;
+  const float16_t l_a_i_a_i_m_i_i = float16_t(f16tof32(((a[scalar_offset_index / 4][scalar_offset_index % 4] >> (scalar_offset_bytes % 4 == 0 ? 0 : 16)) & 0xFFFF)));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..f9dc319
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,86 @@
+SKIP: FAILED
+
+struct Inner {
+  matrix<float16_t, 3, 2> m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+matrix<float16_t, 3, 2> tint_symbol_8(uint4 buffer[64], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  return matrix<float16_t, 3, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))));
+}
+
+Inner tint_symbol_7(uint4 buffer[64], uint offset) {
+  const Inner tint_symbol_11 = {tint_symbol_8(buffer, (offset + 0u))};
+  return tint_symbol_11;
+}
+
+typedef Inner tint_symbol_6_ret[4];
+tint_symbol_6_ret tint_symbol_6(uint4 buffer[64], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_7(buffer, (offset + (i_1 * 64u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_5(uint4 buffer[64], uint offset) {
+  const Outer tint_symbol_12 = {tint_symbol_6(buffer, (offset + 0u))};
+  return tint_symbol_12;
+}
+
+typedef Outer tint_symbol_4_ret[4];
+tint_symbol_4_ret tint_symbol_4(uint4 buffer[64], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    for(uint i_2 = 0u; (i_2 < 4u); i_2 = (i_2 + 1u)) {
+      arr_1[i_2] = tint_symbol_5(buffer, (offset + (i_2 * 256u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_a_i_save = i();
+  const int p_a_i_a_i_save = i();
+  const int p_a_i_a_i_m_i_save = i();
+  const Outer l_a[4] = tint_symbol_4(a, 0u);
+  const Outer l_a_i = tint_symbol_5(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a[4] = tint_symbol_6(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a_i = tint_symbol_7(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const matrix<float16_t, 3, 2> l_a_i_a_i_m = tint_symbol_8(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const uint scalar_offset_3 = ((((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))) + (4u * uint(p_a_i_a_i_m_i_save)))) / 4;
+  uint ubo_load_3 = a[scalar_offset_3 / 4][scalar_offset_3 % 4];
+  const vector<float16_t, 2> l_a_i_a_i_m_i = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16)));
+  const int tint_symbol = p_a_i_save;
+  const int tint_symbol_1 = p_a_i_a_i_save;
+  const int tint_symbol_2 = p_a_i_a_i_m_i_save;
+  const int tint_symbol_3 = i();
+  const uint scalar_offset_bytes = (((((256u * uint(tint_symbol)) + (64u * uint(tint_symbol_1))) + (4u * uint(tint_symbol_2))) + (2u * uint(tint_symbol_3))));
+  const uint scalar_offset_index = scalar_offset_bytes / 4;
+  const float16_t l_a_i_a_i_m_i_i = float16_t(f16tof32(((a[scalar_offset_index / 4][scalar_offset_index % 4] >> (scalar_offset_bytes % 4 == 0 ? 0 : 16)) & 0xFFFF)));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000002078397D2F0(2,10-18): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f16/dynamic_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/dynamic_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..369aa2b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/dynamic_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,158 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct Inner {
+  f16mat3x2 m;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+};
+
+struct Inner_std140 {
+  f16vec2 m_0;
+  f16vec2 m_1;
+  f16vec2 m_2;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+};
+
+struct Outer {
+  Inner a[4];
+};
+
+struct Outer_std140 {
+  Inner_std140 a[4];
+};
+
+layout(binding = 0, std140) uniform a_block_std140_ubo {
+  Outer_std140 inner[4];
+} a;
+
+int counter = 0;
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+Inner conv_Inner(Inner_std140 val) {
+  return Inner(f16mat3x2(val.m_0, val.m_1, val.m_2), val.pad, val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.pad_7, val.pad_8, val.pad_9, val.pad_10, val.pad_11, val.pad_12);
+}
+
+Inner[4] conv_arr4_Inner(Inner_std140 val[4]) {
+  Inner arr[4] = Inner[4](Inner(f16mat3x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_Inner(val[i]);
+    }
+  }
+  return arr;
+}
+
+Outer conv_Outer(Outer_std140 val) {
+  return Outer(conv_arr4_Inner(val.a));
+}
+
+Outer[4] conv_arr4_Outer(Outer_std140 val[4]) {
+  Outer arr[4] = Outer[4](Outer(Inner[4](Inner(f16mat3x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))), Outer(Inner[4](Inner(f16mat3x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))), Outer(Inner[4](Inner(f16mat3x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))), Outer(Inner[4](Inner(f16mat3x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_Outer(val[i]);
+    }
+  }
+  return arr;
+}
+
+f16mat3x2 load_a_inner_p0_a_p1_m(uint p0, uint p1) {
+  uint s_save = p0;
+  uint s_save_1 = p1;
+  return f16mat3x2(a.inner[s_save].a[s_save_1].m_0, a.inner[s_save].a[s_save_1].m_1, a.inner[s_save].a[s_save_1].m_2);
+}
+
+f16vec2 load_a_inner_p0_a_p1_m_p2(uint p0, uint p1, uint p2) {
+  switch(p2) {
+    case 0u: {
+      return a.inner[p0].a[p1].m_0;
+      break;
+    }
+    case 1u: {
+      return a.inner[p0].a[p1].m_1;
+      break;
+    }
+    case 2u: {
+      return a.inner[p0].a[p1].m_2;
+      break;
+    }
+    default: {
+      return f16vec2(0.0hf);
+      break;
+    }
+  }
+}
+
+float16_t load_a_inner_p0_a_p1_m_p2_p3(uint p0, uint p1, uint p2, uint p3) {
+  switch(p2) {
+    case 0u: {
+      return a.inner[p0].a[p1].m_0[p3];
+      break;
+    }
+    case 1u: {
+      return a.inner[p0].a[p1].m_1[p3];
+      break;
+    }
+    case 2u: {
+      return a.inner[p0].a[p1].m_2[p3];
+      break;
+    }
+    default: {
+      return 0.0hf;
+      break;
+    }
+  }
+}
+
+void f() {
+  Outer p_a[4] = conv_arr4_Outer(a.inner);
+  int tint_symbol = i();
+  Outer p_a_i = conv_Outer(a.inner[tint_symbol]);
+  Inner p_a_i_a[4] = conv_arr4_Inner(a.inner[tint_symbol].a);
+  int tint_symbol_1 = i();
+  Inner p_a_i_a_i = conv_Inner(a.inner[tint_symbol].a[tint_symbol_1]);
+  f16mat3x2 p_a_i_a_i_m = load_a_inner_p0_a_p1_m(uint(tint_symbol), uint(tint_symbol_1));
+  int tint_symbol_2 = i();
+  f16vec2 p_a_i_a_i_m_i = load_a_inner_p0_a_p1_m_p2(uint(tint_symbol), uint(tint_symbol_1), uint(tint_symbol_2));
+  Outer l_a[4] = conv_arr4_Outer(a.inner);
+  Outer l_a_i = conv_Outer(a.inner[tint_symbol]);
+  Inner l_a_i_a[4] = conv_arr4_Inner(a.inner[tint_symbol].a);
+  Inner l_a_i_a_i = conv_Inner(a.inner[tint_symbol].a[tint_symbol_1]);
+  f16mat3x2 l_a_i_a_i_m = load_a_inner_p0_a_p1_m(uint(tint_symbol), uint(tint_symbol_1));
+  f16vec2 l_a_i_a_i_m_i = load_a_inner_p0_a_p1_m_p2(uint(tint_symbol), uint(tint_symbol_1), uint(tint_symbol_2));
+  int tint_symbol_3 = i();
+  float16_t l_a_i_a_i_m_i_i = load_a_inner_p0_a_p1_m_p2_p3(uint(tint_symbol), uint(tint_symbol_1), uint(tint_symbol_2), uint(tint_symbol_3));
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f16/dynamic_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/dynamic_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..322fa8e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/dynamic_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,49 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct Inner {
+  /* 0x0000 */ half3x2 m;
+  /* 0x000c */ tint_array<int8_t, 52> tint_pad;
+};
+
+struct Outer {
+  /* 0x0000 */ tint_array<Inner, 4> a;
+};
+
+int i() {
+  thread int tint_symbol_4 = 0;
+  tint_symbol_4 = as_type<int>((as_type<uint>(tint_symbol_4) + as_type<uint>(1)));
+  return tint_symbol_4;
+}
+
+kernel void f(const constant tint_array<Outer, 4>* tint_symbol_5 [[buffer(0)]]) {
+  int const tint_symbol = i();
+  int const p_a_i_save = tint_symbol;
+  int const tint_symbol_1 = i();
+  int const p_a_i_a_i_save = tint_symbol_1;
+  int const tint_symbol_2 = i();
+  int const p_a_i_a_i_m_i_save = tint_symbol_2;
+  tint_array<Outer, 4> const l_a = *(tint_symbol_5);
+  Outer const l_a_i = (*(tint_symbol_5))[p_a_i_save];
+  tint_array<Inner, 4> const l_a_i_a = (*(tint_symbol_5))[p_a_i_save].a;
+  Inner const l_a_i_a_i = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save];
+  half3x2 const l_a_i_a_i_m = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save].m;
+  half2 const l_a_i_a_i_m_i = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save].m[p_a_i_a_i_m_i_save];
+  int const tint_symbol_3 = i();
+  half const l_a_i_a_i_m_i_i = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save].m[p_a_i_a_i_m_i_save][tint_symbol_3];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f16/dynamic_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/dynamic_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..c17857d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/dynamic_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,324 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 204
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %a_block_std140 "a_block_std140"
+               OpMemberName %a_block_std140 0 "inner"
+               OpName %Outer_std140 "Outer_std140"
+               OpMemberName %Outer_std140 0 "a"
+               OpName %Inner_std140 "Inner_std140"
+               OpMemberName %Inner_std140 0 "m_0"
+               OpMemberName %Inner_std140 1 "m_1"
+               OpMemberName %Inner_std140 2 "m_2"
+               OpName %a "a"
+               OpName %counter "counter"
+               OpName %i "i"
+               OpName %Inner "Inner"
+               OpMemberName %Inner 0 "m"
+               OpName %conv_Inner "conv_Inner"
+               OpName %val "val"
+               OpName %conv_arr4_Inner "conv_arr4_Inner"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i_0 "i"
+               OpName %var_for_index_1 "var_for_index_1"
+               OpName %Outer "Outer"
+               OpMemberName %Outer 0 "a"
+               OpName %conv_Outer "conv_Outer"
+               OpName %val_1 "val"
+               OpName %conv_arr4_Outer "conv_arr4_Outer"
+               OpName %val_2 "val"
+               OpName %arr_0 "arr"
+               OpName %i_1 "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_a_inner_p0_a_p1_m "load_a_inner_p0_a_p1_m"
+               OpName %p0 "p0"
+               OpName %p1 "p1"
+               OpName %load_a_inner_p0_a_p1_m_p2 "load_a_inner_p0_a_p1_m_p2"
+               OpName %p0_0 "p0"
+               OpName %p1_0 "p1"
+               OpName %p2 "p2"
+               OpName %load_a_inner_p0_a_p1_m_p2_p3 "load_a_inner_p0_a_p1_m_p2_p3"
+               OpName %p0_1 "p0"
+               OpName %p1_1 "p1"
+               OpName %p2_0 "p2"
+               OpName %p3 "p3"
+               OpName %f "f"
+               OpDecorate %a_block_std140 Block
+               OpMemberDecorate %a_block_std140 0 Offset 0
+               OpMemberDecorate %Outer_std140 0 Offset 0
+               OpMemberDecorate %Inner_std140 0 Offset 0
+               OpMemberDecorate %Inner_std140 1 Offset 4
+               OpMemberDecorate %Inner_std140 2 Offset 8
+               OpDecorate %_arr_Inner_std140_uint_4 ArrayStride 64
+               OpDecorate %_arr_Outer_std140_uint_4 ArrayStride 256
+               OpDecorate %a NonWritable
+               OpDecorate %a DescriptorSet 0
+               OpDecorate %a Binding 0
+               OpMemberDecorate %Inner 0 Offset 0
+               OpMemberDecorate %Inner 0 ColMajor
+               OpMemberDecorate %Inner 0 MatrixStride 4
+               OpDecorate %_arr_Inner_uint_4 ArrayStride 64
+               OpMemberDecorate %Outer 0 Offset 0
+               OpDecorate %_arr_Outer_uint_4 ArrayStride 256
+       %half = OpTypeFloat 16
+     %v2half = OpTypeVector %half 2
+%Inner_std140 = OpTypeStruct %v2half %v2half %v2half
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_Inner_std140_uint_4 = OpTypeArray %Inner_std140 %uint_4
+%Outer_std140 = OpTypeStruct %_arr_Inner_std140_uint_4
+%_arr_Outer_std140_uint_4 = OpTypeArray %Outer_std140 %uint_4
+%a_block_std140 = OpTypeStruct %_arr_Outer_std140_uint_4
+%_ptr_Uniform_a_block_std140 = OpTypePointer Uniform %a_block_std140
+          %a = OpVariable %_ptr_Uniform_a_block_std140 Uniform
+        %int = OpTypeInt 32 1
+         %13 = OpConstantNull %int
+%_ptr_Private_int = OpTypePointer Private %int
+    %counter = OpVariable %_ptr_Private_int Private %13
+         %16 = OpTypeFunction %int
+      %int_1 = OpConstant %int 1
+ %mat3v2half = OpTypeMatrix %v2half 3
+      %Inner = OpTypeStruct %mat3v2half
+         %23 = OpTypeFunction %Inner %Inner_std140
+%_arr_Inner_uint_4 = OpTypeArray %Inner %uint_4
+         %34 = OpTypeFunction %_arr_Inner_uint_4 %_arr_Inner_std140_uint_4
+%_ptr_Function__arr_Inner_uint_4 = OpTypePointer Function %_arr_Inner_uint_4
+         %41 = OpConstantNull %_arr_Inner_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %44 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_Inner_std140_uint_4 = OpTypePointer Function %_arr_Inner_std140_uint_4
+         %57 = OpConstantNull %_arr_Inner_std140_uint_4
+%_ptr_Function_Inner = OpTypePointer Function %Inner
+%_ptr_Function_Inner_std140 = OpTypePointer Function %Inner_std140
+     %uint_1 = OpConstant %uint 1
+      %Outer = OpTypeStruct %_arr_Inner_uint_4
+         %70 = OpTypeFunction %Outer %Outer_std140
+%_arr_Outer_uint_4 = OpTypeArray %Outer %uint_4
+         %78 = OpTypeFunction %_arr_Outer_uint_4 %_arr_Outer_std140_uint_4
+%_ptr_Function__arr_Outer_uint_4 = OpTypePointer Function %_arr_Outer_uint_4
+         %85 = OpConstantNull %_arr_Outer_uint_4
+%_ptr_Function__arr_Outer_std140_uint_4 = OpTypePointer Function %_arr_Outer_std140_uint_4
+         %98 = OpConstantNull %_arr_Outer_std140_uint_4
+%_ptr_Function_Outer = OpTypePointer Function %Outer
+%_ptr_Function_Outer_std140 = OpTypePointer Function %Outer_std140
+        %110 = OpTypeFunction %mat3v2half %uint %uint
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_Inner_std140 = OpTypePointer Uniform %Inner_std140
+%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half
+     %uint_2 = OpConstant %uint 2
+        %131 = OpTypeFunction %v2half %uint %uint %uint
+        %148 = OpConstantNull %v2half
+        %149 = OpTypeFunction %half %uint %uint %uint %uint
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+        %168 = OpConstantNull %half
+       %void = OpTypeVoid
+        %169 = OpTypeFunction %void
+%_ptr_Uniform__arr_Outer_std140_uint_4 = OpTypePointer Uniform %_arr_Outer_std140_uint_4
+%_ptr_Uniform_Outer_std140 = OpTypePointer Uniform %Outer_std140
+%_ptr_Uniform__arr_Inner_std140_uint_4 = OpTypePointer Uniform %_arr_Inner_std140_uint_4
+          %i = OpFunction %int None %16
+         %18 = OpLabel
+         %19 = OpLoad %int %counter
+         %21 = OpIAdd %int %19 %int_1
+               OpStore %counter %21
+         %22 = OpLoad %int %counter
+               OpReturnValue %22
+               OpFunctionEnd
+ %conv_Inner = OpFunction %Inner None %23
+        %val = OpFunctionParameter %Inner_std140
+         %28 = OpLabel
+         %29 = OpCompositeExtract %v2half %val 0
+         %30 = OpCompositeExtract %v2half %val 1
+         %31 = OpCompositeExtract %v2half %val 2
+         %32 = OpCompositeConstruct %mat3v2half %29 %30 %31
+         %33 = OpCompositeConstruct %Inner %32
+               OpReturnValue %33
+               OpFunctionEnd
+%conv_arr4_Inner = OpFunction %_arr_Inner_uint_4 None %34
+      %val_0 = OpFunctionParameter %_arr_Inner_std140_uint_4
+         %38 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_Inner_uint_4 Function %41
+        %i_0 = OpVariable %_ptr_Function_uint Function %44
+%var_for_index_1 = OpVariable %_ptr_Function__arr_Inner_std140_uint_4 Function %57
+               OpBranch %45
+         %45 = OpLabel
+               OpLoopMerge %46 %47 None
+               OpBranch %48
+         %48 = OpLabel
+         %50 = OpLoad %uint %i_0
+         %51 = OpULessThan %bool %50 %uint_4
+         %49 = OpLogicalNot %bool %51
+               OpSelectionMerge %53 None
+               OpBranchConditional %49 %54 %53
+         %54 = OpLabel
+               OpBranch %46
+         %53 = OpLabel
+               OpStore %var_for_index_1 %val_0
+         %58 = OpLoad %uint %i_0
+         %60 = OpAccessChain %_ptr_Function_Inner %arr %58
+         %62 = OpLoad %uint %i_0
+         %64 = OpAccessChain %_ptr_Function_Inner_std140 %var_for_index_1 %62
+         %65 = OpLoad %Inner_std140 %64
+         %61 = OpFunctionCall %Inner %conv_Inner %65
+               OpStore %60 %61
+               OpBranch %47
+         %47 = OpLabel
+         %66 = OpLoad %uint %i_0
+         %68 = OpIAdd %uint %66 %uint_1
+               OpStore %i_0 %68
+               OpBranch %45
+         %46 = OpLabel
+         %69 = OpLoad %_arr_Inner_uint_4 %arr
+               OpReturnValue %69
+               OpFunctionEnd
+ %conv_Outer = OpFunction %Outer None %70
+      %val_1 = OpFunctionParameter %Outer_std140
+         %74 = OpLabel
+         %76 = OpCompositeExtract %_arr_Inner_std140_uint_4 %val_1 0
+         %75 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr4_Inner %76
+         %77 = OpCompositeConstruct %Outer %75
+               OpReturnValue %77
+               OpFunctionEnd
+%conv_arr4_Outer = OpFunction %_arr_Outer_uint_4 None %78
+      %val_2 = OpFunctionParameter %_arr_Outer_std140_uint_4
+         %82 = OpLabel
+      %arr_0 = OpVariable %_ptr_Function__arr_Outer_uint_4 Function %85
+        %i_1 = OpVariable %_ptr_Function_uint Function %44
+%var_for_index = OpVariable %_ptr_Function__arr_Outer_std140_uint_4 Function %98
+               OpBranch %87
+         %87 = OpLabel
+               OpLoopMerge %88 %89 None
+               OpBranch %90
+         %90 = OpLabel
+         %92 = OpLoad %uint %i_1
+         %93 = OpULessThan %bool %92 %uint_4
+         %91 = OpLogicalNot %bool %93
+               OpSelectionMerge %94 None
+               OpBranchConditional %91 %95 %94
+         %95 = OpLabel
+               OpBranch %88
+         %94 = OpLabel
+               OpStore %var_for_index %val_2
+         %99 = OpLoad %uint %i_1
+        %101 = OpAccessChain %_ptr_Function_Outer %arr_0 %99
+        %103 = OpLoad %uint %i_1
+        %105 = OpAccessChain %_ptr_Function_Outer_std140 %var_for_index %103
+        %106 = OpLoad %Outer_std140 %105
+        %102 = OpFunctionCall %Outer %conv_Outer %106
+               OpStore %101 %102
+               OpBranch %89
+         %89 = OpLabel
+        %107 = OpLoad %uint %i_1
+        %108 = OpIAdd %uint %107 %uint_1
+               OpStore %i_1 %108
+               OpBranch %87
+         %88 = OpLabel
+        %109 = OpLoad %_arr_Outer_uint_4 %arr_0
+               OpReturnValue %109
+               OpFunctionEnd
+%load_a_inner_p0_a_p1_m = OpFunction %mat3v2half None %110
+         %p0 = OpFunctionParameter %uint
+         %p1 = OpFunctionParameter %uint
+        %114 = OpLabel
+        %118 = OpAccessChain %_ptr_Uniform_Inner_std140 %a %uint_0 %p0 %uint_0 %p1
+        %121 = OpAccessChain %_ptr_Uniform_v2half %118 %uint_0
+        %122 = OpLoad %v2half %121
+        %124 = OpAccessChain %_ptr_Uniform_v2half %118 %uint_1
+        %125 = OpLoad %v2half %124
+        %128 = OpAccessChain %_ptr_Uniform_v2half %118 %uint_2
+        %129 = OpLoad %v2half %128
+        %130 = OpCompositeConstruct %mat3v2half %122 %125 %129
+               OpReturnValue %130
+               OpFunctionEnd
+%load_a_inner_p0_a_p1_m_p2 = OpFunction %v2half None %131
+       %p0_0 = OpFunctionParameter %uint
+       %p1_0 = OpFunctionParameter %uint
+         %p2 = OpFunctionParameter %uint
+        %136 = OpLabel
+               OpSelectionMerge %137 None
+               OpSwitch %p2 %138 0 %139 1 %140 2 %141
+        %139 = OpLabel
+        %142 = OpAccessChain %_ptr_Uniform_v2half %a %uint_0 %p0_0 %uint_0 %p1_0 %uint_0
+        %143 = OpLoad %v2half %142
+               OpReturnValue %143
+        %140 = OpLabel
+        %144 = OpAccessChain %_ptr_Uniform_v2half %a %uint_0 %p0_0 %uint_0 %p1_0 %uint_1
+        %145 = OpLoad %v2half %144
+               OpReturnValue %145
+        %141 = OpLabel
+        %146 = OpAccessChain %_ptr_Uniform_v2half %a %uint_0 %p0_0 %uint_0 %p1_0 %uint_2
+        %147 = OpLoad %v2half %146
+               OpReturnValue %147
+        %138 = OpLabel
+               OpReturnValue %148
+        %137 = OpLabel
+               OpReturnValue %148
+               OpFunctionEnd
+%load_a_inner_p0_a_p1_m_p2_p3 = OpFunction %half None %149
+       %p0_1 = OpFunctionParameter %uint
+       %p1_1 = OpFunctionParameter %uint
+       %p2_0 = OpFunctionParameter %uint
+         %p3 = OpFunctionParameter %uint
+        %155 = OpLabel
+               OpSelectionMerge %156 None
+               OpSwitch %p2_0 %157 0 %158 1 %159 2 %160
+        %158 = OpLabel
+        %162 = OpAccessChain %_ptr_Uniform_half %a %uint_0 %p0_1 %uint_0 %p1_1 %uint_0 %p3
+        %163 = OpLoad %half %162
+               OpReturnValue %163
+        %159 = OpLabel
+        %164 = OpAccessChain %_ptr_Uniform_half %a %uint_0 %p0_1 %uint_0 %p1_1 %uint_1 %p3
+        %165 = OpLoad %half %164
+               OpReturnValue %165
+        %160 = OpLabel
+        %166 = OpAccessChain %_ptr_Uniform_half %a %uint_0 %p0_1 %uint_0 %p1_1 %uint_2 %p3
+        %167 = OpLoad %half %166
+               OpReturnValue %167
+        %157 = OpLabel
+               OpReturnValue %168
+        %156 = OpLabel
+               OpReturnValue %168
+               OpFunctionEnd
+          %f = OpFunction %void None %169
+        %172 = OpLabel
+        %173 = OpFunctionCall %int %i
+        %174 = OpFunctionCall %int %i
+        %175 = OpFunctionCall %int %i
+        %178 = OpAccessChain %_ptr_Uniform__arr_Outer_std140_uint_4 %a %uint_0
+        %179 = OpLoad %_arr_Outer_std140_uint_4 %178
+        %176 = OpFunctionCall %_arr_Outer_uint_4 %conv_arr4_Outer %179
+        %182 = OpAccessChain %_ptr_Uniform_Outer_std140 %a %uint_0 %173
+        %183 = OpLoad %Outer_std140 %182
+        %180 = OpFunctionCall %Outer %conv_Outer %183
+        %186 = OpAccessChain %_ptr_Uniform__arr_Inner_std140_uint_4 %a %uint_0 %173 %uint_0
+        %187 = OpLoad %_arr_Inner_std140_uint_4 %186
+        %184 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr4_Inner %187
+        %189 = OpAccessChain %_ptr_Uniform_Inner_std140 %a %uint_0 %173 %uint_0 %174
+        %190 = OpLoad %Inner_std140 %189
+        %188 = OpFunctionCall %Inner %conv_Inner %190
+        %192 = OpBitcast %uint %173
+        %193 = OpBitcast %uint %174
+        %191 = OpFunctionCall %mat3v2half %load_a_inner_p0_a_p1_m %192 %193
+        %195 = OpBitcast %uint %173
+        %196 = OpBitcast %uint %174
+        %197 = OpBitcast %uint %175
+        %194 = OpFunctionCall %v2half %load_a_inner_p0_a_p1_m_p2 %195 %196 %197
+        %198 = OpFunctionCall %int %i
+        %200 = OpBitcast %uint %173
+        %201 = OpBitcast %uint %174
+        %202 = OpBitcast %uint %175
+        %203 = OpBitcast %uint %198
+        %199 = OpFunctionCall %half %load_a_inner_p0_a_p1_m_p2_p3 %200 %201 %202 %203
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f16/dynamic_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/dynamic_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..9ffd10c
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/dynamic_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,36 @@
+enable f16;
+
+struct Inner {
+  @size(64)
+  m : mat3x2<f16>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+var<private> counter = 0;
+
+fn i() -> i32 {
+  counter++;
+  return counter;
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &(a);
+  let p_a_i = &((*(p_a))[i()]);
+  let p_a_i_a = &((*(p_a_i)).a);
+  let p_a_i_a_i = &((*(p_a_i_a))[i()]);
+  let p_a_i_a_i_m = &((*(p_a_i_a_i)).m);
+  let p_a_i_a_i_m_i = &((*(p_a_i_a_i_m))[i()]);
+  let l_a : array<Outer, 4> = *(p_a);
+  let l_a_i : Outer = *(p_a_i);
+  let l_a_i_a : array<Inner, 4> = *(p_a_i_a);
+  let l_a_i_a_i : Inner = *(p_a_i_a_i);
+  let l_a_i_a_i_m : mat3x2<f16> = *(p_a_i_a_i_m);
+  let l_a_i_a_i_m_i : vec2<f16> = *(p_a_i_a_i_m_i);
+  let l_a_i_a_i_m_i_i : f16 = (*(p_a_i_a_i_m_i))[i()];
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f16/static_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/static_index_via_ptr.wgsl
new file mode 100644
index 0000000..59e2db0
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/static_index_via_ptr.wgsl

@@ -0,0 +1,31 @@
+enable f16;
+
+struct Inner {
+  @size(64)
+  m : mat3x2<f16>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &a;
+  let p_a_3 = &((*p_a)[3]);
+  let p_a_3_a = &((*p_a_3).a);
+  let p_a_3_a_2 = &((*p_a_3_a)[2]);
+  let p_a_3_a_2_m = &((*p_a_3_a_2).m);
+  let p_a_3_a_2_m_1 = &((*p_a_3_a_2_m)[1]);
+
+
+  let l_a             : array<Outer, 4> = *p_a;
+  let l_a_3           : Outer           = *p_a_3;
+  let l_a_3_a         : array<Inner, 4> = *p_a_3_a;
+  let l_a_3_a_2       : Inner           = *p_a_3_a_2;
+  let l_a_3_a_2_m     : mat3x2<f16>     = *p_a_3_a_2_m;
+  let l_a_3_a_2_m_1   : vec2<f16>       = *p_a_3_a_2_m_1;
+  let l_a_3_a_2_m_1_0 : f16             = (*p_a_3_a_2_m_1)[0];
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..551b533
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,65 @@
+struct Inner {
+  matrix<float16_t, 3, 2> m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];
+};
+
+matrix<float16_t, 3, 2> tint_symbol_4(uint4 buffer[64], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  return matrix<float16_t, 3, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))));
+}
+
+Inner tint_symbol_3(uint4 buffer[64], uint offset) {
+  const Inner tint_symbol_7 = {tint_symbol_4(buffer, (offset + 0u))};
+  return tint_symbol_7;
+}
+
+typedef Inner tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[64], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_3(buffer, (offset + (i * 64u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_1(uint4 buffer[64], uint offset) {
+  const Outer tint_symbol_8 = {tint_symbol_2(buffer, (offset + 0u))};
+  return tint_symbol_8;
+}
+
+typedef Outer tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[64], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr_1[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 256u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const Outer l_a[4] = tint_symbol(a, 0u);
+  const Outer l_a_3 = tint_symbol_1(a, 768u);
+  const Inner l_a_3_a[4] = tint_symbol_2(a, 768u);
+  const Inner l_a_3_a_2 = tint_symbol_3(a, 896u);
+  const matrix<float16_t, 3, 2> l_a_3_a_2_m = tint_symbol_4(a, 896u);
+  uint ubo_load_3 = a[56].y;
+  const vector<float16_t, 2> l_a_3_a_2_m_1 = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16)));
+  const float16_t l_a_3_a_2_m_1_0 = float16_t(f16tof32(((a[56].y) & 0xFFFF)));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..c63a31e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,70 @@
+SKIP: FAILED
+
+struct Inner {
+  matrix<float16_t, 3, 2> m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];
+};
+
+matrix<float16_t, 3, 2> tint_symbol_4(uint4 buffer[64], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  return matrix<float16_t, 3, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))));
+}
+
+Inner tint_symbol_3(uint4 buffer[64], uint offset) {
+  const Inner tint_symbol_7 = {tint_symbol_4(buffer, (offset + 0u))};
+  return tint_symbol_7;
+}
+
+typedef Inner tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[64], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_3(buffer, (offset + (i * 64u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_1(uint4 buffer[64], uint offset) {
+  const Outer tint_symbol_8 = {tint_symbol_2(buffer, (offset + 0u))};
+  return tint_symbol_8;
+}
+
+typedef Outer tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[64], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr_1[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 256u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const Outer l_a[4] = tint_symbol(a, 0u);
+  const Outer l_a_3 = tint_symbol_1(a, 768u);
+  const Inner l_a_3_a[4] = tint_symbol_2(a, 768u);
+  const Inner l_a_3_a_2 = tint_symbol_3(a, 896u);
+  const matrix<float16_t, 3, 2> l_a_3_a_2_m = tint_symbol_4(a, 896u);
+  uint ubo_load_3 = a[56].y;
+  const vector<float16_t, 2> l_a_3_a_2_m_1 = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16)));
+  const float16_t l_a_3_a_2_m_1_0 = float16_t(f16tof32(((a[56].y) & 0xFFFF)));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x00000229471C1EC0(2,10-18): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f16/static_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/static_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..987efda
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/static_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,104 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct Inner {
+  f16mat3x2 m;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+};
+
+struct Inner_std140 {
+  f16vec2 m_0;
+  f16vec2 m_1;
+  f16vec2 m_2;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+};
+
+struct Outer {
+  Inner a[4];
+};
+
+struct Outer_std140 {
+  Inner_std140 a[4];
+};
+
+layout(binding = 0, std140) uniform a_block_std140_ubo {
+  Outer_std140 inner[4];
+} a;
+
+Inner conv_Inner(Inner_std140 val) {
+  return Inner(f16mat3x2(val.m_0, val.m_1, val.m_2), val.pad, val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.pad_7, val.pad_8, val.pad_9, val.pad_10, val.pad_11, val.pad_12);
+}
+
+Inner[4] conv_arr4_Inner(Inner_std140 val[4]) {
+  Inner arr[4] = Inner[4](Inner(f16mat3x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_Inner(val[i]);
+    }
+  }
+  return arr;
+}
+
+Outer conv_Outer(Outer_std140 val) {
+  return Outer(conv_arr4_Inner(val.a));
+}
+
+Outer[4] conv_arr4_Outer(Outer_std140 val[4]) {
+  Outer arr[4] = Outer[4](Outer(Inner[4](Inner(f16mat3x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))), Outer(Inner[4](Inner(f16mat3x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))), Outer(Inner[4](Inner(f16mat3x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))), Outer(Inner[4](Inner(f16mat3x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_Outer(val[i]);
+    }
+  }
+  return arr;
+}
+
+f16mat3x2 load_a_inner_3_a_2_m() {
+  return f16mat3x2(a.inner[3u].a[2u].m_0, a.inner[3u].a[2u].m_1, a.inner[3u].a[2u].m_2);
+}
+
+void f() {
+  Outer p_a[4] = conv_arr4_Outer(a.inner);
+  Outer p_a_3 = conv_Outer(a.inner[3u]);
+  Inner p_a_3_a[4] = conv_arr4_Inner(a.inner[3u].a);
+  Inner p_a_3_a_2 = conv_Inner(a.inner[3u].a[2u]);
+  f16mat3x2 p_a_3_a_2_m = load_a_inner_3_a_2_m();
+  f16vec2 p_a_3_a_2_m_1 = a.inner[3u].a[2u].m_1;
+  Outer l_a[4] = conv_arr4_Outer(a.inner);
+  Outer l_a_3 = conv_Outer(a.inner[3u]);
+  Inner l_a_3_a[4] = conv_arr4_Inner(a.inner[3u].a);
+  Inner l_a_3_a_2 = conv_Inner(a.inner[3u].a[2u]);
+  f16mat3x2 l_a_3_a_2_m = load_a_inner_3_a_2_m();
+  f16vec2 l_a_3_a_2_m_1 = a.inner[3u].a[2u].m_1;
+  float16_t l_a_3_a_2_m_1_0 = a.inner[3u].a[2u].m_1[0u];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f16/static_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/static_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..40a5b61
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/static_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,36 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct Inner {
+  /* 0x0000 */ half3x2 m;
+  /* 0x000c */ tint_array<int8_t, 52> tint_pad;
+};
+
+struct Outer {
+  /* 0x0000 */ tint_array<Inner, 4> a;
+};
+
+kernel void f(const constant tint_array<Outer, 4>* tint_symbol [[buffer(0)]]) {
+  tint_array<Outer, 4> const l_a = *(tint_symbol);
+  Outer const l_a_3 = (*(tint_symbol))[3];
+  tint_array<Inner, 4> const l_a_3_a = (*(tint_symbol))[3].a;
+  Inner const l_a_3_a_2 = (*(tint_symbol))[3].a[2];
+  half3x2 const l_a_3_a_2_m = (*(tint_symbol))[3].a[2].m;
+  half2 const l_a_3_a_2_m_1 = (*(tint_symbol))[3].a[2].m[1];
+  half const l_a_3_a_2_m_1_0 = (*(tint_symbol))[3].a[2].m[1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f16/static_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/static_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..9044e1f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/static_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,232 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 144
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %a_block_std140 "a_block_std140"
+               OpMemberName %a_block_std140 0 "inner"
+               OpName %Outer_std140 "Outer_std140"
+               OpMemberName %Outer_std140 0 "a"
+               OpName %Inner_std140 "Inner_std140"
+               OpMemberName %Inner_std140 0 "m_0"
+               OpMemberName %Inner_std140 1 "m_1"
+               OpMemberName %Inner_std140 2 "m_2"
+               OpName %a "a"
+               OpName %Inner "Inner"
+               OpMemberName %Inner 0 "m"
+               OpName %conv_Inner "conv_Inner"
+               OpName %val "val"
+               OpName %conv_arr4_Inner "conv_arr4_Inner"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index_1 "var_for_index_1"
+               OpName %Outer "Outer"
+               OpMemberName %Outer 0 "a"
+               OpName %conv_Outer "conv_Outer"
+               OpName %val_1 "val"
+               OpName %conv_arr4_Outer "conv_arr4_Outer"
+               OpName %val_2 "val"
+               OpName %arr_0 "arr"
+               OpName %i_0 "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_a_inner_3_a_2_m "load_a_inner_3_a_2_m"
+               OpName %f "f"
+               OpDecorate %a_block_std140 Block
+               OpMemberDecorate %a_block_std140 0 Offset 0
+               OpMemberDecorate %Outer_std140 0 Offset 0
+               OpMemberDecorate %Inner_std140 0 Offset 0
+               OpMemberDecorate %Inner_std140 1 Offset 4
+               OpMemberDecorate %Inner_std140 2 Offset 8
+               OpDecorate %_arr_Inner_std140_uint_4 ArrayStride 64
+               OpDecorate %_arr_Outer_std140_uint_4 ArrayStride 256
+               OpDecorate %a NonWritable
+               OpDecorate %a DescriptorSet 0
+               OpDecorate %a Binding 0
+               OpMemberDecorate %Inner 0 Offset 0
+               OpMemberDecorate %Inner 0 ColMajor
+               OpMemberDecorate %Inner 0 MatrixStride 4
+               OpDecorate %_arr_Inner_uint_4 ArrayStride 64
+               OpMemberDecorate %Outer 0 Offset 0
+               OpDecorate %_arr_Outer_uint_4 ArrayStride 256
+       %half = OpTypeFloat 16
+     %v2half = OpTypeVector %half 2
+%Inner_std140 = OpTypeStruct %v2half %v2half %v2half
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_Inner_std140_uint_4 = OpTypeArray %Inner_std140 %uint_4
+%Outer_std140 = OpTypeStruct %_arr_Inner_std140_uint_4
+%_arr_Outer_std140_uint_4 = OpTypeArray %Outer_std140 %uint_4
+%a_block_std140 = OpTypeStruct %_arr_Outer_std140_uint_4
+%_ptr_Uniform_a_block_std140 = OpTypePointer Uniform %a_block_std140
+          %a = OpVariable %_ptr_Uniform_a_block_std140 Uniform
+ %mat3v2half = OpTypeMatrix %v2half 3
+      %Inner = OpTypeStruct %mat3v2half
+         %12 = OpTypeFunction %Inner %Inner_std140
+%_arr_Inner_uint_4 = OpTypeArray %Inner %uint_4
+         %23 = OpTypeFunction %_arr_Inner_uint_4 %_arr_Inner_std140_uint_4
+%_ptr_Function__arr_Inner_uint_4 = OpTypePointer Function %_arr_Inner_uint_4
+         %30 = OpConstantNull %_arr_Inner_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %33 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_Inner_std140_uint_4 = OpTypePointer Function %_arr_Inner_std140_uint_4
+         %46 = OpConstantNull %_arr_Inner_std140_uint_4
+%_ptr_Function_Inner = OpTypePointer Function %Inner
+%_ptr_Function_Inner_std140 = OpTypePointer Function %Inner_std140
+     %uint_1 = OpConstant %uint 1
+      %Outer = OpTypeStruct %_arr_Inner_uint_4
+         %59 = OpTypeFunction %Outer %Outer_std140
+%_arr_Outer_uint_4 = OpTypeArray %Outer %uint_4
+         %67 = OpTypeFunction %_arr_Outer_uint_4 %_arr_Outer_std140_uint_4
+%_ptr_Function__arr_Outer_uint_4 = OpTypePointer Function %_arr_Outer_uint_4
+         %74 = OpConstantNull %_arr_Outer_uint_4
+%_ptr_Function__arr_Outer_std140_uint_4 = OpTypePointer Function %_arr_Outer_std140_uint_4
+         %87 = OpConstantNull %_arr_Outer_std140_uint_4
+%_ptr_Function_Outer = OpTypePointer Function %Outer
+%_ptr_Function_Outer_std140 = OpTypePointer Function %Outer_std140
+         %99 = OpTypeFunction %mat3v2half
+     %uint_0 = OpConstant %uint 0
+     %uint_3 = OpConstant %uint 3
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_Inner_std140 = OpTypePointer Uniform %Inner_std140
+%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half
+       %void = OpTypeVoid
+        %119 = OpTypeFunction %void
+%_ptr_Uniform__arr_Outer_std140_uint_4 = OpTypePointer Uniform %_arr_Outer_std140_uint_4
+%_ptr_Uniform_Outer_std140 = OpTypePointer Uniform %Outer_std140
+%_ptr_Uniform__arr_Inner_std140_uint_4 = OpTypePointer Uniform %_arr_Inner_std140_uint_4
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+ %conv_Inner = OpFunction %Inner None %12
+        %val = OpFunctionParameter %Inner_std140
+         %17 = OpLabel
+         %18 = OpCompositeExtract %v2half %val 0
+         %19 = OpCompositeExtract %v2half %val 1
+         %20 = OpCompositeExtract %v2half %val 2
+         %21 = OpCompositeConstruct %mat3v2half %18 %19 %20
+         %22 = OpCompositeConstruct %Inner %21
+               OpReturnValue %22
+               OpFunctionEnd
+%conv_arr4_Inner = OpFunction %_arr_Inner_uint_4 None %23
+      %val_0 = OpFunctionParameter %_arr_Inner_std140_uint_4
+         %27 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_Inner_uint_4 Function %30
+          %i = OpVariable %_ptr_Function_uint Function %33
+%var_for_index_1 = OpVariable %_ptr_Function__arr_Inner_std140_uint_4 Function %46
+               OpBranch %34
+         %34 = OpLabel
+               OpLoopMerge %35 %36 None
+               OpBranch %37
+         %37 = OpLabel
+         %39 = OpLoad %uint %i
+         %40 = OpULessThan %bool %39 %uint_4
+         %38 = OpLogicalNot %bool %40
+               OpSelectionMerge %42 None
+               OpBranchConditional %38 %43 %42
+         %43 = OpLabel
+               OpBranch %35
+         %42 = OpLabel
+               OpStore %var_for_index_1 %val_0
+         %47 = OpLoad %uint %i
+         %49 = OpAccessChain %_ptr_Function_Inner %arr %47
+         %51 = OpLoad %uint %i
+         %53 = OpAccessChain %_ptr_Function_Inner_std140 %var_for_index_1 %51
+         %54 = OpLoad %Inner_std140 %53
+         %50 = OpFunctionCall %Inner %conv_Inner %54
+               OpStore %49 %50
+               OpBranch %36
+         %36 = OpLabel
+         %55 = OpLoad %uint %i
+         %57 = OpIAdd %uint %55 %uint_1
+               OpStore %i %57
+               OpBranch %34
+         %35 = OpLabel
+         %58 = OpLoad %_arr_Inner_uint_4 %arr
+               OpReturnValue %58
+               OpFunctionEnd
+ %conv_Outer = OpFunction %Outer None %59
+      %val_1 = OpFunctionParameter %Outer_std140
+         %63 = OpLabel
+         %65 = OpCompositeExtract %_arr_Inner_std140_uint_4 %val_1 0
+         %64 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr4_Inner %65
+         %66 = OpCompositeConstruct %Outer %64
+               OpReturnValue %66
+               OpFunctionEnd
+%conv_arr4_Outer = OpFunction %_arr_Outer_uint_4 None %67
+      %val_2 = OpFunctionParameter %_arr_Outer_std140_uint_4
+         %71 = OpLabel
+      %arr_0 = OpVariable %_ptr_Function__arr_Outer_uint_4 Function %74
+        %i_0 = OpVariable %_ptr_Function_uint Function %33
+%var_for_index = OpVariable %_ptr_Function__arr_Outer_std140_uint_4 Function %87
+               OpBranch %76
+         %76 = OpLabel
+               OpLoopMerge %77 %78 None
+               OpBranch %79
+         %79 = OpLabel
+         %81 = OpLoad %uint %i_0
+         %82 = OpULessThan %bool %81 %uint_4
+         %80 = OpLogicalNot %bool %82
+               OpSelectionMerge %83 None
+               OpBranchConditional %80 %84 %83
+         %84 = OpLabel
+               OpBranch %77
+         %83 = OpLabel
+               OpStore %var_for_index %val_2
+         %88 = OpLoad %uint %i_0
+         %90 = OpAccessChain %_ptr_Function_Outer %arr_0 %88
+         %92 = OpLoad %uint %i_0
+         %94 = OpAccessChain %_ptr_Function_Outer_std140 %var_for_index %92
+         %95 = OpLoad %Outer_std140 %94
+         %91 = OpFunctionCall %Outer %conv_Outer %95
+               OpStore %90 %91
+               OpBranch %78
+         %78 = OpLabel
+         %96 = OpLoad %uint %i_0
+         %97 = OpIAdd %uint %96 %uint_1
+               OpStore %i_0 %97
+               OpBranch %76
+         %77 = OpLabel
+         %98 = OpLoad %_arr_Outer_uint_4 %arr_0
+               OpReturnValue %98
+               OpFunctionEnd
+%load_a_inner_3_a_2_m = OpFunction %mat3v2half None %99
+        %101 = OpLabel
+        %107 = OpAccessChain %_ptr_Uniform_Inner_std140 %a %uint_0 %uint_3 %uint_0 %uint_2
+        %110 = OpAccessChain %_ptr_Uniform_v2half %107 %uint_0
+        %111 = OpLoad %v2half %110
+        %113 = OpAccessChain %_ptr_Uniform_v2half %107 %uint_1
+        %114 = OpLoad %v2half %113
+        %116 = OpAccessChain %_ptr_Uniform_v2half %107 %uint_2
+        %117 = OpLoad %v2half %116
+        %118 = OpCompositeConstruct %mat3v2half %111 %114 %117
+               OpReturnValue %118
+               OpFunctionEnd
+          %f = OpFunction %void None %119
+        %122 = OpLabel
+        %125 = OpAccessChain %_ptr_Uniform__arr_Outer_std140_uint_4 %a %uint_0
+        %126 = OpLoad %_arr_Outer_std140_uint_4 %125
+        %123 = OpFunctionCall %_arr_Outer_uint_4 %conv_arr4_Outer %126
+        %129 = OpAccessChain %_ptr_Uniform_Outer_std140 %a %uint_0 %uint_3
+        %130 = OpLoad %Outer_std140 %129
+        %127 = OpFunctionCall %Outer %conv_Outer %130
+        %133 = OpAccessChain %_ptr_Uniform__arr_Inner_std140_uint_4 %a %uint_0 %uint_3 %uint_0
+        %134 = OpLoad %_arr_Inner_std140_uint_4 %133
+        %131 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr4_Inner %134
+        %136 = OpAccessChain %_ptr_Uniform_Inner_std140 %a %uint_0 %uint_3 %uint_0 %uint_2
+        %137 = OpLoad %Inner_std140 %136
+        %135 = OpFunctionCall %Inner %conv_Inner %137
+        %138 = OpFunctionCall %mat3v2half %load_a_inner_3_a_2_m
+        %139 = OpAccessChain %_ptr_Uniform_v2half %a %uint_0 %uint_3 %uint_0 %uint_2 %uint_1
+        %140 = OpLoad %v2half %139
+        %142 = OpAccessChain %_ptr_Uniform_half %a %uint_0 %uint_3 %uint_0 %uint_2 %uint_1 %33
+        %143 = OpLoad %half %142
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f16/static_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/static_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..d4fcf32
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/static_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,29 @@
+enable f16;
+
+struct Inner {
+  @size(64)
+  m : mat3x2<f16>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &(a);
+  let p_a_3 = &((*(p_a))[3]);
+  let p_a_3_a = &((*(p_a_3)).a);
+  let p_a_3_a_2 = &((*(p_a_3_a))[2]);
+  let p_a_3_a_2_m = &((*(p_a_3_a_2)).m);
+  let p_a_3_a_2_m_1 = &((*(p_a_3_a_2_m))[1]);
+  let l_a : array<Outer, 4> = *(p_a);
+  let l_a_3 : Outer = *(p_a_3);
+  let l_a_3_a : array<Inner, 4> = *(p_a_3_a);
+  let l_a_3_a_2 : Inner = *(p_a_3_a_2);
+  let l_a_3_a_2_m : mat3x2<f16> = *(p_a_3_a_2_m);
+  let l_a_3_a_2_m_1 : vec2<f16> = *(p_a_3_a_2_m_1);
+  let l_a_3_a_2_m_1_0 : f16 = (*(p_a_3_a_2_m_1))[0];
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_builtin.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_builtin.wgsl
new file mode 100644
index 0000000..6a87ef8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_builtin.wgsl

@@ -0,0 +1,17 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat3x2<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    let t = transpose(u[2].m);
+    let l = length(u[0].m[1].yx);
+    let a = abs(u[0].m[1].yx.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_builtin.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_builtin.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..3dfa58d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_builtin.wgsl.expected.dxc.hlsl

@@ -0,0 +1,23 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+
+matrix<float16_t, 3, 2> tint_symbol(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  return matrix<float16_t, 3, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 2, 3> t = transpose(tint_symbol(u, 260u));
+  uint ubo_load_3 = u[0].z;
+  const float16_t l = length(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))).yx);
+  uint ubo_load_4 = u[0].z;
+  const float16_t a = abs(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_4 & 0xFFFF)), float16_t(f16tof32(ubo_load_4 >> 16))).yx.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_builtin.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_builtin.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..a1c36ac
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_builtin.wgsl.expected.fxc.hlsl

@@ -0,0 +1,28 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+
+matrix<float16_t, 3, 2> tint_symbol(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  return matrix<float16_t, 3, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 2, 3> t = transpose(tint_symbol(u, 260u));
+  uint ubo_load_3 = u[0].z;
+  const float16_t l = length(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))).yx);
+  uint ubo_load_4 = u[0].z;
+  const float16_t a = abs(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_4 & 0xFFFF)), float16_t(f16tof32(ubo_load_4 >> 16))).yx.x);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001CB6BBFDD80(5,8-16): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_builtin.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_builtin.wgsl.expected.glsl
new file mode 100644
index 0000000..0f1b4ab
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_builtin.wgsl.expected.glsl

@@ -0,0 +1,90 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct S {
+  int before;
+  f16mat3x2 m;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  int after;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+  uint pad_26;
+};
+
+struct S_std140 {
+  int before;
+  f16vec2 m_0;
+  f16vec2 m_1;
+  f16vec2 m_2;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  int after;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+  uint pad_26;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  S_std140 inner[4];
+} u;
+
+f16mat3x2 load_u_inner_2_m() {
+  return f16mat3x2(u.inner[2u].m_0, u.inner[2u].m_1, u.inner[2u].m_2);
+}
+
+void f() {
+  f16mat2x3 t = transpose(load_u_inner_2_m());
+  float16_t l = length(u.inner[0u].m_1.yx);
+  float16_t a = abs(u.inner[0u].m_1.yx[0u]);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_builtin.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_builtin.wgsl.expected.msl
new file mode 100644
index 0000000..d2ab552
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_builtin.wgsl.expected.msl

@@ -0,0 +1,31 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ half3x2 m;
+  /* 0x0010 */ tint_array<int8_t, 48> tint_pad;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_1;
+};
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol [[buffer(0)]]) {
+  half2x3 const t = transpose((*(tint_symbol))[2].m);
+  half const l = length(half2((*(tint_symbol))[0].m[1]).yx);
+  half const a = fabs(half2((*(tint_symbol))[0].m[1]).yx[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_builtin.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_builtin.wgsl.expected.spvasm
new file mode 100644
index 0000000..e4c71b7
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_builtin.wgsl.expected.spvasm

@@ -0,0 +1,86 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 52
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+         %42 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "m_2"
+               OpMemberName %S_std140 4 "after"
+               OpName %u "u"
+               OpName %load_u_inner_2_m "load_u_inner_2_m"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 4
+               OpMemberDecorate %S_std140 2 Offset 8
+               OpMemberDecorate %S_std140 3 Offset 12
+               OpMemberDecorate %S_std140 4 Offset 64
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+        %int = OpTypeInt 32 1
+       %half = OpTypeFloat 16
+     %v2half = OpTypeVector %half 2
+   %S_std140 = OpTypeStruct %int %v2half %v2half %v2half %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat3v2half = OpTypeMatrix %v2half 3
+         %11 = OpTypeFunction %mat3v2half
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+     %uint_1 = OpConstant %uint 1
+%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half
+     %uint_3 = OpConstant %uint 3
+       %void = OpTypeVoid
+         %33 = OpTypeFunction %void
+     %v3half = OpTypeVector %half 3
+ %mat2v3half = OpTypeMatrix %v3half 2
+         %43 = OpConstantNull %uint
+%load_u_inner_2_m = OpFunction %mat3v2half None %11
+         %14 = OpLabel
+         %19 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %23 = OpAccessChain %_ptr_Uniform_v2half %19 %uint_1
+         %24 = OpLoad %v2half %23
+         %26 = OpAccessChain %_ptr_Uniform_v2half %19 %uint_2
+         %27 = OpLoad %v2half %26
+         %30 = OpAccessChain %_ptr_Uniform_v2half %19 %uint_3
+         %31 = OpLoad %v2half %30
+         %32 = OpCompositeConstruct %mat3v2half %24 %27 %31
+               OpReturnValue %32
+               OpFunctionEnd
+          %f = OpFunction %void None %33
+         %36 = OpLabel
+         %40 = OpFunctionCall %mat3v2half %load_u_inner_2_m
+         %37 = OpTranspose %mat2v3half %40
+         %44 = OpAccessChain %_ptr_Uniform_v2half %u %uint_0 %43 %uint_2
+         %45 = OpLoad %v2half %44
+         %46 = OpVectorShuffle %v2half %45 %45 1 0
+         %41 = OpExtInst %half %42 Length %46
+         %48 = OpAccessChain %_ptr_Uniform_v2half %u %uint_0 %43 %uint_2
+         %49 = OpLoad %v2half %48
+         %50 = OpVectorShuffle %v2half %49 %49 1 0
+         %51 = OpCompositeExtract %half %50 0
+         %47 = OpExtInst %half %42 FAbs %51
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_builtin.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_builtin.wgsl.expected.wgsl
new file mode 100644
index 0000000..53dbf9f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_builtin.wgsl.expected.wgsl

@@ -0,0 +1,17 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat3x2<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let t = transpose(u[2].m);
+  let l = length(u[0].m[1].yx);
+  let a = abs(u[0].m[1].yx.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_fn.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_fn.wgsl
new file mode 100644
index 0000000..35cae28
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_fn.wgsl

@@ -0,0 +1,25 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat3x2<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+fn a(a : array<S, 4>) {}
+fn b(s : S) {}
+fn c(m : mat3x2<f16>) {}
+fn d(v : vec2<f16>) {}
+fn e(f : f16) {}
+
+@compute @workgroup_size(1)
+fn f() {
+    a(u);
+    b(u[2]);
+    c(u[2].m);
+    d(u[0].m[1].yx);
+    e(u[0].m[1].yx.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_fn.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_fn.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..b0b76cc
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_fn.wgsl.expected.dxc.hlsl

@@ -0,0 +1,64 @@
+struct S {
+  int before;
+  matrix<float16_t, 3, 2> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+
+void a(S a_1[4]) {
+}
+
+void b(S s) {
+}
+
+void c(matrix<float16_t, 3, 2> m) {
+}
+
+void d(vector<float16_t, 2> v) {
+}
+
+void e(float16_t f_1) {
+}
+
+matrix<float16_t, 3, 2> tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  return matrix<float16_t, 3, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))));
+}
+
+S tint_symbol_1(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_3 = ((offset + 0u)) / 4;
+  const uint scalar_offset_4 = ((offset + 64u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4]), tint_symbol_3(buffer, (offset + 4u)), asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(tint_symbol_1(u, 256u));
+  c(tint_symbol_3(u, 260u));
+  uint ubo_load_3 = u[0].z;
+  d(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))).yx);
+  uint ubo_load_4 = u[0].z;
+  e(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_4 & 0xFFFF)), float16_t(f16tof32(ubo_load_4 >> 16))).yx.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_fn.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_fn.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..f295aea
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_fn.wgsl.expected.fxc.hlsl

@@ -0,0 +1,69 @@
+SKIP: FAILED
+
+struct S {
+  int before;
+  matrix<float16_t, 3, 2> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+
+void a(S a_1[4]) {
+}
+
+void b(S s) {
+}
+
+void c(matrix<float16_t, 3, 2> m) {
+}
+
+void d(vector<float16_t, 2> v) {
+}
+
+void e(float16_t f_1) {
+}
+
+matrix<float16_t, 3, 2> tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  return matrix<float16_t, 3, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))));
+}
+
+S tint_symbol_1(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_3 = ((offset + 0u)) / 4;
+  const uint scalar_offset_4 = ((offset + 64u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4]), tint_symbol_3(buffer, (offset + 4u)), asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(tint_symbol_1(u, 256u));
+  c(tint_symbol_3(u, 260u));
+  uint ubo_load_3 = u[0].z;
+  d(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))).yx);
+  uint ubo_load_4 = u[0].z;
+  e(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_4 & 0xFFFF)), float16_t(f16tof32(ubo_load_4 >> 16))).yx.x);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000017A6DD8FA30(3,10-18): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_fn.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_fn.wgsl.expected.glsl
new file mode 100644
index 0000000..41a3d44
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_fn.wgsl.expected.glsl

@@ -0,0 +1,121 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct S {
+  int before;
+  f16mat3x2 m;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  int after;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+  uint pad_26;
+};
+
+struct S_std140 {
+  int before;
+  f16vec2 m_0;
+  f16vec2 m_1;
+  f16vec2 m_2;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  int after;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+  uint pad_26;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  S_std140 inner[4];
+} u;
+
+void a(S a_1[4]) {
+}
+
+void b(S s) {
+}
+
+void c(f16mat3x2 m) {
+}
+
+void d(f16vec2 v) {
+}
+
+void e(float16_t f_1) {
+}
+
+S conv_S(S_std140 val) {
+  return S(val.before, f16mat3x2(val.m_0, val.m_1, val.m_2), val.pad, val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.pad_7, val.pad_8, val.pad_9, val.pad_10, val.pad_11, val.after, val.pad_12, val.pad_13, val.pad_14, val.pad_15, val.pad_16, val.pad_17, val.pad_18, val.pad_19, val.pad_20, val.pad_21, val.pad_22, val.pad_23, val.pad_24, val.pad_25, val.pad_26);
+}
+
+S[4] conv_arr4_S(S_std140 val[4]) {
+  S arr[4] = S[4](S(0, f16mat3x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, f16mat3x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, f16mat3x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, f16mat3x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_S(val[i]);
+    }
+  }
+  return arr;
+}
+
+f16mat3x2 load_u_inner_2_m() {
+  return f16mat3x2(u.inner[2u].m_0, u.inner[2u].m_1, u.inner[2u].m_2);
+}
+
+void f() {
+  a(conv_arr4_S(u.inner));
+  b(conv_S(u.inner[2u]));
+  c(load_u_inner_2_m());
+  d(u.inner[0u].m_1.yx);
+  e(u.inner[0u].m_1.yx[0u]);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_fn.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_fn.wgsl.expected.msl
new file mode 100644
index 0000000..f0a5dce
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_fn.wgsl.expected.msl

@@ -0,0 +1,48 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ half3x2 m;
+  /* 0x0010 */ tint_array<int8_t, 48> tint_pad;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_1;
+};
+
+void a(tint_array<S, 4> a_1) {
+}
+
+void b(S s) {
+}
+
+void c(half3x2 m) {
+}
+
+void d(half2 v) {
+}
+
+void e(half f_1) {
+}
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol [[buffer(0)]]) {
+  a(*(tint_symbol));
+  b((*(tint_symbol))[2]);
+  c((*(tint_symbol))[2].m);
+  d(half2((*(tint_symbol))[0].m[1]).yx);
+  e(half2((*(tint_symbol))[0].m[1]).yx[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_fn.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_fn.wgsl.expected.spvasm
new file mode 100644
index 0000000..b77ef16
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_fn.wgsl.expected.spvasm

@@ -0,0 +1,210 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 124
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "m_2"
+               OpMemberName %S_std140 4 "after"
+               OpName %u "u"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %a "a"
+               OpName %a_1 "a_1"
+               OpName %b "b"
+               OpName %s "s"
+               OpName %c "c"
+               OpName %m "m"
+               OpName %d "d"
+               OpName %v "v"
+               OpName %e "e"
+               OpName %f_1 "f_1"
+               OpName %conv_S "conv_S"
+               OpName %val "val"
+               OpName %conv_arr4_S "conv_arr4_S"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_u_inner_2_m "load_u_inner_2_m"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 4
+               OpMemberDecorate %S_std140 2 Offset 8
+               OpMemberDecorate %S_std140 3 Offset 12
+               OpMemberDecorate %S_std140 4 Offset 64
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 4
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 4
+               OpMemberDecorate %S 2 Offset 64
+               OpDecorate %_arr_S_uint_4 ArrayStride 128
+        %int = OpTypeInt 32 1
+       %half = OpTypeFloat 16
+     %v2half = OpTypeVector %half 2
+   %S_std140 = OpTypeStruct %int %v2half %v2half %v2half %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+       %void = OpTypeVoid
+ %mat3v2half = OpTypeMatrix %v2half 3
+          %S = OpTypeStruct %int %mat3v2half %int
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+         %11 = OpTypeFunction %void %_arr_S_uint_4
+         %19 = OpTypeFunction %void %S
+         %23 = OpTypeFunction %void %mat3v2half
+         %27 = OpTypeFunction %void %v2half
+         %31 = OpTypeFunction %void %half
+         %35 = OpTypeFunction %S %S_std140
+         %46 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
+%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
+         %52 = OpConstantNull %_arr_S_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %55 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
+         %68 = OpConstantNull %_arr_S_std140_uint_4
+%_ptr_Function_S = OpTypePointer Function %S
+%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
+     %uint_1 = OpConstant %uint 1
+         %81 = OpTypeFunction %mat3v2half
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half
+     %uint_3 = OpConstant %uint 3
+        %101 = OpTypeFunction %void
+%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
+          %a = OpFunction %void None %11
+        %a_1 = OpFunctionParameter %_arr_S_uint_4
+         %18 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %b = OpFunction %void None %19
+          %s = OpFunctionParameter %S
+         %22 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %c = OpFunction %void None %23
+          %m = OpFunctionParameter %mat3v2half
+         %26 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %d = OpFunction %void None %27
+          %v = OpFunctionParameter %v2half
+         %30 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %e = OpFunction %void None %31
+        %f_1 = OpFunctionParameter %half
+         %34 = OpLabel
+               OpReturn
+               OpFunctionEnd
+     %conv_S = OpFunction %S None %35
+        %val = OpFunctionParameter %S_std140
+         %38 = OpLabel
+         %39 = OpCompositeExtract %int %val 0
+         %40 = OpCompositeExtract %v2half %val 1
+         %41 = OpCompositeExtract %v2half %val 2
+         %42 = OpCompositeExtract %v2half %val 3
+         %43 = OpCompositeConstruct %mat3v2half %40 %41 %42
+         %44 = OpCompositeExtract %int %val 4
+         %45 = OpCompositeConstruct %S %39 %43 %44
+               OpReturnValue %45
+               OpFunctionEnd
+%conv_arr4_S = OpFunction %_arr_S_uint_4 None %46
+      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
+         %49 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %52
+          %i = OpVariable %_ptr_Function_uint Function %55
+%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %68
+               OpBranch %56
+         %56 = OpLabel
+               OpLoopMerge %57 %58 None
+               OpBranch %59
+         %59 = OpLabel
+         %61 = OpLoad %uint %i
+         %62 = OpULessThan %bool %61 %uint_4
+         %60 = OpLogicalNot %bool %62
+               OpSelectionMerge %64 None
+               OpBranchConditional %60 %65 %64
+         %65 = OpLabel
+               OpBranch %57
+         %64 = OpLabel
+               OpStore %var_for_index %val_0
+         %69 = OpLoad %uint %i
+         %71 = OpAccessChain %_ptr_Function_S %arr %69
+         %73 = OpLoad %uint %i
+         %75 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %73
+         %76 = OpLoad %S_std140 %75
+         %72 = OpFunctionCall %S %conv_S %76
+               OpStore %71 %72
+               OpBranch %58
+         %58 = OpLabel
+         %77 = OpLoad %uint %i
+         %79 = OpIAdd %uint %77 %uint_1
+               OpStore %i %79
+               OpBranch %56
+         %57 = OpLabel
+         %80 = OpLoad %_arr_S_uint_4 %arr
+               OpReturnValue %80
+               OpFunctionEnd
+%load_u_inner_2_m = OpFunction %mat3v2half None %81
+         %83 = OpLabel
+         %88 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %91 = OpAccessChain %_ptr_Uniform_v2half %88 %uint_1
+         %92 = OpLoad %v2half %91
+         %94 = OpAccessChain %_ptr_Uniform_v2half %88 %uint_2
+         %95 = OpLoad %v2half %94
+         %98 = OpAccessChain %_ptr_Uniform_v2half %88 %uint_3
+         %99 = OpLoad %v2half %98
+        %100 = OpCompositeConstruct %mat3v2half %92 %95 %99
+               OpReturnValue %100
+               OpFunctionEnd
+          %f = OpFunction %void None %101
+        %103 = OpLabel
+        %107 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
+        %108 = OpLoad %_arr_S_std140_uint_4 %107
+        %105 = OpFunctionCall %_arr_S_uint_4 %conv_arr4_S %108
+        %104 = OpFunctionCall %void %a %105
+        %111 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+        %112 = OpLoad %S_std140 %111
+        %110 = OpFunctionCall %S %conv_S %112
+        %109 = OpFunctionCall %void %b %110
+        %114 = OpFunctionCall %mat3v2half %load_u_inner_2_m
+        %113 = OpFunctionCall %void %c %114
+        %116 = OpAccessChain %_ptr_Uniform_v2half %u %uint_0 %55 %uint_2
+        %117 = OpLoad %v2half %116
+        %118 = OpVectorShuffle %v2half %117 %117 1 0
+        %115 = OpFunctionCall %void %d %118
+        %120 = OpAccessChain %_ptr_Uniform_v2half %u %uint_0 %55 %uint_2
+        %121 = OpLoad %v2half %120
+        %122 = OpVectorShuffle %v2half %121 %121 1 0
+        %123 = OpCompositeExtract %half %122 0
+        %119 = OpFunctionCall %void %e %123
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_fn.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_fn.wgsl.expected.wgsl
new file mode 100644
index 0000000..42d9a06
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_fn.wgsl.expected.wgsl

@@ -0,0 +1,34 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat3x2<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+fn a(a : array<S, 4>) {
+}
+
+fn b(s : S) {
+}
+
+fn c(m : mat3x2<f16>) {
+}
+
+fn d(v : vec2<f16>) {
+}
+
+fn e(f : f16) {
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  a(u);
+  b(u[2]);
+  c(u[2].m);
+  d(u[0].m[1].yx);
+  e(u[0].m[1].yx.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_private.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_private.wgsl
new file mode 100644
index 0000000..f41ffc6
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_private.wgsl

@@ -0,0 +1,19 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat3x2<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+var<private> p : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    p = u;
+    p[1] = u[2];
+    p[3].m = u[2].m;
+    p[1].m[0] = u[0].m[1].yx;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_private.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_private.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..ebcb896
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_private.wgsl.expected.dxc.hlsl

@@ -0,0 +1,48 @@
+struct S {
+  int before;
+  matrix<float16_t, 3, 2> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+static S p[4] = (S[4])0;
+
+matrix<float16_t, 3, 2> tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  return matrix<float16_t, 3, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))));
+}
+
+S tint_symbol_1(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_3 = ((offset + 0u)) / 4;
+  const uint scalar_offset_4 = ((offset + 64u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4]), tint_symbol_3(buffer, (offset + 4u)), asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = tint_symbol_1(u, 256u);
+  p[3].m = tint_symbol_3(u, 260u);
+  uint ubo_load_3 = u[0].z;
+  p[1].m[0] = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))).yx;
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_private.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_private.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..5ad13cb
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_private.wgsl.expected.fxc.hlsl

@@ -0,0 +1,53 @@
+SKIP: FAILED
+
+struct S {
+  int before;
+  matrix<float16_t, 3, 2> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+static S p[4] = (S[4])0;
+
+matrix<float16_t, 3, 2> tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  return matrix<float16_t, 3, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))));
+}
+
+S tint_symbol_1(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_3 = ((offset + 0u)) / 4;
+  const uint scalar_offset_4 = ((offset + 64u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4]), tint_symbol_3(buffer, (offset + 4u)), asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = tint_symbol_1(u, 256u);
+  p[3].m = tint_symbol_3(u, 260u);
+  uint ubo_load_3 = u[0].z;
+  p[1].m[0] = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))).yx;
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000022E1691F8F0(3,10-18): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_private.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_private.wgsl.expected.glsl
new file mode 100644
index 0000000..c986125
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_private.wgsl.expected.glsl

@@ -0,0 +1,106 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct S {
+  int before;
+  f16mat3x2 m;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  int after;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+  uint pad_26;
+};
+
+struct S_std140 {
+  int before;
+  f16vec2 m_0;
+  f16vec2 m_1;
+  f16vec2 m_2;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  int after;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+  uint pad_26;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  S_std140 inner[4];
+} u;
+
+S p[4] = S[4](S(0, f16mat3x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, f16mat3x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, f16mat3x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, f16mat3x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+S conv_S(S_std140 val) {
+  return S(val.before, f16mat3x2(val.m_0, val.m_1, val.m_2), val.pad, val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.pad_7, val.pad_8, val.pad_9, val.pad_10, val.pad_11, val.after, val.pad_12, val.pad_13, val.pad_14, val.pad_15, val.pad_16, val.pad_17, val.pad_18, val.pad_19, val.pad_20, val.pad_21, val.pad_22, val.pad_23, val.pad_24, val.pad_25, val.pad_26);
+}
+
+S[4] conv_arr4_S(S_std140 val[4]) {
+  S arr[4] = S[4](S(0, f16mat3x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, f16mat3x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, f16mat3x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, f16mat3x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_S(val[i]);
+    }
+  }
+  return arr;
+}
+
+f16mat3x2 load_u_inner_2_m() {
+  return f16mat3x2(u.inner[2u].m_0, u.inner[2u].m_1, u.inner[2u].m_2);
+}
+
+void f() {
+  p = conv_arr4_S(u.inner);
+  p[1] = conv_S(u.inner[2u]);
+  p[3].m = load_u_inner_2_m();
+  p[1].m[0] = u.inner[0u].m_1.yx;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_private.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_private.wgsl.expected.msl
new file mode 100644
index 0000000..52627dc
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_private.wgsl.expected.msl

@@ -0,0 +1,33 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ half3x2 m;
+  /* 0x0010 */ tint_array<int8_t, 48> tint_pad;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_1;
+};
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol_1 [[buffer(0)]]) {
+  thread tint_array<S, 4> tint_symbol = {};
+  tint_symbol = *(tint_symbol_1);
+  tint_symbol[1] = (*(tint_symbol_1))[2];
+  tint_symbol[3].m = (*(tint_symbol_1))[2].m;
+  tint_symbol[1].m[0] = half2((*(tint_symbol_1))[0].m[1]).yx;
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_private.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_private.wgsl.expected.spvasm
new file mode 100644
index 0000000..caa8191
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_private.wgsl.expected.spvasm

@@ -0,0 +1,177 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 106
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "m_2"
+               OpMemberName %S_std140 4 "after"
+               OpName %u "u"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %p "p"
+               OpName %conv_S "conv_S"
+               OpName %val "val"
+               OpName %conv_arr4_S "conv_arr4_S"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_u_inner_2_m "load_u_inner_2_m"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 4
+               OpMemberDecorate %S_std140 2 Offset 8
+               OpMemberDecorate %S_std140 3 Offset 12
+               OpMemberDecorate %S_std140 4 Offset 64
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 4
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 4
+               OpMemberDecorate %S 2 Offset 64
+               OpDecorate %_arr_S_uint_4 ArrayStride 128
+        %int = OpTypeInt 32 1
+       %half = OpTypeFloat 16
+     %v2half = OpTypeVector %half 2
+   %S_std140 = OpTypeStruct %int %v2half %v2half %v2half %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat3v2half = OpTypeMatrix %v2half 3
+          %S = OpTypeStruct %int %mat3v2half %int
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+%_ptr_Private__arr_S_uint_4 = OpTypePointer Private %_arr_S_uint_4
+         %16 = OpConstantNull %_arr_S_uint_4
+          %p = OpVariable %_ptr_Private__arr_S_uint_4 Private %16
+         %17 = OpTypeFunction %S %S_std140
+         %28 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
+%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %36 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
+         %49 = OpConstantNull %_arr_S_std140_uint_4
+%_ptr_Function_S = OpTypePointer Function %S
+%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
+     %uint_1 = OpConstant %uint 1
+         %62 = OpTypeFunction %mat3v2half
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half
+     %uint_3 = OpConstant %uint 3
+       %void = OpTypeVoid
+         %82 = OpTypeFunction %void
+%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
+      %int_1 = OpConstant %int 1
+%_ptr_Private_S = OpTypePointer Private %S
+      %int_3 = OpConstant %int 3
+%_ptr_Private_mat3v2half = OpTypePointer Private %mat3v2half
+        %100 = OpConstantNull %int
+%_ptr_Private_v2half = OpTypePointer Private %v2half
+     %conv_S = OpFunction %S None %17
+        %val = OpFunctionParameter %S_std140
+         %20 = OpLabel
+         %21 = OpCompositeExtract %int %val 0
+         %22 = OpCompositeExtract %v2half %val 1
+         %23 = OpCompositeExtract %v2half %val 2
+         %24 = OpCompositeExtract %v2half %val 3
+         %25 = OpCompositeConstruct %mat3v2half %22 %23 %24
+         %26 = OpCompositeExtract %int %val 4
+         %27 = OpCompositeConstruct %S %21 %25 %26
+               OpReturnValue %27
+               OpFunctionEnd
+%conv_arr4_S = OpFunction %_arr_S_uint_4 None %28
+      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
+         %31 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %16
+          %i = OpVariable %_ptr_Function_uint Function %36
+%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %49
+               OpBranch %37
+         %37 = OpLabel
+               OpLoopMerge %38 %39 None
+               OpBranch %40
+         %40 = OpLabel
+         %42 = OpLoad %uint %i
+         %43 = OpULessThan %bool %42 %uint_4
+         %41 = OpLogicalNot %bool %43
+               OpSelectionMerge %45 None
+               OpBranchConditional %41 %46 %45
+         %46 = OpLabel
+               OpBranch %38
+         %45 = OpLabel
+               OpStore %var_for_index %val_0
+         %50 = OpLoad %uint %i
+         %52 = OpAccessChain %_ptr_Function_S %arr %50
+         %54 = OpLoad %uint %i
+         %56 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %54
+         %57 = OpLoad %S_std140 %56
+         %53 = OpFunctionCall %S %conv_S %57
+               OpStore %52 %53
+               OpBranch %39
+         %39 = OpLabel
+         %58 = OpLoad %uint %i
+         %60 = OpIAdd %uint %58 %uint_1
+               OpStore %i %60
+               OpBranch %37
+         %38 = OpLabel
+         %61 = OpLoad %_arr_S_uint_4 %arr
+               OpReturnValue %61
+               OpFunctionEnd
+%load_u_inner_2_m = OpFunction %mat3v2half None %62
+         %64 = OpLabel
+         %69 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %72 = OpAccessChain %_ptr_Uniform_v2half %69 %uint_1
+         %73 = OpLoad %v2half %72
+         %75 = OpAccessChain %_ptr_Uniform_v2half %69 %uint_2
+         %76 = OpLoad %v2half %75
+         %79 = OpAccessChain %_ptr_Uniform_v2half %69 %uint_3
+         %80 = OpLoad %v2half %79
+         %81 = OpCompositeConstruct %mat3v2half %73 %76 %80
+               OpReturnValue %81
+               OpFunctionEnd
+          %f = OpFunction %void None %82
+         %85 = OpLabel
+         %88 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
+         %89 = OpLoad %_arr_S_std140_uint_4 %88
+         %86 = OpFunctionCall %_arr_S_uint_4 %conv_arr4_S %89
+               OpStore %p %86
+         %92 = OpAccessChain %_ptr_Private_S %p %int_1
+         %94 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %95 = OpLoad %S_std140 %94
+         %93 = OpFunctionCall %S %conv_S %95
+               OpStore %92 %93
+         %98 = OpAccessChain %_ptr_Private_mat3v2half %p %int_3 %uint_1
+         %99 = OpFunctionCall %mat3v2half %load_u_inner_2_m
+               OpStore %98 %99
+        %102 = OpAccessChain %_ptr_Private_v2half %p %int_1 %uint_1 %100
+        %103 = OpAccessChain %_ptr_Uniform_v2half %u %uint_0 %36 %uint_2
+        %104 = OpLoad %v2half %103
+        %105 = OpVectorShuffle %v2half %104 %104 1 0
+               OpStore %102 %105
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_private.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_private.wgsl.expected.wgsl
new file mode 100644
index 0000000..9ee7dbd
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_private.wgsl.expected.wgsl

@@ -0,0 +1,20 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat3x2<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+var<private> p : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  p = u;
+  p[1] = u[2];
+  p[3].m = u[2].m;
+  p[1].m[0] = u[0].m[1].yx;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_storage.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_storage.wgsl
new file mode 100644
index 0000000..49b2e45
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_storage.wgsl

@@ -0,0 +1,19 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat3x2<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    s = u;
+    s[1] = u[2];
+    s[3].m = u[2].m;
+    s[1].m[0] = u[0].m[1].yx;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_storage.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_storage.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..0a5e75f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_storage.wgsl.expected.dxc.hlsl

@@ -0,0 +1,69 @@
+struct S {
+  int before;
+  matrix<float16_t, 3, 2> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol_3(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 3, 2> value) {
+  buffer.Store<vector<float16_t, 2> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 4u), value[1u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 8u), value[2u]);
+}
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, S value) {
+  buffer.Store((offset + 0u), asuint(value.before));
+  tint_symbol_3(buffer, (offset + 4u), value.m);
+  buffer.Store((offset + 64u), asuint(value.after));
+}
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, S value[4]) {
+  S array[4] = value;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      tint_symbol_1(buffer, (offset + (i * 128u)), array[i]);
+    }
+  }
+}
+
+matrix<float16_t, 3, 2> tint_symbol_8(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  return matrix<float16_t, 3, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))));
+}
+
+S tint_symbol_6(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_3 = ((offset + 0u)) / 4;
+  const uint scalar_offset_4 = ((offset + 64u)) / 4;
+  const S tint_symbol_10 = {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4]), tint_symbol_8(buffer, (offset + 4u)), asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4])};
+  return tint_symbol_10;
+}
+
+typedef S tint_symbol_5_ret[4];
+tint_symbol_5_ret tint_symbol_5(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_6(buffer, (offset + (i_1 * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_5(u, 0u));
+  tint_symbol_1(s, 128u, tint_symbol_6(u, 256u));
+  tint_symbol_3(s, 388u, tint_symbol_8(u, 260u));
+  uint ubo_load_3 = u[0].z;
+  s.Store<vector<float16_t, 2> >(132u, vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))).yx);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_storage.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_storage.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..2de9430
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_storage.wgsl.expected.fxc.hlsl

@@ -0,0 +1,74 @@
+SKIP: FAILED
+
+struct S {
+  int before;
+  matrix<float16_t, 3, 2> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol_3(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 3, 2> value) {
+  buffer.Store<vector<float16_t, 2> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 4u), value[1u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 8u), value[2u]);
+}
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, S value) {
+  buffer.Store((offset + 0u), asuint(value.before));
+  tint_symbol_3(buffer, (offset + 4u), value.m);
+  buffer.Store((offset + 64u), asuint(value.after));
+}
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, S value[4]) {
+  S array[4] = value;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      tint_symbol_1(buffer, (offset + (i * 128u)), array[i]);
+    }
+  }
+}
+
+matrix<float16_t, 3, 2> tint_symbol_8(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  return matrix<float16_t, 3, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))));
+}
+
+S tint_symbol_6(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_3 = ((offset + 0u)) / 4;
+  const uint scalar_offset_4 = ((offset + 64u)) / 4;
+  const S tint_symbol_10 = {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4]), tint_symbol_8(buffer, (offset + 4u)), asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4])};
+  return tint_symbol_10;
+}
+
+typedef S tint_symbol_5_ret[4];
+tint_symbol_5_ret tint_symbol_5(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_6(buffer, (offset + (i_1 * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_5(u, 0u));
+  tint_symbol_1(s, 128u, tint_symbol_6(u, 256u));
+  tint_symbol_3(s, 388u, tint_symbol_8(u, 260u));
+  uint ubo_load_3 = u[0].z;
+  s.Store<vector<float16_t, 2> >(132u, vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))).yx);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001BBE330BB00(3,10-18): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_storage.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_storage.wgsl.expected.glsl
new file mode 100644
index 0000000..0718809
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_storage.wgsl.expected.glsl

@@ -0,0 +1,109 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct S {
+  int before;
+  f16mat3x2 m;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  int after;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+  uint pad_26;
+};
+
+struct S_std140 {
+  int before;
+  f16vec2 m_0;
+  f16vec2 m_1;
+  f16vec2 m_2;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  int after;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+  uint pad_26;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  S_std140 inner[4];
+} u;
+
+layout(binding = 1, std430) buffer u_block_ssbo {
+  S inner[4];
+} s;
+
+S conv_S(S_std140 val) {
+  return S(val.before, f16mat3x2(val.m_0, val.m_1, val.m_2), val.pad, val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.pad_7, val.pad_8, val.pad_9, val.pad_10, val.pad_11, val.after, val.pad_12, val.pad_13, val.pad_14, val.pad_15, val.pad_16, val.pad_17, val.pad_18, val.pad_19, val.pad_20, val.pad_21, val.pad_22, val.pad_23, val.pad_24, val.pad_25, val.pad_26);
+}
+
+S[4] conv_arr4_S(S_std140 val[4]) {
+  S arr[4] = S[4](S(0, f16mat3x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, f16mat3x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, f16mat3x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, f16mat3x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_S(val[i]);
+    }
+  }
+  return arr;
+}
+
+f16mat3x2 load_u_inner_2_m() {
+  return f16mat3x2(u.inner[2u].m_0, u.inner[2u].m_1, u.inner[2u].m_2);
+}
+
+void f() {
+  s.inner = conv_arr4_S(u.inner);
+  s.inner[1] = conv_S(u.inner[2u]);
+  s.inner[3].m = load_u_inner_2_m();
+  s.inner[1].m[0] = u.inner[0u].m_1.yx;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_storage.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_storage.wgsl.expected.msl
new file mode 100644
index 0000000..6f7653d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_storage.wgsl.expected.msl

@@ -0,0 +1,32 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ half3x2 m;
+  /* 0x0010 */ tint_array<int8_t, 48> tint_pad;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_1;
+};
+
+kernel void f(device tint_array<S, 4>* tint_symbol [[buffer(1)]], const constant tint_array<S, 4>* tint_symbol_1 [[buffer(0)]]) {
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[2];
+  (*(tint_symbol))[3].m = (*(tint_symbol_1))[2].m;
+  (*(tint_symbol))[1].m[0] = half2((*(tint_symbol_1))[0].m[1]).yx;
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_storage.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_storage.wgsl.expected.spvasm
new file mode 100644
index 0000000..08e1b0d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_storage.wgsl.expected.spvasm

@@ -0,0 +1,186 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 109
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "m_2"
+               OpMemberName %S_std140 4 "after"
+               OpName %u "u"
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %s "s"
+               OpName %conv_S "conv_S"
+               OpName %val "val"
+               OpName %conv_arr4_S "conv_arr4_S"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_u_inner_2_m "load_u_inner_2_m"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 4
+               OpMemberDecorate %S_std140 2 Offset 8
+               OpMemberDecorate %S_std140 3 Offset 12
+               OpMemberDecorate %S_std140 4 Offset 64
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 4
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 4
+               OpMemberDecorate %S 2 Offset 64
+               OpDecorate %_arr_S_uint_4 ArrayStride 128
+               OpDecorate %s DescriptorSet 0
+               OpDecorate %s Binding 1
+        %int = OpTypeInt 32 1
+       %half = OpTypeFloat 16
+     %v2half = OpTypeVector %half 2
+   %S_std140 = OpTypeStruct %int %v2half %v2half %v2half %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat3v2half = OpTypeMatrix %v2half 3
+          %S = OpTypeStruct %int %mat3v2half %int
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+    %u_block = OpTypeStruct %_arr_S_uint_4
+%_ptr_StorageBuffer_u_block = OpTypePointer StorageBuffer %u_block
+          %s = OpVariable %_ptr_StorageBuffer_u_block StorageBuffer
+         %17 = OpTypeFunction %S %S_std140
+         %28 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
+%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
+         %34 = OpConstantNull %_arr_S_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %37 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
+         %50 = OpConstantNull %_arr_S_std140_uint_4
+%_ptr_Function_S = OpTypePointer Function %S
+%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
+     %uint_1 = OpConstant %uint 1
+         %63 = OpTypeFunction %mat3v2half
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half
+     %uint_3 = OpConstant %uint 3
+       %void = OpTypeVoid
+         %83 = OpTypeFunction %void
+%_ptr_StorageBuffer__arr_S_uint_4 = OpTypePointer StorageBuffer %_arr_S_uint_4
+%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
+      %int_1 = OpConstant %int 1
+%_ptr_StorageBuffer_S = OpTypePointer StorageBuffer %S
+      %int_3 = OpConstant %int 3
+%_ptr_StorageBuffer_mat3v2half = OpTypePointer StorageBuffer %mat3v2half
+        %103 = OpConstantNull %int
+%_ptr_StorageBuffer_v2half = OpTypePointer StorageBuffer %v2half
+     %conv_S = OpFunction %S None %17
+        %val = OpFunctionParameter %S_std140
+         %20 = OpLabel
+         %21 = OpCompositeExtract %int %val 0
+         %22 = OpCompositeExtract %v2half %val 1
+         %23 = OpCompositeExtract %v2half %val 2
+         %24 = OpCompositeExtract %v2half %val 3
+         %25 = OpCompositeConstruct %mat3v2half %22 %23 %24
+         %26 = OpCompositeExtract %int %val 4
+         %27 = OpCompositeConstruct %S %21 %25 %26
+               OpReturnValue %27
+               OpFunctionEnd
+%conv_arr4_S = OpFunction %_arr_S_uint_4 None %28
+      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
+         %31 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %34
+          %i = OpVariable %_ptr_Function_uint Function %37
+%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %50
+               OpBranch %38
+         %38 = OpLabel
+               OpLoopMerge %39 %40 None
+               OpBranch %41
+         %41 = OpLabel
+         %43 = OpLoad %uint %i
+         %44 = OpULessThan %bool %43 %uint_4
+         %42 = OpLogicalNot %bool %44
+               OpSelectionMerge %46 None
+               OpBranchConditional %42 %47 %46
+         %47 = OpLabel
+               OpBranch %39
+         %46 = OpLabel
+               OpStore %var_for_index %val_0
+         %51 = OpLoad %uint %i
+         %53 = OpAccessChain %_ptr_Function_S %arr %51
+         %55 = OpLoad %uint %i
+         %57 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %55
+         %58 = OpLoad %S_std140 %57
+         %54 = OpFunctionCall %S %conv_S %58
+               OpStore %53 %54
+               OpBranch %40
+         %40 = OpLabel
+         %59 = OpLoad %uint %i
+         %61 = OpIAdd %uint %59 %uint_1
+               OpStore %i %61
+               OpBranch %38
+         %39 = OpLabel
+         %62 = OpLoad %_arr_S_uint_4 %arr
+               OpReturnValue %62
+               OpFunctionEnd
+%load_u_inner_2_m = OpFunction %mat3v2half None %63
+         %65 = OpLabel
+         %70 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %73 = OpAccessChain %_ptr_Uniform_v2half %70 %uint_1
+         %74 = OpLoad %v2half %73
+         %76 = OpAccessChain %_ptr_Uniform_v2half %70 %uint_2
+         %77 = OpLoad %v2half %76
+         %80 = OpAccessChain %_ptr_Uniform_v2half %70 %uint_3
+         %81 = OpLoad %v2half %80
+         %82 = OpCompositeConstruct %mat3v2half %74 %77 %81
+               OpReturnValue %82
+               OpFunctionEnd
+          %f = OpFunction %void None %83
+         %86 = OpLabel
+         %88 = OpAccessChain %_ptr_StorageBuffer__arr_S_uint_4 %s %uint_0
+         %91 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
+         %92 = OpLoad %_arr_S_std140_uint_4 %91
+         %89 = OpFunctionCall %_arr_S_uint_4 %conv_arr4_S %92
+               OpStore %88 %89
+         %95 = OpAccessChain %_ptr_StorageBuffer_S %s %uint_0 %int_1
+         %97 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %98 = OpLoad %S_std140 %97
+         %96 = OpFunctionCall %S %conv_S %98
+               OpStore %95 %96
+        %101 = OpAccessChain %_ptr_StorageBuffer_mat3v2half %s %uint_0 %int_3 %uint_1
+        %102 = OpFunctionCall %mat3v2half %load_u_inner_2_m
+               OpStore %101 %102
+        %105 = OpAccessChain %_ptr_StorageBuffer_v2half %s %uint_0 %int_1 %uint_1 %103
+        %106 = OpAccessChain %_ptr_Uniform_v2half %u %uint_0 %37 %uint_2
+        %107 = OpLoad %v2half %106
+        %108 = OpVectorShuffle %v2half %107 %107 1 0
+               OpStore %105 %108
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_storage.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_storage.wgsl.expected.wgsl
new file mode 100644
index 0000000..cfab5f9
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_storage.wgsl.expected.wgsl

@@ -0,0 +1,20 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat3x2<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  s = u;
+  s[1] = u[2];
+  s[3].m = u[2].m;
+  s[1].m[0] = u[0].m[1].yx;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_workgroup.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_workgroup.wgsl
new file mode 100644
index 0000000..0b2ba99
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_workgroup.wgsl

@@ -0,0 +1,19 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat3x2<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+var<workgroup> w : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    w = u;
+    w[1] = u[2];
+    w[3].m = u[2].m;
+    w[1].m[0] = u[0].m[1].yx;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_workgroup.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_workgroup.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..9b84706
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_workgroup.wgsl.expected.dxc.hlsl

@@ -0,0 +1,64 @@
+struct S {
+  int before;
+  matrix<float16_t, 3, 2> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+groupshared S w[4];
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+matrix<float16_t, 3, 2> tint_symbol_5(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  return matrix<float16_t, 3, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))));
+}
+
+S tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_3 = ((offset + 0u)) / 4;
+  const uint scalar_offset_4 = ((offset + 64u)) / 4;
+  const S tint_symbol_8 = {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4]), tint_symbol_5(buffer, (offset + 4u)), asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4])};
+  return tint_symbol_8;
+}
+
+typedef S tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 128u)));
+    }
+  }
+  return arr;
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      const uint i = idx;
+      const S tint_symbol_7 = (S)0;
+      w[i] = tint_symbol_7;
+    }
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = tint_symbol_3(u, 256u);
+  w[3].m = tint_symbol_5(u, 260u);
+  uint ubo_load_3 = u[0].z;
+  w[1].m[0] = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))).yx;
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_workgroup.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_workgroup.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..2378569
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_workgroup.wgsl.expected.fxc.hlsl

@@ -0,0 +1,69 @@
+SKIP: FAILED
+
+struct S {
+  int before;
+  matrix<float16_t, 3, 2> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+groupshared S w[4];
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+matrix<float16_t, 3, 2> tint_symbol_5(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  return matrix<float16_t, 3, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))));
+}
+
+S tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_3 = ((offset + 0u)) / 4;
+  const uint scalar_offset_4 = ((offset + 64u)) / 4;
+  const S tint_symbol_8 = {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4]), tint_symbol_5(buffer, (offset + 4u)), asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4])};
+  return tint_symbol_8;
+}
+
+typedef S tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 128u)));
+    }
+  }
+  return arr;
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      const uint i = idx;
+      const S tint_symbol_7 = (S)0;
+      w[i] = tint_symbol_7;
+    }
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = tint_symbol_3(u, 256u);
+  w[3].m = tint_symbol_5(u, 260u);
+  uint ubo_load_3 = u[0].z;
+  w[1].m[0] = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))).yx;
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x00000194753403B0(3,10-18): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_workgroup.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_workgroup.wgsl.expected.glsl
new file mode 100644
index 0000000..0a5ebca2
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_workgroup.wgsl.expected.glsl

@@ -0,0 +1,114 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct S {
+  int before;
+  f16mat3x2 m;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  int after;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+  uint pad_26;
+};
+
+struct S_std140 {
+  int before;
+  f16vec2 m_0;
+  f16vec2 m_1;
+  f16vec2 m_2;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  int after;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+  uint pad_26;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  S_std140 inner[4];
+} u;
+
+shared S w[4];
+S conv_S(S_std140 val) {
+  return S(val.before, f16mat3x2(val.m_0, val.m_1, val.m_2), val.pad, val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.pad_7, val.pad_8, val.pad_9, val.pad_10, val.pad_11, val.after, val.pad_12, val.pad_13, val.pad_14, val.pad_15, val.pad_16, val.pad_17, val.pad_18, val.pad_19, val.pad_20, val.pad_21, val.pad_22, val.pad_23, val.pad_24, val.pad_25, val.pad_26);
+}
+
+S[4] conv_arr4_S(S_std140 val[4]) {
+  S arr[4] = S[4](S(0, f16mat3x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, f16mat3x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, f16mat3x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, f16mat3x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_S(val[i]);
+    }
+  }
+  return arr;
+}
+
+f16mat3x2 load_u_inner_2_m() {
+  return f16mat3x2(u.inner[2u].m_0, u.inner[2u].m_1, u.inner[2u].m_2);
+}
+
+void f(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      uint i = idx;
+      S tint_symbol = S(0, f16mat3x2(f16vec2(0.0hf), f16vec2(0.0hf), f16vec2(0.0hf)), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u);
+      w[i] = tint_symbol;
+    }
+  }
+  barrier();
+  w = conv_arr4_S(u.inner);
+  w[1] = conv_S(u.inner[2u]);
+  w[3].m = load_u_inner_2_m();
+  w[1].m[0] = u.inner[0u].m_1.yx;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f(gl_LocalInvocationIndex);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_workgroup.wgsl.expected.msl
new file mode 100644
index 0000000..1ebcfae
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_workgroup.wgsl.expected.msl

@@ -0,0 +1,47 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ half3x2 m;
+  /* 0x0010 */ tint_array<int8_t, 48> tint_pad;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_1;
+};
+
+struct tint_symbol_6 {
+  tint_array<S, 4> w;
+};
+
+void f_inner(uint local_invocation_index, threadgroup tint_array<S, 4>* const tint_symbol_1, const constant tint_array<S, 4>* const tint_symbol_2) {
+  for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+    uint const i = idx;
+    S const tint_symbol = S{};
+    (*(tint_symbol_1))[i] = tint_symbol;
+  }
+  threadgroup_barrier(mem_flags::mem_threadgroup);
+  *(tint_symbol_1) = *(tint_symbol_2);
+  (*(tint_symbol_1))[1] = (*(tint_symbol_2))[2];
+  (*(tint_symbol_1))[3].m = (*(tint_symbol_2))[2].m;
+  (*(tint_symbol_1))[1].m[0] = half2((*(tint_symbol_2))[0].m[1]).yx;
+}
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol_5 [[buffer(0)]], threadgroup tint_symbol_6* tint_symbol_4 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
+  threadgroup tint_array<S, 4>* const tint_symbol_3 = &((*(tint_symbol_4)).w);
+  f_inner(local_invocation_index, tint_symbol_3, tint_symbol_5);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_workgroup.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_workgroup.wgsl.expected.spvasm
new file mode 100644
index 0000000..359d3ae
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_workgroup.wgsl.expected.spvasm

@@ -0,0 +1,220 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 131
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f" %local_invocation_index_1
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %local_invocation_index_1 "local_invocation_index_1"
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "m_2"
+               OpMemberName %S_std140 4 "after"
+               OpName %u "u"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %w "w"
+               OpName %conv_S "conv_S"
+               OpName %val "val"
+               OpName %conv_arr4_S "conv_arr4_S"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_u_inner_2_m "load_u_inner_2_m"
+               OpName %f_inner "f_inner"
+               OpName %local_invocation_index "local_invocation_index"
+               OpName %idx "idx"
+               OpName %f "f"
+               OpDecorate %local_invocation_index_1 BuiltIn LocalInvocationIndex
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 4
+               OpMemberDecorate %S_std140 2 Offset 8
+               OpMemberDecorate %S_std140 3 Offset 12
+               OpMemberDecorate %S_std140 4 Offset 64
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 4
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 4
+               OpMemberDecorate %S 2 Offset 64
+               OpDecorate %_arr_S_uint_4 ArrayStride 128
+       %uint = OpTypeInt 32 0
+%_ptr_Input_uint = OpTypePointer Input %uint
+%local_invocation_index_1 = OpVariable %_ptr_Input_uint Input
+        %int = OpTypeInt 32 1
+       %half = OpTypeFloat 16
+     %v2half = OpTypeVector %half 2
+   %S_std140 = OpTypeStruct %int %v2half %v2half %v2half %int
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat3v2half = OpTypeMatrix %v2half 3
+          %S = OpTypeStruct %int %mat3v2half %int
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+%_ptr_Workgroup__arr_S_uint_4 = OpTypePointer Workgroup %_arr_S_uint_4
+          %w = OpVariable %_ptr_Workgroup__arr_S_uint_4 Workgroup
+         %18 = OpTypeFunction %S %S_std140
+         %29 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
+%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
+         %35 = OpConstantNull %_arr_S_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %38 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
+         %51 = OpConstantNull %_arr_S_std140_uint_4
+%_ptr_Function_S = OpTypePointer Function %S
+%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
+     %uint_1 = OpConstant %uint 1
+         %64 = OpTypeFunction %mat3v2half
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half
+     %uint_3 = OpConstant %uint 3
+       %void = OpTypeVoid
+         %84 = OpTypeFunction %void %uint
+%_ptr_Workgroup_S = OpTypePointer Workgroup %S
+        %102 = OpConstantNull %S
+   %uint_264 = OpConstant %uint 264
+%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
+      %int_1 = OpConstant %int 1
+      %int_3 = OpConstant %int 3
+%_ptr_Workgroup_mat3v2half = OpTypePointer Workgroup %mat3v2half
+        %120 = OpConstantNull %int
+%_ptr_Workgroup_v2half = OpTypePointer Workgroup %v2half
+        %126 = OpTypeFunction %void
+     %conv_S = OpFunction %S None %18
+        %val = OpFunctionParameter %S_std140
+         %21 = OpLabel
+         %22 = OpCompositeExtract %int %val 0
+         %23 = OpCompositeExtract %v2half %val 1
+         %24 = OpCompositeExtract %v2half %val 2
+         %25 = OpCompositeExtract %v2half %val 3
+         %26 = OpCompositeConstruct %mat3v2half %23 %24 %25
+         %27 = OpCompositeExtract %int %val 4
+         %28 = OpCompositeConstruct %S %22 %26 %27
+               OpReturnValue %28
+               OpFunctionEnd
+%conv_arr4_S = OpFunction %_arr_S_uint_4 None %29
+      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
+         %32 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %35
+          %i = OpVariable %_ptr_Function_uint Function %38
+%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %51
+               OpBranch %39
+         %39 = OpLabel
+               OpLoopMerge %40 %41 None
+               OpBranch %42
+         %42 = OpLabel
+         %44 = OpLoad %uint %i
+         %45 = OpULessThan %bool %44 %uint_4
+         %43 = OpLogicalNot %bool %45
+               OpSelectionMerge %47 None
+               OpBranchConditional %43 %48 %47
+         %48 = OpLabel
+               OpBranch %40
+         %47 = OpLabel
+               OpStore %var_for_index %val_0
+         %52 = OpLoad %uint %i
+         %54 = OpAccessChain %_ptr_Function_S %arr %52
+         %56 = OpLoad %uint %i
+         %58 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %56
+         %59 = OpLoad %S_std140 %58
+         %55 = OpFunctionCall %S %conv_S %59
+               OpStore %54 %55
+               OpBranch %41
+         %41 = OpLabel
+         %60 = OpLoad %uint %i
+         %62 = OpIAdd %uint %60 %uint_1
+               OpStore %i %62
+               OpBranch %39
+         %40 = OpLabel
+         %63 = OpLoad %_arr_S_uint_4 %arr
+               OpReturnValue %63
+               OpFunctionEnd
+%load_u_inner_2_m = OpFunction %mat3v2half None %64
+         %66 = OpLabel
+         %71 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %74 = OpAccessChain %_ptr_Uniform_v2half %71 %uint_1
+         %75 = OpLoad %v2half %74
+         %77 = OpAccessChain %_ptr_Uniform_v2half %71 %uint_2
+         %78 = OpLoad %v2half %77
+         %81 = OpAccessChain %_ptr_Uniform_v2half %71 %uint_3
+         %82 = OpLoad %v2half %81
+         %83 = OpCompositeConstruct %mat3v2half %75 %78 %82
+               OpReturnValue %83
+               OpFunctionEnd
+    %f_inner = OpFunction %void None %84
+%local_invocation_index = OpFunctionParameter %uint
+         %88 = OpLabel
+        %idx = OpVariable %_ptr_Function_uint Function %38
+               OpStore %idx %local_invocation_index
+               OpBranch %90
+         %90 = OpLabel
+               OpLoopMerge %91 %92 None
+               OpBranch %93
+         %93 = OpLabel
+         %95 = OpLoad %uint %idx
+         %96 = OpULessThan %bool %95 %uint_4
+         %94 = OpLogicalNot %bool %96
+               OpSelectionMerge %97 None
+               OpBranchConditional %94 %98 %97
+         %98 = OpLabel
+               OpBranch %91
+         %97 = OpLabel
+         %99 = OpLoad %uint %idx
+        %101 = OpAccessChain %_ptr_Workgroup_S %w %99
+               OpStore %101 %102
+               OpBranch %92
+         %92 = OpLabel
+        %103 = OpLoad %uint %idx
+        %104 = OpIAdd %uint %103 %uint_1
+               OpStore %idx %104
+               OpBranch %90
+         %91 = OpLabel
+               OpControlBarrier %uint_2 %uint_2 %uint_264
+        %109 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
+        %110 = OpLoad %_arr_S_std140_uint_4 %109
+        %107 = OpFunctionCall %_arr_S_uint_4 %conv_arr4_S %110
+               OpStore %w %107
+        %112 = OpAccessChain %_ptr_Workgroup_S %w %int_1
+        %114 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+        %115 = OpLoad %S_std140 %114
+        %113 = OpFunctionCall %S %conv_S %115
+               OpStore %112 %113
+        %118 = OpAccessChain %_ptr_Workgroup_mat3v2half %w %int_3 %uint_1
+        %119 = OpFunctionCall %mat3v2half %load_u_inner_2_m
+               OpStore %118 %119
+        %122 = OpAccessChain %_ptr_Workgroup_v2half %w %int_1 %uint_1 %120
+        %123 = OpAccessChain %_ptr_Uniform_v2half %u %uint_0 %38 %uint_2
+        %124 = OpLoad %v2half %123
+        %125 = OpVectorShuffle %v2half %124 %124 1 0
+               OpStore %122 %125
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %126
+        %128 = OpLabel
+        %130 = OpLoad %uint %local_invocation_index_1
+        %129 = OpFunctionCall %void %f_inner %130
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_workgroup.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_workgroup.wgsl.expected.wgsl
new file mode 100644
index 0000000..9042e63
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f16/to_workgroup.wgsl.expected.wgsl

@@ -0,0 +1,20 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat3x2<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+var<workgroup> w : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  w = u;
+  w[1] = u[2];
+  w[3].m = u[2].m;
+  w[1].m[0] = u[0].m[1].yx;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f32/dynamic_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/dynamic_index_via_ptr.wgsl
new file mode 100644
index 0000000..310b316
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/dynamic_index_via_ptr.wgsl

@@ -0,0 +1,32 @@
+struct Inner {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+var<private> counter = 0;
+fn i() -> i32 { counter++; return counter; }
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a           = &a;
+  let p_a_i         = &((*p_a)[i()]);
+  let p_a_i_a       = &((*p_a_i).a);
+  let p_a_i_a_i     = &((*p_a_i_a)[i()]);
+  let p_a_i_a_i_m   = &((*p_a_i_a_i).m);
+  let p_a_i_a_i_m_i = &((*p_a_i_a_i_m)[i()]);
+
+
+  let l_a             : array<Outer, 4> =  *p_a;
+  let l_a_i           : Outer           =  *p_a_i;
+  let l_a_i_a         : array<Inner, 4> =  *p_a_i_a;
+  let l_a_i_a_i       : Inner           =  *p_a_i_a_i;
+  let l_a_i_a_i_m     : mat3x2<f32>     =  *p_a_i_a_i_m;
+  let l_a_i_a_i_m_i   : vec2<f32>       =  *p_a_i_a_i_m_i;
+  let l_a_i_a_i_m_i_i : f32             = (*p_a_i_a_i_m_i)[i()];
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/struct/mat3x2/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
rename to test/tint/buffer/uniform/std140/struct/mat3x2_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/struct/mat3x2/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
rename to test/tint/buffer/uniform/std140/struct/mat3x2_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2/dynamic_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/dynamic_index_via_ptr.wgsl.expected.glsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/struct/mat3x2/dynamic_index_via_ptr.wgsl.expected.glsl
rename to test/tint/buffer/uniform/std140/struct/mat3x2_f32/dynamic_index_via_ptr.wgsl.expected.glsl


diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2/dynamic_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/dynamic_index_via_ptr.wgsl.expected.msl
similarity index 100%
rename from test/tint/buffer/uniform/std140/struct/mat3x2/dynamic_index_via_ptr.wgsl.expected.msl
rename to test/tint/buffer/uniform/std140/struct/mat3x2_f32/dynamic_index_via_ptr.wgsl.expected.msl


diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2/dynamic_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/dynamic_index_via_ptr.wgsl.expected.spvasm
similarity index 100%
rename from test/tint/buffer/uniform/std140/struct/mat3x2/dynamic_index_via_ptr.wgsl.expected.spvasm
rename to test/tint/buffer/uniform/std140/struct/mat3x2_f32/dynamic_index_via_ptr.wgsl.expected.spvasm


diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2/dynamic_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/dynamic_index_via_ptr.wgsl.expected.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/struct/mat3x2/dynamic_index_via_ptr.wgsl.expected.wgsl
rename to test/tint/buffer/uniform/std140/struct/mat3x2_f32/dynamic_index_via_ptr.wgsl.expected.wgsl


diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f32/static_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/static_index_via_ptr.wgsl
new file mode 100644
index 0000000..172fe38
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/static_index_via_ptr.wgsl

@@ -0,0 +1,29 @@
+struct Inner {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &a;
+  let p_a_3 = &((*p_a)[3]);
+  let p_a_3_a = &((*p_a_3).a);
+  let p_a_3_a_2 = &((*p_a_3_a)[2]);
+  let p_a_3_a_2_m = &((*p_a_3_a_2).m);
+  let p_a_3_a_2_m_1 = &((*p_a_3_a_2_m)[1]);
+
+
+  let l_a             : array<Outer, 4> = *p_a;
+  let l_a_3           : Outer           = *p_a_3;
+  let l_a_3_a         : array<Inner, 4> = *p_a_3_a;
+  let l_a_3_a_2       : Inner           = *p_a_3_a_2;
+  let l_a_3_a_2_m     : mat3x2<f32>     = *p_a_3_a_2_m;
+  let l_a_3_a_2_m_1   : vec2<f32>       = *p_a_3_a_2_m_1;
+  let l_a_3_a_2_m_1_0 : f32             = (*p_a_3_a_2_m_1)[0];
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2/static_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/struct/mat3x2/static_index_via_ptr.wgsl.expected.dxc.hlsl
rename to test/tint/buffer/uniform/std140/struct/mat3x2_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2/static_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/struct/mat3x2/static_index_via_ptr.wgsl.expected.fxc.hlsl
rename to test/tint/buffer/uniform/std140/struct/mat3x2_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2/static_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/static_index_via_ptr.wgsl.expected.glsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/struct/mat3x2/static_index_via_ptr.wgsl.expected.glsl
rename to test/tint/buffer/uniform/std140/struct/mat3x2_f32/static_index_via_ptr.wgsl.expected.glsl


diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2/static_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/static_index_via_ptr.wgsl.expected.msl
similarity index 100%
rename from test/tint/buffer/uniform/std140/struct/mat3x2/static_index_via_ptr.wgsl.expected.msl
rename to test/tint/buffer/uniform/std140/struct/mat3x2_f32/static_index_via_ptr.wgsl.expected.msl


diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2/static_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/static_index_via_ptr.wgsl.expected.spvasm
similarity index 100%
rename from test/tint/buffer/uniform/std140/struct/mat3x2/static_index_via_ptr.wgsl.expected.spvasm
rename to test/tint/buffer/uniform/std140/struct/mat3x2_f32/static_index_via_ptr.wgsl.expected.spvasm


diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2/static_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/static_index_via_ptr.wgsl.expected.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/struct/mat3x2/static_index_via_ptr.wgsl.expected.wgsl
rename to test/tint/buffer/uniform/std140/struct/mat3x2_f32/static_index_via_ptr.wgsl.expected.wgsl


diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_builtin.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_builtin.wgsl
new file mode 100644
index 0000000..5a167ce
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_builtin.wgsl

@@ -0,0 +1,15 @@
+struct S {
+  before : i32,
+  m : mat3x2<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    let t = transpose(u[2].m);
+    let l = length(u[0].m[1].yx);
+    let a = abs(u[0].m[1].yx.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_builtin.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_builtin.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..2f2a028
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_builtin.wgsl.expected.dxc.hlsl

@@ -0,0 +1,21 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+
+float3x2 tint_symbol(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
+  return float3x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float2x3 t = transpose(tint_symbol(u, 264u));
+  const float l = length(asfloat(u[1].xy).yx);
+  const float a = abs(asfloat(u[1].xy).yx.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_builtin.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_builtin.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..2f2a028
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_builtin.wgsl.expected.fxc.hlsl

@@ -0,0 +1,21 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+
+float3x2 tint_symbol(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
+  return float3x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float2x3 t = transpose(tint_symbol(u, 264u));
+  const float l = length(asfloat(u[1].xy).yx);
+  const float a = abs(asfloat(u[1].xy).yx.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_builtin.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_builtin.wgsl.expected.glsl
new file mode 100644
index 0000000..f891a29
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_builtin.wgsl.expected.glsl

@@ -0,0 +1,83 @@
+#version 310 es
+
+struct S {
+  int before;
+  uint pad;
+  mat3x2 m;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  int after;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+};
+
+struct S_std140 {
+  int before;
+  uint pad;
+  vec2 m_0;
+  vec2 m_1;
+  vec2 m_2;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  int after;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  S_std140 inner[4];
+} u;
+
+mat3x2 load_u_inner_2_m() {
+  return mat3x2(u.inner[2u].m_0, u.inner[2u].m_1, u.inner[2u].m_2);
+}
+
+void f() {
+  mat2x3 t = transpose(load_u_inner_2_m());
+  float l = length(u.inner[0u].m_1.yx);
+  float a = abs(u.inner[0u].m_1.yx[0u]);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_builtin.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_builtin.wgsl.expected.msl
new file mode 100644
index 0000000..d42da49
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_builtin.wgsl.expected.msl

@@ -0,0 +1,32 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
+  /* 0x0008 */ float3x2 m;
+  /* 0x0020 */ tint_array<int8_t, 32> tint_pad_1;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_2;
+};
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol [[buffer(0)]]) {
+  float2x3 const t = transpose((*(tint_symbol))[2].m);
+  float const l = length(float2((*(tint_symbol))[0].m[1]).yx);
+  float const a = fabs(float2((*(tint_symbol))[0].m[1]).yx[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_builtin.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_builtin.wgsl.expected.spvasm
new file mode 100644
index 0000000..76771a7
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_builtin.wgsl.expected.spvasm

@@ -0,0 +1,82 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 52
+; Schema: 0
+               OpCapability Shader
+         %42 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "m_2"
+               OpMemberName %S_std140 4 "after"
+               OpName %u "u"
+               OpName %load_u_inner_2_m "load_u_inner_2_m"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 8
+               OpMemberDecorate %S_std140 2 Offset 16
+               OpMemberDecorate %S_std140 3 Offset 24
+               OpMemberDecorate %S_std140 4 Offset 64
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+        %int = OpTypeInt 32 1
+      %float = OpTypeFloat 32
+    %v2float = OpTypeVector %float 2
+   %S_std140 = OpTypeStruct %int %v2float %v2float %v2float %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+%mat3v2float = OpTypeMatrix %v2float 3
+         %11 = OpTypeFunction %mat3v2float
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+     %uint_1 = OpConstant %uint 1
+%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
+     %uint_3 = OpConstant %uint 3
+       %void = OpTypeVoid
+         %33 = OpTypeFunction %void
+    %v3float = OpTypeVector %float 3
+%mat2v3float = OpTypeMatrix %v3float 2
+         %43 = OpConstantNull %uint
+%load_u_inner_2_m = OpFunction %mat3v2float None %11
+         %14 = OpLabel
+         %19 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %23 = OpAccessChain %_ptr_Uniform_v2float %19 %uint_1
+         %24 = OpLoad %v2float %23
+         %26 = OpAccessChain %_ptr_Uniform_v2float %19 %uint_2
+         %27 = OpLoad %v2float %26
+         %30 = OpAccessChain %_ptr_Uniform_v2float %19 %uint_3
+         %31 = OpLoad %v2float %30
+         %32 = OpCompositeConstruct %mat3v2float %24 %27 %31
+               OpReturnValue %32
+               OpFunctionEnd
+          %f = OpFunction %void None %33
+         %36 = OpLabel
+         %40 = OpFunctionCall %mat3v2float %load_u_inner_2_m
+         %37 = OpTranspose %mat2v3float %40
+         %44 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %43 %uint_2
+         %45 = OpLoad %v2float %44
+         %46 = OpVectorShuffle %v2float %45 %45 1 0
+         %41 = OpExtInst %float %42 Length %46
+         %48 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %43 %uint_2
+         %49 = OpLoad %v2float %48
+         %50 = OpVectorShuffle %v2float %49 %49 1 0
+         %51 = OpCompositeExtract %float %50 0
+         %47 = OpExtInst %float %42 FAbs %51
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_builtin.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_builtin.wgsl.expected.wgsl
new file mode 100644
index 0000000..345182e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_builtin.wgsl.expected.wgsl

@@ -0,0 +1,15 @@
+struct S {
+  before : i32,
+  m : mat3x2<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let t = transpose(u[2].m);
+  let l = length(u[0].m[1].yx);
+  let a = abs(u[0].m[1].yx.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_fn.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_fn.wgsl
new file mode 100644
index 0000000..66e7dbb
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_fn.wgsl

@@ -0,0 +1,23 @@
+struct S {
+  before : i32,
+  m : mat3x2<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+fn a(a : array<S, 4>) {}
+fn b(s : S) {}
+fn c(m : mat3x2<f32>) {}
+fn d(v : vec2<f32>) {}
+fn e(f : f32) {}
+
+@compute @workgroup_size(1)
+fn f() {
+    a(u);
+    b(u[2]);
+    c(u[2].m);
+    d(u[0].m[1].yx);
+    e(u[0].m[1].yx.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_fn.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_fn.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..0e79198
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_fn.wgsl.expected.dxc.hlsl

@@ -0,0 +1,62 @@
+struct S {
+  int before;
+  float3x2 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+
+void a(S a_1[4]) {
+}
+
+void b(S s) {
+}
+
+void c(float3x2 m) {
+}
+
+void d(float2 v) {
+}
+
+void e(float f_1) {
+}
+
+float3x2 tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
+  return float3x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)));
+}
+
+S tint_symbol_1(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_3 = ((offset + 0u)) / 4;
+  const uint scalar_offset_4 = ((offset + 64u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4]), tint_symbol_3(buffer, (offset + 8u)), asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(tint_symbol_1(u, 256u));
+  c(tint_symbol_3(u, 264u));
+  d(asfloat(u[1].xy).yx);
+  e(asfloat(u[1].xy).yx.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_fn.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_fn.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..0e79198
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_fn.wgsl.expected.fxc.hlsl

@@ -0,0 +1,62 @@
+struct S {
+  int before;
+  float3x2 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+
+void a(S a_1[4]) {
+}
+
+void b(S s) {
+}
+
+void c(float3x2 m) {
+}
+
+void d(float2 v) {
+}
+
+void e(float f_1) {
+}
+
+float3x2 tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
+  return float3x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)));
+}
+
+S tint_symbol_1(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_3 = ((offset + 0u)) / 4;
+  const uint scalar_offset_4 = ((offset + 64u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4]), tint_symbol_3(buffer, (offset + 8u)), asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(tint_symbol_1(u, 256u));
+  c(tint_symbol_3(u, 264u));
+  d(asfloat(u[1].xy).yx);
+  e(asfloat(u[1].xy).yx.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_fn.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_fn.wgsl.expected.glsl
new file mode 100644
index 0000000..d71345c
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_fn.wgsl.expected.glsl

@@ -0,0 +1,114 @@
+#version 310 es
+
+struct S {
+  int before;
+  uint pad;
+  mat3x2 m;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  int after;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+};
+
+struct S_std140 {
+  int before;
+  uint pad;
+  vec2 m_0;
+  vec2 m_1;
+  vec2 m_2;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  int after;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  S_std140 inner[4];
+} u;
+
+void a(S a_1[4]) {
+}
+
+void b(S s) {
+}
+
+void c(mat3x2 m) {
+}
+
+void d(vec2 v) {
+}
+
+void e(float f_1) {
+}
+
+S conv_S(S_std140 val) {
+  return S(val.before, val.pad, mat3x2(val.m_0, val.m_1, val.m_2), val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.pad_7, val.pad_8, val.after, val.pad_9, val.pad_10, val.pad_11, val.pad_12, val.pad_13, val.pad_14, val.pad_15, val.pad_16, val.pad_17, val.pad_18, val.pad_19, val.pad_20, val.pad_21, val.pad_22, val.pad_23);
+}
+
+S[4] conv_arr4_S(S_std140 val[4]) {
+  S arr[4] = S[4](S(0, 0u, mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_S(val[i]);
+    }
+  }
+  return arr;
+}
+
+mat3x2 load_u_inner_2_m() {
+  return mat3x2(u.inner[2u].m_0, u.inner[2u].m_1, u.inner[2u].m_2);
+}
+
+void f() {
+  a(conv_arr4_S(u.inner));
+  b(conv_S(u.inner[2u]));
+  c(load_u_inner_2_m());
+  d(u.inner[0u].m_1.yx);
+  e(u.inner[0u].m_1.yx[0u]);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_fn.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_fn.wgsl.expected.msl
new file mode 100644
index 0000000..c42c800
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_fn.wgsl.expected.msl

@@ -0,0 +1,49 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
+  /* 0x0008 */ float3x2 m;
+  /* 0x0020 */ tint_array<int8_t, 32> tint_pad_1;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_2;
+};
+
+void a(tint_array<S, 4> a_1) {
+}
+
+void b(S s) {
+}
+
+void c(float3x2 m) {
+}
+
+void d(float2 v) {
+}
+
+void e(float f_1) {
+}
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol [[buffer(0)]]) {
+  a(*(tint_symbol));
+  b((*(tint_symbol))[2]);
+  c((*(tint_symbol))[2].m);
+  d(float2((*(tint_symbol))[0].m[1]).yx);
+  e(float2((*(tint_symbol))[0].m[1]).yx[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_fn.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_fn.wgsl.expected.spvasm
new file mode 100644
index 0000000..43e3bea
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_fn.wgsl.expected.spvasm

@@ -0,0 +1,206 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 124
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "m_2"
+               OpMemberName %S_std140 4 "after"
+               OpName %u "u"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %a "a"
+               OpName %a_1 "a_1"
+               OpName %b "b"
+               OpName %s "s"
+               OpName %c "c"
+               OpName %m "m"
+               OpName %d "d"
+               OpName %v "v"
+               OpName %e "e"
+               OpName %f_1 "f_1"
+               OpName %conv_S "conv_S"
+               OpName %val "val"
+               OpName %conv_arr4_S "conv_arr4_S"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_u_inner_2_m "load_u_inner_2_m"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 8
+               OpMemberDecorate %S_std140 2 Offset 16
+               OpMemberDecorate %S_std140 3 Offset 24
+               OpMemberDecorate %S_std140 4 Offset 64
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 8
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 8
+               OpMemberDecorate %S 2 Offset 64
+               OpDecorate %_arr_S_uint_4 ArrayStride 128
+        %int = OpTypeInt 32 1
+      %float = OpTypeFloat 32
+    %v2float = OpTypeVector %float 2
+   %S_std140 = OpTypeStruct %int %v2float %v2float %v2float %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+       %void = OpTypeVoid
+%mat3v2float = OpTypeMatrix %v2float 3
+          %S = OpTypeStruct %int %mat3v2float %int
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+         %11 = OpTypeFunction %void %_arr_S_uint_4
+         %19 = OpTypeFunction %void %S
+         %23 = OpTypeFunction %void %mat3v2float
+         %27 = OpTypeFunction %void %v2float
+         %31 = OpTypeFunction %void %float
+         %35 = OpTypeFunction %S %S_std140
+         %46 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
+%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
+         %52 = OpConstantNull %_arr_S_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %55 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
+         %68 = OpConstantNull %_arr_S_std140_uint_4
+%_ptr_Function_S = OpTypePointer Function %S
+%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
+     %uint_1 = OpConstant %uint 1
+         %81 = OpTypeFunction %mat3v2float
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
+     %uint_3 = OpConstant %uint 3
+        %101 = OpTypeFunction %void
+%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
+          %a = OpFunction %void None %11
+        %a_1 = OpFunctionParameter %_arr_S_uint_4
+         %18 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %b = OpFunction %void None %19
+          %s = OpFunctionParameter %S
+         %22 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %c = OpFunction %void None %23
+          %m = OpFunctionParameter %mat3v2float
+         %26 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %d = OpFunction %void None %27
+          %v = OpFunctionParameter %v2float
+         %30 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %e = OpFunction %void None %31
+        %f_1 = OpFunctionParameter %float
+         %34 = OpLabel
+               OpReturn
+               OpFunctionEnd
+     %conv_S = OpFunction %S None %35
+        %val = OpFunctionParameter %S_std140
+         %38 = OpLabel
+         %39 = OpCompositeExtract %int %val 0
+         %40 = OpCompositeExtract %v2float %val 1
+         %41 = OpCompositeExtract %v2float %val 2
+         %42 = OpCompositeExtract %v2float %val 3
+         %43 = OpCompositeConstruct %mat3v2float %40 %41 %42
+         %44 = OpCompositeExtract %int %val 4
+         %45 = OpCompositeConstruct %S %39 %43 %44
+               OpReturnValue %45
+               OpFunctionEnd
+%conv_arr4_S = OpFunction %_arr_S_uint_4 None %46
+      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
+         %49 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %52
+          %i = OpVariable %_ptr_Function_uint Function %55
+%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %68
+               OpBranch %56
+         %56 = OpLabel
+               OpLoopMerge %57 %58 None
+               OpBranch %59
+         %59 = OpLabel
+         %61 = OpLoad %uint %i
+         %62 = OpULessThan %bool %61 %uint_4
+         %60 = OpLogicalNot %bool %62
+               OpSelectionMerge %64 None
+               OpBranchConditional %60 %65 %64
+         %65 = OpLabel
+               OpBranch %57
+         %64 = OpLabel
+               OpStore %var_for_index %val_0
+         %69 = OpLoad %uint %i
+         %71 = OpAccessChain %_ptr_Function_S %arr %69
+         %73 = OpLoad %uint %i
+         %75 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %73
+         %76 = OpLoad %S_std140 %75
+         %72 = OpFunctionCall %S %conv_S %76
+               OpStore %71 %72
+               OpBranch %58
+         %58 = OpLabel
+         %77 = OpLoad %uint %i
+         %79 = OpIAdd %uint %77 %uint_1
+               OpStore %i %79
+               OpBranch %56
+         %57 = OpLabel
+         %80 = OpLoad %_arr_S_uint_4 %arr
+               OpReturnValue %80
+               OpFunctionEnd
+%load_u_inner_2_m = OpFunction %mat3v2float None %81
+         %83 = OpLabel
+         %88 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %91 = OpAccessChain %_ptr_Uniform_v2float %88 %uint_1
+         %92 = OpLoad %v2float %91
+         %94 = OpAccessChain %_ptr_Uniform_v2float %88 %uint_2
+         %95 = OpLoad %v2float %94
+         %98 = OpAccessChain %_ptr_Uniform_v2float %88 %uint_3
+         %99 = OpLoad %v2float %98
+        %100 = OpCompositeConstruct %mat3v2float %92 %95 %99
+               OpReturnValue %100
+               OpFunctionEnd
+          %f = OpFunction %void None %101
+        %103 = OpLabel
+        %107 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
+        %108 = OpLoad %_arr_S_std140_uint_4 %107
+        %105 = OpFunctionCall %_arr_S_uint_4 %conv_arr4_S %108
+        %104 = OpFunctionCall %void %a %105
+        %111 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+        %112 = OpLoad %S_std140 %111
+        %110 = OpFunctionCall %S %conv_S %112
+        %109 = OpFunctionCall %void %b %110
+        %114 = OpFunctionCall %mat3v2float %load_u_inner_2_m
+        %113 = OpFunctionCall %void %c %114
+        %116 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %55 %uint_2
+        %117 = OpLoad %v2float %116
+        %118 = OpVectorShuffle %v2float %117 %117 1 0
+        %115 = OpFunctionCall %void %d %118
+        %120 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %55 %uint_2
+        %121 = OpLoad %v2float %120
+        %122 = OpVectorShuffle %v2float %121 %121 1 0
+        %123 = OpCompositeExtract %float %122 0
+        %119 = OpFunctionCall %void %e %123
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_fn.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_fn.wgsl.expected.wgsl
new file mode 100644
index 0000000..f6aaf33
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_fn.wgsl.expected.wgsl

@@ -0,0 +1,32 @@
+struct S {
+  before : i32,
+  m : mat3x2<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+fn a(a : array<S, 4>) {
+}
+
+fn b(s : S) {
+}
+
+fn c(m : mat3x2<f32>) {
+}
+
+fn d(v : vec2<f32>) {
+}
+
+fn e(f : f32) {
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  a(u);
+  b(u[2]);
+  c(u[2].m);
+  d(u[0].m[1].yx);
+  e(u[0].m[1].yx.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_private.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_private.wgsl
new file mode 100644
index 0000000..4ffe77c
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_private.wgsl

@@ -0,0 +1,17 @@
+struct S {
+  before : i32,
+  m : mat3x2<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+var<private> p : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    p = u;
+    p[1] = u[2];
+    p[3].m = u[2].m;
+    p[1].m[0] = u[0].m[1].yx;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_private.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_private.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..5a5e8b1
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_private.wgsl.expected.dxc.hlsl

@@ -0,0 +1,47 @@
+struct S {
+  int before;
+  float3x2 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+static S p[4] = (S[4])0;
+
+float3x2 tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
+  return float3x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)));
+}
+
+S tint_symbol_1(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_3 = ((offset + 0u)) / 4;
+  const uint scalar_offset_4 = ((offset + 64u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4]), tint_symbol_3(buffer, (offset + 8u)), asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = tint_symbol_1(u, 256u);
+  p[3].m = tint_symbol_3(u, 264u);
+  p[1].m[0] = asfloat(u[1].xy).yx;
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_private.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_private.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..5a5e8b1
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_private.wgsl.expected.fxc.hlsl

@@ -0,0 +1,47 @@
+struct S {
+  int before;
+  float3x2 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+static S p[4] = (S[4])0;
+
+float3x2 tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
+  return float3x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)));
+}
+
+S tint_symbol_1(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_3 = ((offset + 0u)) / 4;
+  const uint scalar_offset_4 = ((offset + 64u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4]), tint_symbol_3(buffer, (offset + 8u)), asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = tint_symbol_1(u, 256u);
+  p[3].m = tint_symbol_3(u, 264u);
+  p[1].m[0] = asfloat(u[1].xy).yx;
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_private.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_private.wgsl.expected.glsl
new file mode 100644
index 0000000..75d3a4a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_private.wgsl.expected.glsl

@@ -0,0 +1,99 @@
+#version 310 es
+
+struct S {
+  int before;
+  uint pad;
+  mat3x2 m;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  int after;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+};
+
+struct S_std140 {
+  int before;
+  uint pad;
+  vec2 m_0;
+  vec2 m_1;
+  vec2 m_2;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  int after;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  S_std140 inner[4];
+} u;
+
+S p[4] = S[4](S(0, 0u, mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+S conv_S(S_std140 val) {
+  return S(val.before, val.pad, mat3x2(val.m_0, val.m_1, val.m_2), val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.pad_7, val.pad_8, val.after, val.pad_9, val.pad_10, val.pad_11, val.pad_12, val.pad_13, val.pad_14, val.pad_15, val.pad_16, val.pad_17, val.pad_18, val.pad_19, val.pad_20, val.pad_21, val.pad_22, val.pad_23);
+}
+
+S[4] conv_arr4_S(S_std140 val[4]) {
+  S arr[4] = S[4](S(0, 0u, mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_S(val[i]);
+    }
+  }
+  return arr;
+}
+
+mat3x2 load_u_inner_2_m() {
+  return mat3x2(u.inner[2u].m_0, u.inner[2u].m_1, u.inner[2u].m_2);
+}
+
+void f() {
+  p = conv_arr4_S(u.inner);
+  p[1] = conv_S(u.inner[2u]);
+  p[3].m = load_u_inner_2_m();
+  p[1].m[0] = u.inner[0u].m_1.yx;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_private.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_private.wgsl.expected.msl
new file mode 100644
index 0000000..deefeba
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_private.wgsl.expected.msl

@@ -0,0 +1,34 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
+  /* 0x0008 */ float3x2 m;
+  /* 0x0020 */ tint_array<int8_t, 32> tint_pad_1;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_2;
+};
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol_1 [[buffer(0)]]) {
+  thread tint_array<S, 4> tint_symbol = {};
+  tint_symbol = *(tint_symbol_1);
+  tint_symbol[1] = (*(tint_symbol_1))[2];
+  tint_symbol[3].m = (*(tint_symbol_1))[2].m;
+  tint_symbol[1].m[0] = float2((*(tint_symbol_1))[0].m[1]).yx;
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_private.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_private.wgsl.expected.spvasm
new file mode 100644
index 0000000..e74eeb1
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_private.wgsl.expected.spvasm

@@ -0,0 +1,173 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 106
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "m_2"
+               OpMemberName %S_std140 4 "after"
+               OpName %u "u"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %p "p"
+               OpName %conv_S "conv_S"
+               OpName %val "val"
+               OpName %conv_arr4_S "conv_arr4_S"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_u_inner_2_m "load_u_inner_2_m"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 8
+               OpMemberDecorate %S_std140 2 Offset 16
+               OpMemberDecorate %S_std140 3 Offset 24
+               OpMemberDecorate %S_std140 4 Offset 64
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 8
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 8
+               OpMemberDecorate %S 2 Offset 64
+               OpDecorate %_arr_S_uint_4 ArrayStride 128
+        %int = OpTypeInt 32 1
+      %float = OpTypeFloat 32
+    %v2float = OpTypeVector %float 2
+   %S_std140 = OpTypeStruct %int %v2float %v2float %v2float %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+%mat3v2float = OpTypeMatrix %v2float 3
+          %S = OpTypeStruct %int %mat3v2float %int
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+%_ptr_Private__arr_S_uint_4 = OpTypePointer Private %_arr_S_uint_4
+         %16 = OpConstantNull %_arr_S_uint_4
+          %p = OpVariable %_ptr_Private__arr_S_uint_4 Private %16
+         %17 = OpTypeFunction %S %S_std140
+         %28 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
+%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %36 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
+         %49 = OpConstantNull %_arr_S_std140_uint_4
+%_ptr_Function_S = OpTypePointer Function %S
+%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
+     %uint_1 = OpConstant %uint 1
+         %62 = OpTypeFunction %mat3v2float
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
+     %uint_3 = OpConstant %uint 3
+       %void = OpTypeVoid
+         %82 = OpTypeFunction %void
+%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
+      %int_1 = OpConstant %int 1
+%_ptr_Private_S = OpTypePointer Private %S
+      %int_3 = OpConstant %int 3
+%_ptr_Private_mat3v2float = OpTypePointer Private %mat3v2float
+        %100 = OpConstantNull %int
+%_ptr_Private_v2float = OpTypePointer Private %v2float
+     %conv_S = OpFunction %S None %17
+        %val = OpFunctionParameter %S_std140
+         %20 = OpLabel
+         %21 = OpCompositeExtract %int %val 0
+         %22 = OpCompositeExtract %v2float %val 1
+         %23 = OpCompositeExtract %v2float %val 2
+         %24 = OpCompositeExtract %v2float %val 3
+         %25 = OpCompositeConstruct %mat3v2float %22 %23 %24
+         %26 = OpCompositeExtract %int %val 4
+         %27 = OpCompositeConstruct %S %21 %25 %26
+               OpReturnValue %27
+               OpFunctionEnd
+%conv_arr4_S = OpFunction %_arr_S_uint_4 None %28
+      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
+         %31 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %16
+          %i = OpVariable %_ptr_Function_uint Function %36
+%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %49
+               OpBranch %37
+         %37 = OpLabel
+               OpLoopMerge %38 %39 None
+               OpBranch %40
+         %40 = OpLabel
+         %42 = OpLoad %uint %i
+         %43 = OpULessThan %bool %42 %uint_4
+         %41 = OpLogicalNot %bool %43
+               OpSelectionMerge %45 None
+               OpBranchConditional %41 %46 %45
+         %46 = OpLabel
+               OpBranch %38
+         %45 = OpLabel
+               OpStore %var_for_index %val_0
+         %50 = OpLoad %uint %i
+         %52 = OpAccessChain %_ptr_Function_S %arr %50
+         %54 = OpLoad %uint %i
+         %56 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %54
+         %57 = OpLoad %S_std140 %56
+         %53 = OpFunctionCall %S %conv_S %57
+               OpStore %52 %53
+               OpBranch %39
+         %39 = OpLabel
+         %58 = OpLoad %uint %i
+         %60 = OpIAdd %uint %58 %uint_1
+               OpStore %i %60
+               OpBranch %37
+         %38 = OpLabel
+         %61 = OpLoad %_arr_S_uint_4 %arr
+               OpReturnValue %61
+               OpFunctionEnd
+%load_u_inner_2_m = OpFunction %mat3v2float None %62
+         %64 = OpLabel
+         %69 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %72 = OpAccessChain %_ptr_Uniform_v2float %69 %uint_1
+         %73 = OpLoad %v2float %72
+         %75 = OpAccessChain %_ptr_Uniform_v2float %69 %uint_2
+         %76 = OpLoad %v2float %75
+         %79 = OpAccessChain %_ptr_Uniform_v2float %69 %uint_3
+         %80 = OpLoad %v2float %79
+         %81 = OpCompositeConstruct %mat3v2float %73 %76 %80
+               OpReturnValue %81
+               OpFunctionEnd
+          %f = OpFunction %void None %82
+         %85 = OpLabel
+         %88 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
+         %89 = OpLoad %_arr_S_std140_uint_4 %88
+         %86 = OpFunctionCall %_arr_S_uint_4 %conv_arr4_S %89
+               OpStore %p %86
+         %92 = OpAccessChain %_ptr_Private_S %p %int_1
+         %94 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %95 = OpLoad %S_std140 %94
+         %93 = OpFunctionCall %S %conv_S %95
+               OpStore %92 %93
+         %98 = OpAccessChain %_ptr_Private_mat3v2float %p %int_3 %uint_1
+         %99 = OpFunctionCall %mat3v2float %load_u_inner_2_m
+               OpStore %98 %99
+        %102 = OpAccessChain %_ptr_Private_v2float %p %int_1 %uint_1 %100
+        %103 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %36 %uint_2
+        %104 = OpLoad %v2float %103
+        %105 = OpVectorShuffle %v2float %104 %104 1 0
+               OpStore %102 %105
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_private.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_private.wgsl.expected.wgsl
new file mode 100644
index 0000000..3872d22
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_private.wgsl.expected.wgsl

@@ -0,0 +1,18 @@
+struct S {
+  before : i32,
+  m : mat3x2<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+var<private> p : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  p = u;
+  p[1] = u[2];
+  p[3].m = u[2].m;
+  p[1].m[0] = u[0].m[1].yx;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_storage.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_storage.wgsl
new file mode 100644
index 0000000..232fa23
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_storage.wgsl

@@ -0,0 +1,17 @@
+struct S {
+  before : i32,
+  m : mat3x2<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    s = u;
+    s[1] = u[2];
+    s[3].m = u[2].m;
+    s[1].m[0] = u[0].m[1].yx;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_storage.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_storage.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..2869d03
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_storage.wgsl.expected.dxc.hlsl

@@ -0,0 +1,68 @@
+struct S {
+  int before;
+  float3x2 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol_3(RWByteAddressBuffer buffer, uint offset, float3x2 value) {
+  buffer.Store2((offset + 0u), asuint(value[0u]));
+  buffer.Store2((offset + 8u), asuint(value[1u]));
+  buffer.Store2((offset + 16u), asuint(value[2u]));
+}
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, S value) {
+  buffer.Store((offset + 0u), asuint(value.before));
+  tint_symbol_3(buffer, (offset + 8u), value.m);
+  buffer.Store((offset + 64u), asuint(value.after));
+}
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, S value[4]) {
+  S array[4] = value;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      tint_symbol_1(buffer, (offset + (i * 128u)), array[i]);
+    }
+  }
+}
+
+float3x2 tint_symbol_8(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
+  return float3x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)));
+}
+
+S tint_symbol_6(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_3 = ((offset + 0u)) / 4;
+  const uint scalar_offset_4 = ((offset + 64u)) / 4;
+  const S tint_symbol_10 = {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4]), tint_symbol_8(buffer, (offset + 8u)), asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4])};
+  return tint_symbol_10;
+}
+
+typedef S tint_symbol_5_ret[4];
+tint_symbol_5_ret tint_symbol_5(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_6(buffer, (offset + (i_1 * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_5(u, 0u));
+  tint_symbol_1(s, 128u, tint_symbol_6(u, 256u));
+  tint_symbol_3(s, 392u, tint_symbol_8(u, 264u));
+  s.Store2(136u, asuint(asfloat(u[1].xy).yx));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_storage.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_storage.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..2869d03
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_storage.wgsl.expected.fxc.hlsl

@@ -0,0 +1,68 @@
+struct S {
+  int before;
+  float3x2 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol_3(RWByteAddressBuffer buffer, uint offset, float3x2 value) {
+  buffer.Store2((offset + 0u), asuint(value[0u]));
+  buffer.Store2((offset + 8u), asuint(value[1u]));
+  buffer.Store2((offset + 16u), asuint(value[2u]));
+}
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, S value) {
+  buffer.Store((offset + 0u), asuint(value.before));
+  tint_symbol_3(buffer, (offset + 8u), value.m);
+  buffer.Store((offset + 64u), asuint(value.after));
+}
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, S value[4]) {
+  S array[4] = value;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      tint_symbol_1(buffer, (offset + (i * 128u)), array[i]);
+    }
+  }
+}
+
+float3x2 tint_symbol_8(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
+  return float3x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)));
+}
+
+S tint_symbol_6(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_3 = ((offset + 0u)) / 4;
+  const uint scalar_offset_4 = ((offset + 64u)) / 4;
+  const S tint_symbol_10 = {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4]), tint_symbol_8(buffer, (offset + 8u)), asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4])};
+  return tint_symbol_10;
+}
+
+typedef S tint_symbol_5_ret[4];
+tint_symbol_5_ret tint_symbol_5(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_6(buffer, (offset + (i_1 * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_5(u, 0u));
+  tint_symbol_1(s, 128u, tint_symbol_6(u, 256u));
+  tint_symbol_3(s, 392u, tint_symbol_8(u, 264u));
+  s.Store2(136u, asuint(asfloat(u[1].xy).yx));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_storage.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_storage.wgsl.expected.glsl
new file mode 100644
index 0000000..c09c8f9
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_storage.wgsl.expected.glsl

@@ -0,0 +1,102 @@
+#version 310 es
+
+struct S {
+  int before;
+  uint pad;
+  mat3x2 m;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  int after;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+};
+
+struct S_std140 {
+  int before;
+  uint pad;
+  vec2 m_0;
+  vec2 m_1;
+  vec2 m_2;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  int after;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  S_std140 inner[4];
+} u;
+
+layout(binding = 1, std430) buffer u_block_ssbo {
+  S inner[4];
+} s;
+
+S conv_S(S_std140 val) {
+  return S(val.before, val.pad, mat3x2(val.m_0, val.m_1, val.m_2), val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.pad_7, val.pad_8, val.after, val.pad_9, val.pad_10, val.pad_11, val.pad_12, val.pad_13, val.pad_14, val.pad_15, val.pad_16, val.pad_17, val.pad_18, val.pad_19, val.pad_20, val.pad_21, val.pad_22, val.pad_23);
+}
+
+S[4] conv_arr4_S(S_std140 val[4]) {
+  S arr[4] = S[4](S(0, 0u, mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_S(val[i]);
+    }
+  }
+  return arr;
+}
+
+mat3x2 load_u_inner_2_m() {
+  return mat3x2(u.inner[2u].m_0, u.inner[2u].m_1, u.inner[2u].m_2);
+}
+
+void f() {
+  s.inner = conv_arr4_S(u.inner);
+  s.inner[1] = conv_S(u.inner[2u]);
+  s.inner[3].m = load_u_inner_2_m();
+  s.inner[1].m[0] = u.inner[0u].m_1.yx;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_storage.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_storage.wgsl.expected.msl
new file mode 100644
index 0000000..8bdc778
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_storage.wgsl.expected.msl

@@ -0,0 +1,33 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
+  /* 0x0008 */ float3x2 m;
+  /* 0x0020 */ tint_array<int8_t, 32> tint_pad_1;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_2;
+};
+
+kernel void f(device tint_array<S, 4>* tint_symbol [[buffer(1)]], const constant tint_array<S, 4>* tint_symbol_1 [[buffer(0)]]) {
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[2];
+  (*(tint_symbol))[3].m = (*(tint_symbol_1))[2].m;
+  (*(tint_symbol))[1].m[0] = float2((*(tint_symbol_1))[0].m[1]).yx;
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_storage.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_storage.wgsl.expected.spvasm
new file mode 100644
index 0000000..dca749f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_storage.wgsl.expected.spvasm

@@ -0,0 +1,182 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 109
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "m_2"
+               OpMemberName %S_std140 4 "after"
+               OpName %u "u"
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %s "s"
+               OpName %conv_S "conv_S"
+               OpName %val "val"
+               OpName %conv_arr4_S "conv_arr4_S"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_u_inner_2_m "load_u_inner_2_m"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 8
+               OpMemberDecorate %S_std140 2 Offset 16
+               OpMemberDecorate %S_std140 3 Offset 24
+               OpMemberDecorate %S_std140 4 Offset 64
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 8
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 8
+               OpMemberDecorate %S 2 Offset 64
+               OpDecorate %_arr_S_uint_4 ArrayStride 128
+               OpDecorate %s DescriptorSet 0
+               OpDecorate %s Binding 1
+        %int = OpTypeInt 32 1
+      %float = OpTypeFloat 32
+    %v2float = OpTypeVector %float 2
+   %S_std140 = OpTypeStruct %int %v2float %v2float %v2float %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+%mat3v2float = OpTypeMatrix %v2float 3
+          %S = OpTypeStruct %int %mat3v2float %int
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+    %u_block = OpTypeStruct %_arr_S_uint_4
+%_ptr_StorageBuffer_u_block = OpTypePointer StorageBuffer %u_block
+          %s = OpVariable %_ptr_StorageBuffer_u_block StorageBuffer
+         %17 = OpTypeFunction %S %S_std140
+         %28 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
+%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
+         %34 = OpConstantNull %_arr_S_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %37 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
+         %50 = OpConstantNull %_arr_S_std140_uint_4
+%_ptr_Function_S = OpTypePointer Function %S
+%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
+     %uint_1 = OpConstant %uint 1
+         %63 = OpTypeFunction %mat3v2float
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
+     %uint_3 = OpConstant %uint 3
+       %void = OpTypeVoid
+         %83 = OpTypeFunction %void
+%_ptr_StorageBuffer__arr_S_uint_4 = OpTypePointer StorageBuffer %_arr_S_uint_4
+%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
+      %int_1 = OpConstant %int 1
+%_ptr_StorageBuffer_S = OpTypePointer StorageBuffer %S
+      %int_3 = OpConstant %int 3
+%_ptr_StorageBuffer_mat3v2float = OpTypePointer StorageBuffer %mat3v2float
+        %103 = OpConstantNull %int
+%_ptr_StorageBuffer_v2float = OpTypePointer StorageBuffer %v2float
+     %conv_S = OpFunction %S None %17
+        %val = OpFunctionParameter %S_std140
+         %20 = OpLabel
+         %21 = OpCompositeExtract %int %val 0
+         %22 = OpCompositeExtract %v2float %val 1
+         %23 = OpCompositeExtract %v2float %val 2
+         %24 = OpCompositeExtract %v2float %val 3
+         %25 = OpCompositeConstruct %mat3v2float %22 %23 %24
+         %26 = OpCompositeExtract %int %val 4
+         %27 = OpCompositeConstruct %S %21 %25 %26
+               OpReturnValue %27
+               OpFunctionEnd
+%conv_arr4_S = OpFunction %_arr_S_uint_4 None %28
+      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
+         %31 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %34
+          %i = OpVariable %_ptr_Function_uint Function %37
+%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %50
+               OpBranch %38
+         %38 = OpLabel
+               OpLoopMerge %39 %40 None
+               OpBranch %41
+         %41 = OpLabel
+         %43 = OpLoad %uint %i
+         %44 = OpULessThan %bool %43 %uint_4
+         %42 = OpLogicalNot %bool %44
+               OpSelectionMerge %46 None
+               OpBranchConditional %42 %47 %46
+         %47 = OpLabel
+               OpBranch %39
+         %46 = OpLabel
+               OpStore %var_for_index %val_0
+         %51 = OpLoad %uint %i
+         %53 = OpAccessChain %_ptr_Function_S %arr %51
+         %55 = OpLoad %uint %i
+         %57 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %55
+         %58 = OpLoad %S_std140 %57
+         %54 = OpFunctionCall %S %conv_S %58
+               OpStore %53 %54
+               OpBranch %40
+         %40 = OpLabel
+         %59 = OpLoad %uint %i
+         %61 = OpIAdd %uint %59 %uint_1
+               OpStore %i %61
+               OpBranch %38
+         %39 = OpLabel
+         %62 = OpLoad %_arr_S_uint_4 %arr
+               OpReturnValue %62
+               OpFunctionEnd
+%load_u_inner_2_m = OpFunction %mat3v2float None %63
+         %65 = OpLabel
+         %70 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %73 = OpAccessChain %_ptr_Uniform_v2float %70 %uint_1
+         %74 = OpLoad %v2float %73
+         %76 = OpAccessChain %_ptr_Uniform_v2float %70 %uint_2
+         %77 = OpLoad %v2float %76
+         %80 = OpAccessChain %_ptr_Uniform_v2float %70 %uint_3
+         %81 = OpLoad %v2float %80
+         %82 = OpCompositeConstruct %mat3v2float %74 %77 %81
+               OpReturnValue %82
+               OpFunctionEnd
+          %f = OpFunction %void None %83
+         %86 = OpLabel
+         %88 = OpAccessChain %_ptr_StorageBuffer__arr_S_uint_4 %s %uint_0
+         %91 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
+         %92 = OpLoad %_arr_S_std140_uint_4 %91
+         %89 = OpFunctionCall %_arr_S_uint_4 %conv_arr4_S %92
+               OpStore %88 %89
+         %95 = OpAccessChain %_ptr_StorageBuffer_S %s %uint_0 %int_1
+         %97 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %98 = OpLoad %S_std140 %97
+         %96 = OpFunctionCall %S %conv_S %98
+               OpStore %95 %96
+        %101 = OpAccessChain %_ptr_StorageBuffer_mat3v2float %s %uint_0 %int_3 %uint_1
+        %102 = OpFunctionCall %mat3v2float %load_u_inner_2_m
+               OpStore %101 %102
+        %105 = OpAccessChain %_ptr_StorageBuffer_v2float %s %uint_0 %int_1 %uint_1 %103
+        %106 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %37 %uint_2
+        %107 = OpLoad %v2float %106
+        %108 = OpVectorShuffle %v2float %107 %107 1 0
+               OpStore %105 %108
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_storage.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_storage.wgsl.expected.wgsl
new file mode 100644
index 0000000..bda40b3
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_storage.wgsl.expected.wgsl

@@ -0,0 +1,18 @@
+struct S {
+  before : i32,
+  m : mat3x2<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  s = u;
+  s[1] = u[2];
+  s[3].m = u[2].m;
+  s[1].m[0] = u[0].m[1].yx;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_workgroup.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_workgroup.wgsl
new file mode 100644
index 0000000..a06722a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_workgroup.wgsl

@@ -0,0 +1,17 @@
+struct S {
+  before : i32,
+  m : mat3x2<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+var<workgroup> w : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    w = u;
+    w[1] = u[2];
+    w[3].m = u[2].m;
+    w[1].m[0] = u[0].m[1].yx;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_workgroup.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_workgroup.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..35c2670
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_workgroup.wgsl.expected.dxc.hlsl

@@ -0,0 +1,63 @@
+struct S {
+  int before;
+  float3x2 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+groupshared S w[4];
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+float3x2 tint_symbol_5(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
+  return float3x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)));
+}
+
+S tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_3 = ((offset + 0u)) / 4;
+  const uint scalar_offset_4 = ((offset + 64u)) / 4;
+  const S tint_symbol_8 = {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4]), tint_symbol_5(buffer, (offset + 8u)), asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4])};
+  return tint_symbol_8;
+}
+
+typedef S tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 128u)));
+    }
+  }
+  return arr;
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      const uint i = idx;
+      const S tint_symbol_7 = (S)0;
+      w[i] = tint_symbol_7;
+    }
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = tint_symbol_3(u, 256u);
+  w[3].m = tint_symbol_5(u, 264u);
+  w[1].m[0] = asfloat(u[1].xy).yx;
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_workgroup.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_workgroup.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..35c2670
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_workgroup.wgsl.expected.fxc.hlsl

@@ -0,0 +1,63 @@
+struct S {
+  int before;
+  float3x2 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+groupshared S w[4];
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+float3x2 tint_symbol_5(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
+  return float3x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)));
+}
+
+S tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_3 = ((offset + 0u)) / 4;
+  const uint scalar_offset_4 = ((offset + 64u)) / 4;
+  const S tint_symbol_8 = {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4]), tint_symbol_5(buffer, (offset + 8u)), asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4])};
+  return tint_symbol_8;
+}
+
+typedef S tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 128u)));
+    }
+  }
+  return arr;
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      const uint i = idx;
+      const S tint_symbol_7 = (S)0;
+      w[i] = tint_symbol_7;
+    }
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = tint_symbol_3(u, 256u);
+  w[3].m = tint_symbol_5(u, 264u);
+  w[1].m[0] = asfloat(u[1].xy).yx;
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_workgroup.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_workgroup.wgsl.expected.glsl
new file mode 100644
index 0000000..e5c3eb1
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_workgroup.wgsl.expected.glsl

@@ -0,0 +1,107 @@
+#version 310 es
+
+struct S {
+  int before;
+  uint pad;
+  mat3x2 m;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  int after;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+};
+
+struct S_std140 {
+  int before;
+  uint pad;
+  vec2 m_0;
+  vec2 m_1;
+  vec2 m_2;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  int after;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  S_std140 inner[4];
+} u;
+
+shared S w[4];
+S conv_S(S_std140 val) {
+  return S(val.before, val.pad, mat3x2(val.m_0, val.m_1, val.m_2), val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.pad_7, val.pad_8, val.after, val.pad_9, val.pad_10, val.pad_11, val.pad_12, val.pad_13, val.pad_14, val.pad_15, val.pad_16, val.pad_17, val.pad_18, val.pad_19, val.pad_20, val.pad_21, val.pad_22, val.pad_23);
+}
+
+S[4] conv_arr4_S(S_std140 val[4]) {
+  S arr[4] = S[4](S(0, 0u, mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, mat3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_S(val[i]);
+    }
+  }
+  return arr;
+}
+
+mat3x2 load_u_inner_2_m() {
+  return mat3x2(u.inner[2u].m_0, u.inner[2u].m_1, u.inner[2u].m_2);
+}
+
+void f(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      uint i = idx;
+      S tint_symbol = S(0, 0u, mat3x2(vec2(0.0f), vec2(0.0f), vec2(0.0f)), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u);
+      w[i] = tint_symbol;
+    }
+  }
+  barrier();
+  w = conv_arr4_S(u.inner);
+  w[1] = conv_S(u.inner[2u]);
+  w[3].m = load_u_inner_2_m();
+  w[1].m[0] = u.inner[0u].m_1.yx;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f(gl_LocalInvocationIndex);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_workgroup.wgsl.expected.msl
new file mode 100644
index 0000000..03a5ba8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_workgroup.wgsl.expected.msl

@@ -0,0 +1,48 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
+  /* 0x0008 */ float3x2 m;
+  /* 0x0020 */ tint_array<int8_t, 32> tint_pad_1;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_2;
+};
+
+struct tint_symbol_6 {
+  tint_array<S, 4> w;
+};
+
+void f_inner(uint local_invocation_index, threadgroup tint_array<S, 4>* const tint_symbol_1, const constant tint_array<S, 4>* const tint_symbol_2) {
+  for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+    uint const i = idx;
+    S const tint_symbol = S{};
+    (*(tint_symbol_1))[i] = tint_symbol;
+  }
+  threadgroup_barrier(mem_flags::mem_threadgroup);
+  *(tint_symbol_1) = *(tint_symbol_2);
+  (*(tint_symbol_1))[1] = (*(tint_symbol_2))[2];
+  (*(tint_symbol_1))[3].m = (*(tint_symbol_2))[2].m;
+  (*(tint_symbol_1))[1].m[0] = float2((*(tint_symbol_2))[0].m[1]).yx;
+}
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol_5 [[buffer(0)]], threadgroup tint_symbol_6* tint_symbol_4 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
+  threadgroup tint_array<S, 4>* const tint_symbol_3 = &((*(tint_symbol_4)).w);
+  f_inner(local_invocation_index, tint_symbol_3, tint_symbol_5);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_workgroup.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_workgroup.wgsl.expected.spvasm
new file mode 100644
index 0000000..e5ef490
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_workgroup.wgsl.expected.spvasm

@@ -0,0 +1,216 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 131
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f" %local_invocation_index_1
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %local_invocation_index_1 "local_invocation_index_1"
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "m_2"
+               OpMemberName %S_std140 4 "after"
+               OpName %u "u"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %w "w"
+               OpName %conv_S "conv_S"
+               OpName %val "val"
+               OpName %conv_arr4_S "conv_arr4_S"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_u_inner_2_m "load_u_inner_2_m"
+               OpName %f_inner "f_inner"
+               OpName %local_invocation_index "local_invocation_index"
+               OpName %idx "idx"
+               OpName %f "f"
+               OpDecorate %local_invocation_index_1 BuiltIn LocalInvocationIndex
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 8
+               OpMemberDecorate %S_std140 2 Offset 16
+               OpMemberDecorate %S_std140 3 Offset 24
+               OpMemberDecorate %S_std140 4 Offset 64
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 8
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 8
+               OpMemberDecorate %S 2 Offset 64
+               OpDecorate %_arr_S_uint_4 ArrayStride 128
+       %uint = OpTypeInt 32 0
+%_ptr_Input_uint = OpTypePointer Input %uint
+%local_invocation_index_1 = OpVariable %_ptr_Input_uint Input
+        %int = OpTypeInt 32 1
+      %float = OpTypeFloat 32
+    %v2float = OpTypeVector %float 2
+   %S_std140 = OpTypeStruct %int %v2float %v2float %v2float %int
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+%mat3v2float = OpTypeMatrix %v2float 3
+          %S = OpTypeStruct %int %mat3v2float %int
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+%_ptr_Workgroup__arr_S_uint_4 = OpTypePointer Workgroup %_arr_S_uint_4
+          %w = OpVariable %_ptr_Workgroup__arr_S_uint_4 Workgroup
+         %18 = OpTypeFunction %S %S_std140
+         %29 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
+%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
+         %35 = OpConstantNull %_arr_S_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %38 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
+         %51 = OpConstantNull %_arr_S_std140_uint_4
+%_ptr_Function_S = OpTypePointer Function %S
+%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
+     %uint_1 = OpConstant %uint 1
+         %64 = OpTypeFunction %mat3v2float
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
+     %uint_3 = OpConstant %uint 3
+       %void = OpTypeVoid
+         %84 = OpTypeFunction %void %uint
+%_ptr_Workgroup_S = OpTypePointer Workgroup %S
+        %102 = OpConstantNull %S
+   %uint_264 = OpConstant %uint 264
+%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
+      %int_1 = OpConstant %int 1
+      %int_3 = OpConstant %int 3
+%_ptr_Workgroup_mat3v2float = OpTypePointer Workgroup %mat3v2float
+        %120 = OpConstantNull %int
+%_ptr_Workgroup_v2float = OpTypePointer Workgroup %v2float
+        %126 = OpTypeFunction %void
+     %conv_S = OpFunction %S None %18  ; Converts one %S_std140 value (matrix split into three v2float members) into an %S value.
+        %val = OpFunctionParameter %S_std140
+         %21 = OpLabel
+         %22 = OpCompositeExtract %int %val 0  ; member 0: "before"
+         %23 = OpCompositeExtract %v2float %val 1  ; member 1: "m_0"
+         %24 = OpCompositeExtract %v2float %val 2  ; member 2: "m_1"
+         %25 = OpCompositeExtract %v2float %val 3  ; member 3: "m_2"
+         %26 = OpCompositeConstruct %mat3v2float %23 %24 %25  ; rebuild the matrix from its three columns
+         %27 = OpCompositeExtract %int %val 4  ; member 4: "after"
+         %28 = OpCompositeConstruct %S %22 %26 %27
+               OpReturnValue %28
+               OpFunctionEnd
+%conv_arr4_S = OpFunction %_arr_S_uint_4 None %29  ; Converts a 4-element %S_std140 array to a 4-element %S array, one conv_S call per element.
+      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
+         %32 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %35  ; result array, null-initialised
+          %i = OpVariable %_ptr_Function_uint Function %38  ; loop counter, null-initialised (0)
+%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %51  ; function-local copy of the parameter, indexed via OpAccessChain below
+               OpBranch %39
+         %39 = OpLabel
+               OpLoopMerge %40 %41 None  ; merge block %40, continue block %41
+               OpBranch %42
+         %42 = OpLabel
+         %44 = OpLoad %uint %i
+         %45 = OpULessThan %bool %44 %uint_4
+         %43 = OpLogicalNot %bool %45  ; exit condition: !(i < 4)
+               OpSelectionMerge %47 None
+               OpBranchConditional %43 %48 %47
+         %48 = OpLabel
+               OpBranch %40  ; leave the loop
+         %47 = OpLabel
+               OpStore %var_for_index %val_0
+         %52 = OpLoad %uint %i
+         %54 = OpAccessChain %_ptr_Function_S %arr %52  ; destination: arr[i]
+         %56 = OpLoad %uint %i
+         %58 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %56  ; source: var_for_index[i]
+         %59 = OpLoad %S_std140 %58
+         %55 = OpFunctionCall %S %conv_S %59
+               OpStore %54 %55
+               OpBranch %41
+         %41 = OpLabel
+         %60 = OpLoad %uint %i
+         %62 = OpIAdd %uint %60 %uint_1  ; i = i + 1
+               OpStore %i %62
+               OpBranch %39
+         %40 = OpLabel
+         %63 = OpLoad %_arr_S_uint_4 %arr
+               OpReturnValue %63
+               OpFunctionEnd
+%load_u_inner_2_m = OpFunction %mat3v2float None %64  ; Loads the three column members of element 2 of u.inner and returns them as one matrix.
+         %66 = OpLabel
+         %71 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2  ; member 0 ("inner") of %u, element 2
+         %74 = OpAccessChain %_ptr_Uniform_v2float %71 %uint_1  ; member 1: "m_0"
+         %75 = OpLoad %v2float %74
+         %77 = OpAccessChain %_ptr_Uniform_v2float %71 %uint_2  ; member 2: "m_1"
+         %78 = OpLoad %v2float %77
+         %81 = OpAccessChain %_ptr_Uniform_v2float %71 %uint_3  ; member 3: "m_2"
+         %82 = OpLoad %v2float %81
+         %83 = OpCompositeConstruct %mat3v2float %75 %78 %82
+               OpReturnValue %83
+               OpFunctionEnd
+    %f_inner = OpFunction %void None %84  ; Zero-fills %w from the invocation index upward, issues a barrier, then performs the four copies from %u into %w.
+%local_invocation_index = OpFunctionParameter %uint
+         %88 = OpLabel
+        %idx = OpVariable %_ptr_Function_uint Function %38
+               OpStore %idx %local_invocation_index  ; loop starts at the invocation index
+               OpBranch %90
+         %90 = OpLabel
+               OpLoopMerge %91 %92 None  ; merge block %91, continue block %92
+               OpBranch %93
+         %93 = OpLabel
+         %95 = OpLoad %uint %idx
+         %96 = OpULessThan %bool %95 %uint_4
+         %94 = OpLogicalNot %bool %96  ; exit condition: !(idx < 4)
+               OpSelectionMerge %97 None
+               OpBranchConditional %94 %98 %97
+         %98 = OpLabel
+               OpBranch %91
+         %97 = OpLabel
+         %99 = OpLoad %uint %idx
+        %101 = OpAccessChain %_ptr_Workgroup_S %w %99
+               OpStore %101 %102  ; w[idx] = null %S constant
+               OpBranch %92
+         %92 = OpLabel
+        %103 = OpLoad %uint %idx
+        %104 = OpIAdd %uint %103 %uint_1
+               OpStore %idx %104
+               OpBranch %90
+         %91 = OpLabel
+               OpControlBarrier %uint_2 %uint_2 %uint_264  ; scope 2 = Workgroup for execution and memory; 264 = 0x108 = AcquireRelease | WorkgroupMemory
+        %109 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
+        %110 = OpLoad %_arr_S_std140_uint_4 %109
+        %107 = OpFunctionCall %_arr_S_uint_4 %conv_arr4_S %110
+               OpStore %w %107  ; whole-array copy, converted element by element
+        %112 = OpAccessChain %_ptr_Workgroup_S %w %int_1
+        %114 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+        %115 = OpLoad %S_std140 %114
+        %113 = OpFunctionCall %S %conv_S %115
+               OpStore %112 %113  ; w[1] = converted u.inner[2]
+        %118 = OpAccessChain %_ptr_Workgroup_mat3v2float %w %int_3 %uint_1  ; element 3 of %w, member 1 ("m")
+        %119 = OpFunctionCall %mat3v2float %load_u_inner_2_m
+               OpStore %118 %119
+        %122 = OpAccessChain %_ptr_Workgroup_v2float %w %int_1 %uint_1 %120  ; element 1, member "m", column %120 (null int, i.e. 0)
+        %123 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %38 %uint_2  ; element %38 (null uint, i.e. 0), member 2 ("m_1")
+        %124 = OpLoad %v2float %123
+        %125 = OpVectorShuffle %v2float %124 %124 1 0  ; swap the two components
+               OpStore %122 %125
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %126  ; Entry point: loads the LocalInvocationIndex builtin input and passes it to f_inner.
+        %128 = OpLabel
+        %130 = OpLoad %uint %local_invocation_index_1
+        %129 = OpFunctionCall %void %f_inner %130
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_workgroup.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_workgroup.wgsl.expected.wgsl
new file mode 100644
index 0000000..f44928d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x2_f32/to_workgroup.wgsl.expected.wgsl

@@ -0,0 +1,18 @@
+struct S {  // Struct under test: a mat3x2<f32> member between two i32 members.
+  before : i32,
+  m : mat3x2<f32>,
+  @align(64) @size(16)  // explicit alignment and size attributes for `after`
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;  // copy source: uniform buffer
+
+var<workgroup> w : array<S, 4>;  // copy destination: workgroup memory
+
+@compute @workgroup_size(1)  // Entry point: copies from the uniform array into the workgroup array at four granularities.
+fn f() {
+  w = u;  // whole array
+  w[1] = u[2];  // one struct element
+  w[3].m = u[2].m;  // one matrix member
+  w[1].m[0] = u[0].m[1].yx;  // one matrix column, components swapped
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f16/dynamic_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/dynamic_index_via_ptr.wgsl
new file mode 100644
index 0000000..b428216
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/dynamic_index_via_ptr.wgsl

@@ -0,0 +1,34 @@
+enable f16;
+
+struct Inner {  // Innermost struct: a single mat3x3<f16> member with an explicit size.
+  @size(64)  // explicit 64-byte size for `m`
+  m : mat3x3<f16>,
+}
+
+struct Outer {  // Wraps a fixed array of four Inner values.
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;  // uniform buffer that f() indexes dynamically
+
+var<private> counter = 0;  // mutable state that makes the result of i() a runtime value
+fn i() -> i32 { counter++; return counter; }  // increments first, then returns: successive calls yield 1, 2, 3, ...
+
+@compute @workgroup_size(1)  // Entry point: builds a chain of pointers into `a` using i() as index, then loads through each pointer.
+fn f() {
+  let p_a           = &a;
+  let p_a_i         = &((*p_a)[i()]);  // first i() call
+  let p_a_i_a       = &((*p_a_i).a);
+  let p_a_i_a_i     = &((*p_a_i_a)[i()]);  // second i() call
+  let p_a_i_a_i_m   = &((*p_a_i_a_i).m);
+  let p_a_i_a_i_m_i = &((*p_a_i_a_i_m)[i()]);  // third i() call; NOTE(review): i() yields 3 here, past the 3 columns of mat3x3 - presumably fine for a compile-only test, confirm
+
+
+  let l_a             : array<Outer, 4> =  *p_a;  // loads at each level: array, struct, inner array, inner struct, matrix, column, scalar
+  let l_a_i           : Outer           =  *p_a_i;
+  let l_a_i_a         : array<Inner, 4> =  *p_a_i_a;
+  let l_a_i_a_i       : Inner           =  *p_a_i_a_i;
+  let l_a_i_a_i_m     : mat3x3<f16>     =  *p_a_i_a_i_m;
+  let l_a_i_a_i_m_i   : vec3<f16>       =  *p_a_i_a_i_m_i;
+  let l_a_i_a_i_m_i_i : f16             = (*p_a_i_a_i_m_i)[i()];  // fourth i() call; NOTE(review): yields 4, past the 3 lanes of vec3 - confirm intended
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..ef9e0c4
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,93 @@
+struct Inner {  // Value type returned by the load helpers; the cbuffer itself is declared as raw uint4 words.
+  matrix<float16_t, 3, 3> m;
+};
+struct Outer {  // Wraps four Inner values.
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {  // Uniform data exposed as untyped 16-byte words; typed values are decoded by the tint_symbol_* helpers.
+  uint4 a[64];  // 64 x uint4
+};
+static int counter = 0;  // state behind i()
+
+int i() {  // Increments counter and returns the new value.
+  counter = (counter + 1);
+  return counter;
+}
+
+matrix<float16_t, 3, 3> tint_symbol_8(uint4 buffer[64], uint offset) {  // Decodes a 3x3 f16 matrix at byte `offset`; its three vectors are read at offset+0, +8 and +16.
+  const uint scalar_offset = ((offset + 0u)) / 4;  // byte offset -> 32-bit word index
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];  // word index -> uint4 index
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);  // pick the half of the uint4 holding this vector
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));  // low 16 bits of each word: components x and z
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);  // high 16 bits of the first word: component y
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;  // second vector, same decoding
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;  // third vector, same decoding
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  return matrix<float16_t, 3, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]));  // each vector reassembled in (x, y, z) order
+}
+
+Inner tint_symbol_7(uint4 buffer[64], uint offset) {  // Loads one Inner at byte `offset`; its only member starts at offset + 0.
+  const Inner tint_symbol_11 = {tint_symbol_8(buffer, (offset + 0u))};
+  return tint_symbol_11;
+}
+
+typedef Inner tint_symbol_6_ret[4];  // array return type for tint_symbol_6
+tint_symbol_6_ret tint_symbol_6(uint4 buffer[64], uint offset) {  // Loads four Inner values starting at byte `offset`.
+  Inner arr[4] = (Inner[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_7(buffer, (offset + (i_1 * 64u)));  // 64-byte stride between elements
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_5(uint4 buffer[64], uint offset) {  // Loads one Outer at byte `offset`; its inner array starts at offset + 0.
+  const Outer tint_symbol_12 = {tint_symbol_6(buffer, (offset + 0u))};
+  return tint_symbol_12;
+}
+
+typedef Outer tint_symbol_4_ret[4];  // array return type for tint_symbol_4
+tint_symbol_4_ret tint_symbol_4(uint4 buffer[64], uint offset) {  // Loads four Outer values starting at byte `offset`.
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    for(uint i_2 = 0u; (i_2 < 4u); i_2 = (i_2 + 1u)) {
+      arr_1[i_2] = tint_symbol_5(buffer, (offset + (i_2 * 256u)));  // 256-byte stride between elements
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]  // Entry point: the three index values are computed once up front and reused in every byte-offset expression.
+void f() {
+  const int p_a_i_save = i();  // outer array index
+  const int p_a_i_a_i_save = i();  // inner array index
+  const int p_a_i_a_i_m_i_save = i();  // matrix vector index
+  const Outer l_a[4] = tint_symbol_4(a, 0u);
+  const Outer l_a_i = tint_symbol_5(a, (256u * uint(p_a_i_save)));  // 256 bytes per outer element
+  const Inner l_a_i_a[4] = tint_symbol_6(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a_i = tint_symbol_7(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));  // plus 64 bytes per inner element
+  const matrix<float16_t, 3, 3> l_a_i_a_i_m = tint_symbol_8(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const uint scalar_offset_3 = ((((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))) + (8u * uint(p_a_i_a_i_m_i_save)))) / 4;  // plus 8 bytes per matrix vector, converted to a word index
+  uint4 ubo_load_7 = a[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  const vector<float16_t, 3> l_a_i_a_i_m_i = vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]);
+  const int tint_symbol = p_a_i_save;
+  const int tint_symbol_1 = p_a_i_a_i_save;
+  const int tint_symbol_2 = p_a_i_a_i_m_i_save;
+  const int tint_symbol_3 = i();  // fourth index, taken after the earlier loads
+  const uint scalar_offset_bytes = (((((256u * uint(tint_symbol)) + (64u * uint(tint_symbol_1))) + (8u * uint(tint_symbol_2))) + (2u * uint(tint_symbol_3))));  // plus 2 bytes per scalar
+  const uint scalar_offset_index = scalar_offset_bytes / 4;
+  const float16_t l_a_i_a_i_m_i_i = float16_t(f16tof32(((a[scalar_offset_index / 4][scalar_offset_index % 4] >> (scalar_offset_bytes % 4 == 0 ? 0 : 16)) & 0xFFFF)));  // selects the low or high 16 bits of the containing word
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..6a4d6e3
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,98 @@
+SKIP: FAILED
+
+struct Inner {  // Same declaration as in the DXC expectation.
+  matrix<float16_t, 3, 3> m;  // shader line 2, columns 10-18: the `float16_t` token named in the FXC error at the end of this file
+};
+struct Outer {  // Wraps four Inner values.
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {  // Uniform data exposed as untyped 16-byte words; typed values are decoded by the tint_symbol_* helpers.
+  uint4 a[64];  // 64 x uint4
+};
+static int counter = 0;  // state behind i()
+
+int i() {  // Increments counter and returns the new value.
+  counter = (counter + 1);
+  return counter;
+}
+
+matrix<float16_t, 3, 3> tint_symbol_8(uint4 buffer[64], uint offset) {  // Decodes a 3x3 f16 matrix at byte `offset`; its three vectors are read at offset+0, +8 and +16.
+  const uint scalar_offset = ((offset + 0u)) / 4;  // byte offset -> 32-bit word index
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];  // word index -> uint4 index
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);  // pick the half of the uint4 holding this vector
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));  // low 16 bits of each word: components x and z
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);  // high 16 bits of the first word: component y
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;  // second vector, same decoding
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;  // third vector, same decoding
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  return matrix<float16_t, 3, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]));  // each vector reassembled in (x, y, z) order
+}
+
+Inner tint_symbol_7(uint4 buffer[64], uint offset) {  // Loads one Inner at byte `offset`; its only member starts at offset + 0.
+  const Inner tint_symbol_11 = {tint_symbol_8(buffer, (offset + 0u))};
+  return tint_symbol_11;
+}
+
+typedef Inner tint_symbol_6_ret[4];  // array return type for tint_symbol_6
+tint_symbol_6_ret tint_symbol_6(uint4 buffer[64], uint offset) {  // Loads four Inner values starting at byte `offset`.
+  Inner arr[4] = (Inner[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_7(buffer, (offset + (i_1 * 64u)));  // 64-byte stride between elements
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_5(uint4 buffer[64], uint offset) {  // Loads one Outer at byte `offset`; its inner array starts at offset + 0.
+  const Outer tint_symbol_12 = {tint_symbol_6(buffer, (offset + 0u))};
+  return tint_symbol_12;
+}
+
+typedef Outer tint_symbol_4_ret[4];  // array return type for tint_symbol_4
+tint_symbol_4_ret tint_symbol_4(uint4 buffer[64], uint offset) {  // Loads four Outer values starting at byte `offset`.
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    for(uint i_2 = 0u; (i_2 < 4u); i_2 = (i_2 + 1u)) {
+      arr_1[i_2] = tint_symbol_5(buffer, (offset + (i_2 * 256u)));  // 256-byte stride between elements
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]  // Entry point: the three index values are computed once up front and reused in every byte-offset expression.
+void f() {
+  const int p_a_i_save = i();  // outer array index
+  const int p_a_i_a_i_save = i();  // inner array index
+  const int p_a_i_a_i_m_i_save = i();  // matrix vector index
+  const Outer l_a[4] = tint_symbol_4(a, 0u);
+  const Outer l_a_i = tint_symbol_5(a, (256u * uint(p_a_i_save)));  // 256 bytes per outer element
+  const Inner l_a_i_a[4] = tint_symbol_6(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a_i = tint_symbol_7(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));  // plus 64 bytes per inner element
+  const matrix<float16_t, 3, 3> l_a_i_a_i_m = tint_symbol_8(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const uint scalar_offset_3 = ((((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))) + (8u * uint(p_a_i_a_i_m_i_save)))) / 4;  // plus 8 bytes per matrix vector, converted to a word index
+  uint4 ubo_load_7 = a[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  const vector<float16_t, 3> l_a_i_a_i_m_i = vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]);
+  const int tint_symbol = p_a_i_save;
+  const int tint_symbol_1 = p_a_i_a_i_save;
+  const int tint_symbol_2 = p_a_i_a_i_m_i_save;
+  const int tint_symbol_3 = i();  // fourth index, taken after the earlier loads
+  const uint scalar_offset_bytes = (((((256u * uint(tint_symbol)) + (64u * uint(tint_symbol_1))) + (8u * uint(tint_symbol_2))) + (2u * uint(tint_symbol_3))));  // plus 2 bytes per scalar
+  const uint scalar_offset_index = scalar_offset_bytes / 4;
+  const float16_t l_a_i_a_i_m_i_i = float16_t(f16tof32(((a[scalar_offset_index / 4][scalar_offset_index % 4] >> (scalar_offset_bytes % 4 == 0 ? 0 : 16)) & 0xFFFF)));  // selects the low or high 16 bits of the containing word
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001CC9AE84850(2,10-18): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f16/dynamic_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/dynamic_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..25724b8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/dynamic_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,152 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct Inner {  // Matrix-typed form of the inner struct, produced by conv_Inner.
+  f16mat3 m;
+  uint pad;  // ten uint filler members follow the matrix; presumably they realise the WGSL @size(64) - confirm
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+};
+
+struct Inner_std140 {  // Form used inside the uniform block: the matrix is split into three f16vec3 members.
+  f16vec3 m_0;  // m_0..m_2 are passed, in order, to the f16mat3 constructor in conv_Inner
+  f16vec3 m_1;
+  f16vec3 m_2;
+  uint pad;  // same ten filler members as Inner
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+};
+
+struct Outer {  // Matrix-typed outer struct, produced by conv_Outer.
+  Inner a[4];
+};
+
+struct Outer_std140 {  // Outer struct as stored in the uniform block, holding the split-matrix Inner_std140.
+  Inner_std140 a[4];
+};
+
+layout(binding = 0, std140) uniform a_block_std140_ubo {  // Uniform block at binding 0; all reads go through a.inner.
+  Outer_std140 inner[4];
+} a;
+
+int counter = 0;  // state behind i()
+int i() {  // Increments counter and returns the new value.
+  counter = (counter + 1);
+  return counter;
+}
+
+Inner conv_Inner(Inner_std140 val) {  // Rebuilds the f16mat3 from m_0..m_2 and copies the ten pad members through unchanged.
+  return Inner(f16mat3(val.m_0, val.m_1, val.m_2), val.pad, val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.pad_7, val.pad_8, val.pad_9);
+}
+
+Inner[4] conv_arr4_Inner(Inner_std140 val[4]) {  // Converts a 4-element Inner_std140 array element by element.
+  Inner arr[4] = Inner[4](Inner(f16mat3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));  // all-zero initial value, overwritten by the loop
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_Inner(val[i]);
+    }
+  }
+  return arr;
+}
+
+Outer conv_Outer(Outer_std140 val) {  // Converts one Outer_std140 by converting its inner array.
+  return Outer(conv_arr4_Inner(val.a));
+}
+
+Outer[4] conv_arr4_Outer(Outer_std140 val[4]) {  // Converts a 4-element Outer_std140 array element by element.
+  Outer arr[4] = Outer[4](Outer(Inner[4](Inner(f16mat3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))), Outer(Inner[4](Inner(f16mat3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))), Outer(Inner[4](Inner(f16mat3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))), Outer(Inner[4](Inner(f16mat3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))));  // all-zero initial value, overwritten by the loop
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_Outer(val[i]);
+    }
+  }
+  return arr;
+}
+
+f16mat3 load_a_inner_p0_a_p1_m(uint p0, uint p1) {  // Loads the matrix of a.inner[p0].a[p1] by reading its three split members.
+  uint s_save = p0;
+  uint s_save_1 = p1;
+  return f16mat3(a.inner[s_save].a[s_save_1].m_0, a.inner[s_save].a[s_save_1].m_1, a.inner[s_save].a[s_save_1].m_2);
+}
+
+f16vec3 load_a_inner_p0_a_p1_m_p2(uint p0, uint p1, uint p2) {  // Loads one split matrix member of a.inner[p0].a[p1], selected by p2.
+  switch(p2) {  // the matrix is stored as separate members, so the index is dispatched by switch
+    case 0u: {
+      return a.inner[p0].a[p1].m_0;
+      break;
+    }
+    case 1u: {
+      return a.inner[p0].a[p1].m_1;
+      break;
+    }
+    case 2u: {
+      return a.inner[p0].a[p1].m_2;
+      break;
+    }
+    default: {  // any other p2 value yields a zero vector
+      return f16vec3(0.0hf);
+      break;
+    }
+  }
+}
+
+float16_t load_a_inner_p0_a_p1_m_p2_p3(uint p0, uint p1, uint p2, uint p3) {  // Loads component p3 of the split matrix member selected by p2.
+  switch(p2) {  // member chosen by switch, component by ordinary indexing
+    case 0u: {
+      return a.inner[p0].a[p1].m_0[p3];
+      break;
+    }
+    case 1u: {
+      return a.inner[p0].a[p1].m_1[p3];
+      break;
+    }
+    case 2u: {
+      return a.inner[p0].a[p1].m_2[p3];
+      break;
+    }
+    default: {  // any other p2 value yields zero
+      return 0.0hf;
+      break;
+    }
+  }
+}
+
+void f() {  // Shader body, called from main().
+  Outer p_a[4] = conv_arr4_Outer(a.inner);  // the p_* locals hold converted values; compare the l_* loads further down
+  int tint_symbol = i();  // outer array index
+  Outer p_a_i = conv_Outer(a.inner[tint_symbol]);
+  Inner p_a_i_a[4] = conv_arr4_Inner(a.inner[tint_symbol].a);
+  int tint_symbol_1 = i();  // inner array index
+  Inner p_a_i_a_i = conv_Inner(a.inner[tint_symbol].a[tint_symbol_1]);
+  f16mat3 p_a_i_a_i_m = load_a_inner_p0_a_p1_m(uint(tint_symbol), uint(tint_symbol_1));
+  int tint_symbol_2 = i();  // matrix member index
+  f16vec3 p_a_i_a_i_m_i = load_a_inner_p0_a_p1_m_p2(uint(tint_symbol), uint(tint_symbol_1), uint(tint_symbol_2));
+  Outer l_a[4] = conv_arr4_Outer(a.inner);  // the l_* loads reuse the saved indices and do not call i() again
+  Outer l_a_i = conv_Outer(a.inner[tint_symbol]);
+  Inner l_a_i_a[4] = conv_arr4_Inner(a.inner[tint_symbol].a);
+  Inner l_a_i_a_i = conv_Inner(a.inner[tint_symbol].a[tint_symbol_1]);
+  f16mat3 l_a_i_a_i_m = load_a_inner_p0_a_p1_m(uint(tint_symbol), uint(tint_symbol_1));
+  f16vec3 l_a_i_a_i_m_i = load_a_inner_p0_a_p1_m_p2(uint(tint_symbol), uint(tint_symbol_1), uint(tint_symbol_2));
+  int tint_symbol_3 = i();  // fourth index, used for the scalar load
+  float16_t l_a_i_a_i_m_i_i = load_a_inner_p0_a_p1_m_p2_p3(uint(tint_symbol), uint(tint_symbol_1), uint(tint_symbol_2), uint(tint_symbol_3));
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;  // local size 1x1x1
+void main() {  // GLSL entry point: forwards to f().
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f16/dynamic_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/dynamic_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..5fbd9f0
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/dynamic_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,49 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>  // Fixed-size array wrapper with one operator[] overload per address space.
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }  // constant space has only a const overload
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];  // underlying storage
+};
+
+struct Inner {  // Inner struct with explicit padding; per the offset comments it ends at 0x40 (0x18 + 40).
+  /* 0x0000 */ half3x3 m;
+  /* 0x0018 */ tint_array<int8_t, 40> tint_pad;  // filler from 0x18 to 0x40
+};
+
+struct Outer {  // Wraps four Inner values.
+  /* 0x0000 */ tint_array<Inner, 4> a;
+};
+
+int i() {  // Returns the incremented counter value.
+  thread int tint_symbol_4 = 0;  // NOTE(review): the counter is a local reinitialised on every call, so i() always returns 1 here, unlike the WGSL module-scope `counter` - confirm intended
+  tint_symbol_4 = as_type<int>((as_type<uint>(tint_symbol_4) + as_type<uint>(1)));  // the addition is carried out on uint and cast back to int
+  return tint_symbol_4;
+}
+
+kernel void f(const constant tint_array<Outer, 4>* tint_symbol_5 [[buffer(0)]]) {  // Compute entry point: the uniform array arrives as a constant pointer at buffer(0).
+  int const tint_symbol = i();
+  int const p_a_i_save = tint_symbol;  // saved outer array index
+  int const tint_symbol_1 = i();
+  int const p_a_i_a_i_save = tint_symbol_1;  // saved inner array index
+  int const tint_symbol_2 = i();
+  int const p_a_i_a_i_m_i_save = tint_symbol_2;  // saved matrix column index
+  tint_array<Outer, 4> const l_a = *(tint_symbol_5);  // each load below re-indexes from the buffer pointer with the saved indices
+  Outer const l_a_i = (*(tint_symbol_5))[p_a_i_save];
+  tint_array<Inner, 4> const l_a_i_a = (*(tint_symbol_5))[p_a_i_save].a;
+  Inner const l_a_i_a_i = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save];
+  half3x3 const l_a_i_a_i_m = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save].m;
+  half3 const l_a_i_a_i_m_i = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save].m[p_a_i_a_i_m_i_save];
+  int const tint_symbol_3 = i();  // fourth index, taken just before the scalar load
+  half const l_a_i_a_i_m_i_i = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save].m[p_a_i_a_i_m_i_save][tint_symbol_3];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f16/dynamic_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/dynamic_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..c6e47d1
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/dynamic_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,324 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 204
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %a_block_std140 "a_block_std140"
+               OpMemberName %a_block_std140 0 "inner"
+               OpName %Outer_std140 "Outer_std140"
+               OpMemberName %Outer_std140 0 "a"
+               OpName %Inner_std140 "Inner_std140"
+               OpMemberName %Inner_std140 0 "m_0"
+               OpMemberName %Inner_std140 1 "m_1"
+               OpMemberName %Inner_std140 2 "m_2"
+               OpName %a "a"
+               OpName %counter "counter"
+               OpName %i "i"
+               OpName %Inner "Inner"
+               OpMemberName %Inner 0 "m"
+               OpName %conv_Inner "conv_Inner"
+               OpName %val "val"
+               OpName %conv_arr4_Inner "conv_arr4_Inner"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i_0 "i"
+               OpName %var_for_index_1 "var_for_index_1"
+               OpName %Outer "Outer"
+               OpMemberName %Outer 0 "a"
+               OpName %conv_Outer "conv_Outer"
+               OpName %val_1 "val"
+               OpName %conv_arr4_Outer "conv_arr4_Outer"
+               OpName %val_2 "val"
+               OpName %arr_0 "arr"
+               OpName %i_1 "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_a_inner_p0_a_p1_m "load_a_inner_p0_a_p1_m"
+               OpName %p0 "p0"
+               OpName %p1 "p1"
+               OpName %load_a_inner_p0_a_p1_m_p2 "load_a_inner_p0_a_p1_m_p2"
+               OpName %p0_0 "p0"
+               OpName %p1_0 "p1"
+               OpName %p2 "p2"
+               OpName %load_a_inner_p0_a_p1_m_p2_p3 "load_a_inner_p0_a_p1_m_p2_p3"
+               OpName %p0_1 "p0"
+               OpName %p1_1 "p1"
+               OpName %p2_0 "p2"
+               OpName %p3 "p3"
+               OpName %f "f"
+               OpDecorate %a_block_std140 Block
+               OpMemberDecorate %a_block_std140 0 Offset 0
+               OpMemberDecorate %Outer_std140 0 Offset 0
+               OpMemberDecorate %Inner_std140 0 Offset 0
+               OpMemberDecorate %Inner_std140 1 Offset 8
+               OpMemberDecorate %Inner_std140 2 Offset 16
+               OpDecorate %_arr_Inner_std140_uint_4 ArrayStride 64
+               OpDecorate %_arr_Outer_std140_uint_4 ArrayStride 256
+               OpDecorate %a NonWritable
+               OpDecorate %a DescriptorSet 0
+               OpDecorate %a Binding 0
+               OpMemberDecorate %Inner 0 Offset 0
+               OpMemberDecorate %Inner 0 ColMajor
+               OpMemberDecorate %Inner 0 MatrixStride 8
+               OpDecorate %_arr_Inner_uint_4 ArrayStride 64
+               OpMemberDecorate %Outer 0 Offset 0
+               OpDecorate %_arr_Outer_uint_4 ArrayStride 256
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+%Inner_std140 = OpTypeStruct %v3half %v3half %v3half
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_Inner_std140_uint_4 = OpTypeArray %Inner_std140 %uint_4
+%Outer_std140 = OpTypeStruct %_arr_Inner_std140_uint_4
+%_arr_Outer_std140_uint_4 = OpTypeArray %Outer_std140 %uint_4
+%a_block_std140 = OpTypeStruct %_arr_Outer_std140_uint_4
+%_ptr_Uniform_a_block_std140 = OpTypePointer Uniform %a_block_std140
+          %a = OpVariable %_ptr_Uniform_a_block_std140 Uniform
+        %int = OpTypeInt 32 1
+         %13 = OpConstantNull %int
+%_ptr_Private_int = OpTypePointer Private %int
+    %counter = OpVariable %_ptr_Private_int Private %13
+         %16 = OpTypeFunction %int
+      %int_1 = OpConstant %int 1
+ %mat3v3half = OpTypeMatrix %v3half 3
+      %Inner = OpTypeStruct %mat3v3half
+         %23 = OpTypeFunction %Inner %Inner_std140
+%_arr_Inner_uint_4 = OpTypeArray %Inner %uint_4
+         %34 = OpTypeFunction %_arr_Inner_uint_4 %_arr_Inner_std140_uint_4
+%_ptr_Function__arr_Inner_uint_4 = OpTypePointer Function %_arr_Inner_uint_4
+         %41 = OpConstantNull %_arr_Inner_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %44 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_Inner_std140_uint_4 = OpTypePointer Function %_arr_Inner_std140_uint_4
+         %57 = OpConstantNull %_arr_Inner_std140_uint_4
+%_ptr_Function_Inner = OpTypePointer Function %Inner
+%_ptr_Function_Inner_std140 = OpTypePointer Function %Inner_std140
+     %uint_1 = OpConstant %uint 1
+      %Outer = OpTypeStruct %_arr_Inner_uint_4
+         %70 = OpTypeFunction %Outer %Outer_std140
+%_arr_Outer_uint_4 = OpTypeArray %Outer %uint_4
+         %78 = OpTypeFunction %_arr_Outer_uint_4 %_arr_Outer_std140_uint_4
+%_ptr_Function__arr_Outer_uint_4 = OpTypePointer Function %_arr_Outer_uint_4
+         %85 = OpConstantNull %_arr_Outer_uint_4
+%_ptr_Function__arr_Outer_std140_uint_4 = OpTypePointer Function %_arr_Outer_std140_uint_4
+         %98 = OpConstantNull %_arr_Outer_std140_uint_4
+%_ptr_Function_Outer = OpTypePointer Function %Outer
+%_ptr_Function_Outer_std140 = OpTypePointer Function %Outer_std140
+        %110 = OpTypeFunction %mat3v3half %uint %uint
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_Inner_std140 = OpTypePointer Uniform %Inner_std140
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+     %uint_2 = OpConstant %uint 2
+        %131 = OpTypeFunction %v3half %uint %uint %uint
+        %148 = OpConstantNull %v3half
+        %149 = OpTypeFunction %half %uint %uint %uint %uint
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+        %168 = OpConstantNull %half
+       %void = OpTypeVoid
+        %169 = OpTypeFunction %void
+%_ptr_Uniform__arr_Outer_std140_uint_4 = OpTypePointer Uniform %_arr_Outer_std140_uint_4
+%_ptr_Uniform_Outer_std140 = OpTypePointer Uniform %Outer_std140
+%_ptr_Uniform__arr_Inner_std140_uint_4 = OpTypePointer Uniform %_arr_Inner_std140_uint_4
+          %i = OpFunction %int None %16
+         %18 = OpLabel
+         %19 = OpLoad %int %counter
+         %21 = OpIAdd %int %19 %int_1
+               OpStore %counter %21
+         %22 = OpLoad %int %counter
+               OpReturnValue %22
+               OpFunctionEnd
+ %conv_Inner = OpFunction %Inner None %23
+        %val = OpFunctionParameter %Inner_std140
+         %28 = OpLabel
+         %29 = OpCompositeExtract %v3half %val 0
+         %30 = OpCompositeExtract %v3half %val 1
+         %31 = OpCompositeExtract %v3half %val 2
+         %32 = OpCompositeConstruct %mat3v3half %29 %30 %31
+         %33 = OpCompositeConstruct %Inner %32
+               OpReturnValue %33
+               OpFunctionEnd
+%conv_arr4_Inner = OpFunction %_arr_Inner_uint_4 None %34
+      %val_0 = OpFunctionParameter %_arr_Inner_std140_uint_4
+         %38 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_Inner_uint_4 Function %41
+        %i_0 = OpVariable %_ptr_Function_uint Function %44
+%var_for_index_1 = OpVariable %_ptr_Function__arr_Inner_std140_uint_4 Function %57
+               OpBranch %45
+         %45 = OpLabel
+               OpLoopMerge %46 %47 None
+               OpBranch %48
+         %48 = OpLabel
+         %50 = OpLoad %uint %i_0
+         %51 = OpULessThan %bool %50 %uint_4
+         %49 = OpLogicalNot %bool %51
+               OpSelectionMerge %53 None
+               OpBranchConditional %49 %54 %53
+         %54 = OpLabel
+               OpBranch %46
+         %53 = OpLabel
+               OpStore %var_for_index_1 %val_0
+         %58 = OpLoad %uint %i_0
+         %60 = OpAccessChain %_ptr_Function_Inner %arr %58
+         %62 = OpLoad %uint %i_0
+         %64 = OpAccessChain %_ptr_Function_Inner_std140 %var_for_index_1 %62
+         %65 = OpLoad %Inner_std140 %64
+         %61 = OpFunctionCall %Inner %conv_Inner %65
+               OpStore %60 %61
+               OpBranch %47
+         %47 = OpLabel
+         %66 = OpLoad %uint %i_0
+         %68 = OpIAdd %uint %66 %uint_1
+               OpStore %i_0 %68
+               OpBranch %45
+         %46 = OpLabel
+         %69 = OpLoad %_arr_Inner_uint_4 %arr
+               OpReturnValue %69
+               OpFunctionEnd
+ %conv_Outer = OpFunction %Outer None %70
+      %val_1 = OpFunctionParameter %Outer_std140
+         %74 = OpLabel
+         %76 = OpCompositeExtract %_arr_Inner_std140_uint_4 %val_1 0
+         %75 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr4_Inner %76
+         %77 = OpCompositeConstruct %Outer %75
+               OpReturnValue %77
+               OpFunctionEnd
+%conv_arr4_Outer = OpFunction %_arr_Outer_uint_4 None %78
+      %val_2 = OpFunctionParameter %_arr_Outer_std140_uint_4
+         %82 = OpLabel
+      %arr_0 = OpVariable %_ptr_Function__arr_Outer_uint_4 Function %85
+        %i_1 = OpVariable %_ptr_Function_uint Function %44
+%var_for_index = OpVariable %_ptr_Function__arr_Outer_std140_uint_4 Function %98
+               OpBranch %87
+         %87 = OpLabel
+               OpLoopMerge %88 %89 None
+               OpBranch %90
+         %90 = OpLabel
+         %92 = OpLoad %uint %i_1
+         %93 = OpULessThan %bool %92 %uint_4
+         %91 = OpLogicalNot %bool %93
+               OpSelectionMerge %94 None
+               OpBranchConditional %91 %95 %94
+         %95 = OpLabel
+               OpBranch %88
+         %94 = OpLabel
+               OpStore %var_for_index %val_2
+         %99 = OpLoad %uint %i_1
+        %101 = OpAccessChain %_ptr_Function_Outer %arr_0 %99
+        %103 = OpLoad %uint %i_1
+        %105 = OpAccessChain %_ptr_Function_Outer_std140 %var_for_index %103
+        %106 = OpLoad %Outer_std140 %105
+        %102 = OpFunctionCall %Outer %conv_Outer %106
+               OpStore %101 %102
+               OpBranch %89
+         %89 = OpLabel
+        %107 = OpLoad %uint %i_1
+        %108 = OpIAdd %uint %107 %uint_1
+               OpStore %i_1 %108
+               OpBranch %87
+         %88 = OpLabel
+        %109 = OpLoad %_arr_Outer_uint_4 %arr_0
+               OpReturnValue %109
+               OpFunctionEnd
+%load_a_inner_p0_a_p1_m = OpFunction %mat3v3half None %110
+         %p0 = OpFunctionParameter %uint
+         %p1 = OpFunctionParameter %uint
+        %114 = OpLabel
+        %118 = OpAccessChain %_ptr_Uniform_Inner_std140 %a %uint_0 %p0 %uint_0 %p1
+        %121 = OpAccessChain %_ptr_Uniform_v3half %118 %uint_0
+        %122 = OpLoad %v3half %121
+        %124 = OpAccessChain %_ptr_Uniform_v3half %118 %uint_1
+        %125 = OpLoad %v3half %124
+        %128 = OpAccessChain %_ptr_Uniform_v3half %118 %uint_2
+        %129 = OpLoad %v3half %128
+        %130 = OpCompositeConstruct %mat3v3half %122 %125 %129
+               OpReturnValue %130
+               OpFunctionEnd
+%load_a_inner_p0_a_p1_m_p2 = OpFunction %v3half None %131
+       %p0_0 = OpFunctionParameter %uint
+       %p1_0 = OpFunctionParameter %uint
+         %p2 = OpFunctionParameter %uint
+        %136 = OpLabel
+               OpSelectionMerge %137 None
+               OpSwitch %p2 %138 0 %139 1 %140 2 %141
+        %139 = OpLabel
+        %142 = OpAccessChain %_ptr_Uniform_v3half %a %uint_0 %p0_0 %uint_0 %p1_0 %uint_0
+        %143 = OpLoad %v3half %142
+               OpReturnValue %143
+        %140 = OpLabel
+        %144 = OpAccessChain %_ptr_Uniform_v3half %a %uint_0 %p0_0 %uint_0 %p1_0 %uint_1
+        %145 = OpLoad %v3half %144
+               OpReturnValue %145
+        %141 = OpLabel
+        %146 = OpAccessChain %_ptr_Uniform_v3half %a %uint_0 %p0_0 %uint_0 %p1_0 %uint_2
+        %147 = OpLoad %v3half %146
+               OpReturnValue %147
+        %138 = OpLabel
+               OpReturnValue %148
+        %137 = OpLabel
+               OpReturnValue %148
+               OpFunctionEnd
+%load_a_inner_p0_a_p1_m_p2_p3 = OpFunction %half None %149
+       %p0_1 = OpFunctionParameter %uint
+       %p1_1 = OpFunctionParameter %uint
+       %p2_0 = OpFunctionParameter %uint
+         %p3 = OpFunctionParameter %uint
+        %155 = OpLabel
+               OpSelectionMerge %156 None
+               OpSwitch %p2_0 %157 0 %158 1 %159 2 %160
+        %158 = OpLabel
+        %162 = OpAccessChain %_ptr_Uniform_half %a %uint_0 %p0_1 %uint_0 %p1_1 %uint_0 %p3
+        %163 = OpLoad %half %162
+               OpReturnValue %163
+        %159 = OpLabel
+        %164 = OpAccessChain %_ptr_Uniform_half %a %uint_0 %p0_1 %uint_0 %p1_1 %uint_1 %p3
+        %165 = OpLoad %half %164
+               OpReturnValue %165
+        %160 = OpLabel
+        %166 = OpAccessChain %_ptr_Uniform_half %a %uint_0 %p0_1 %uint_0 %p1_1 %uint_2 %p3
+        %167 = OpLoad %half %166
+               OpReturnValue %167
+        %157 = OpLabel
+               OpReturnValue %168
+        %156 = OpLabel
+               OpReturnValue %168
+               OpFunctionEnd
+          %f = OpFunction %void None %169
+        %172 = OpLabel
+        %173 = OpFunctionCall %int %i
+        %174 = OpFunctionCall %int %i
+        %175 = OpFunctionCall %int %i
+        %178 = OpAccessChain %_ptr_Uniform__arr_Outer_std140_uint_4 %a %uint_0
+        %179 = OpLoad %_arr_Outer_std140_uint_4 %178
+        %176 = OpFunctionCall %_arr_Outer_uint_4 %conv_arr4_Outer %179
+        %182 = OpAccessChain %_ptr_Uniform_Outer_std140 %a %uint_0 %173
+        %183 = OpLoad %Outer_std140 %182
+        %180 = OpFunctionCall %Outer %conv_Outer %183
+        %186 = OpAccessChain %_ptr_Uniform__arr_Inner_std140_uint_4 %a %uint_0 %173 %uint_0
+        %187 = OpLoad %_arr_Inner_std140_uint_4 %186
+        %184 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr4_Inner %187
+        %189 = OpAccessChain %_ptr_Uniform_Inner_std140 %a %uint_0 %173 %uint_0 %174
+        %190 = OpLoad %Inner_std140 %189
+        %188 = OpFunctionCall %Inner %conv_Inner %190
+        %192 = OpBitcast %uint %173
+        %193 = OpBitcast %uint %174
+        %191 = OpFunctionCall %mat3v3half %load_a_inner_p0_a_p1_m %192 %193
+        %195 = OpBitcast %uint %173
+        %196 = OpBitcast %uint %174
+        %197 = OpBitcast %uint %175
+        %194 = OpFunctionCall %v3half %load_a_inner_p0_a_p1_m_p2 %195 %196 %197
+        %198 = OpFunctionCall %int %i
+        %200 = OpBitcast %uint %173
+        %201 = OpBitcast %uint %174
+        %202 = OpBitcast %uint %175
+        %203 = OpBitcast %uint %198
+        %199 = OpFunctionCall %half %load_a_inner_p0_a_p1_m_p2_p3 %200 %201 %202 %203
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f16/dynamic_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/dynamic_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..07b0c80
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/dynamic_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,36 @@
+enable f16;
+
+struct Inner {
+  @size(64)
+  m : mat3x3<f16>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+var<private> counter = 0;
+
+fn i() -> i32 {
+  counter++;
+  return counter;
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &(a);
+  let p_a_i = &((*(p_a))[i()]);
+  let p_a_i_a = &((*(p_a_i)).a);
+  let p_a_i_a_i = &((*(p_a_i_a))[i()]);
+  let p_a_i_a_i_m = &((*(p_a_i_a_i)).m);
+  let p_a_i_a_i_m_i = &((*(p_a_i_a_i_m))[i()]);
+  let l_a : array<Outer, 4> = *(p_a);
+  let l_a_i : Outer = *(p_a_i);
+  let l_a_i_a : array<Inner, 4> = *(p_a_i_a);
+  let l_a_i_a_i : Inner = *(p_a_i_a_i);
+  let l_a_i_a_i_m : mat3x3<f16> = *(p_a_i_a_i_m);
+  let l_a_i_a_i_m_i : vec3<f16> = *(p_a_i_a_i_m_i);
+  let l_a_i_a_i_m_i_i : f16 = (*(p_a_i_a_i_m_i))[i()];
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f16/static_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/static_index_via_ptr.wgsl
new file mode 100644
index 0000000..84e1840
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/static_index_via_ptr.wgsl

@@ -0,0 +1,31 @@
+enable f16;
+
+struct Inner {
+  @size(64)
+  m : mat3x3<f16>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &a;
+  let p_a_3 = &((*p_a)[3]);
+  let p_a_3_a = &((*p_a_3).a);
+  let p_a_3_a_2 = &((*p_a_3_a)[2]);
+  let p_a_3_a_2_m = &((*p_a_3_a_2).m);
+  let p_a_3_a_2_m_1 = &((*p_a_3_a_2_m)[1]);
+
+
+  let l_a             : array<Outer, 4> = *p_a;
+  let l_a_3           : Outer           = *p_a_3;
+  let l_a_3_a         : array<Inner, 4> = *p_a_3_a;
+  let l_a_3_a_2       : Inner           = *p_a_3_a_2;
+  let l_a_3_a_2_m     : mat3x3<f16>     = *p_a_3_a_2_m;
+  let l_a_3_a_2_m_1   : vec3<f16>       = *p_a_3_a_2_m_1;
+  let l_a_3_a_2_m_1_0 : f16             = (*p_a_3_a_2_m_1)[0];
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..ed02e03
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,76 @@
+struct Inner {
+  matrix<float16_t, 3, 3> m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];
+};
+
+matrix<float16_t, 3, 3> tint_symbol_4(uint4 buffer[64], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  return matrix<float16_t, 3, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]));
+}
+
+Inner tint_symbol_3(uint4 buffer[64], uint offset) {
+  const Inner tint_symbol_7 = {tint_symbol_4(buffer, (offset + 0u))};
+  return tint_symbol_7;
+}
+
+typedef Inner tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[64], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_3(buffer, (offset + (i * 64u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_1(uint4 buffer[64], uint offset) {
+  const Outer tint_symbol_8 = {tint_symbol_2(buffer, (offset + 0u))};
+  return tint_symbol_8;
+}
+
+typedef Outer tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[64], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr_1[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 256u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const Outer l_a[4] = tint_symbol(a, 0u);
+  const Outer l_a_3 = tint_symbol_1(a, 768u);
+  const Inner l_a_3_a[4] = tint_symbol_2(a, 768u);
+  const Inner l_a_3_a_2 = tint_symbol_3(a, 896u);
+  const matrix<float16_t, 3, 3> l_a_3_a_2_m = tint_symbol_4(a, 896u);
+  uint2 ubo_load_6 = a[56].zw;
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  const vector<float16_t, 3> l_a_3_a_2_m_1 = vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]);
+  const float16_t l_a_3_a_2_m_1_0 = float16_t(f16tof32(((a[56].z) & 0xFFFF)));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..6e52af4
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,81 @@
+SKIP: FAILED
+
+struct Inner {
+  matrix<float16_t, 3, 3> m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];
+};
+
+matrix<float16_t, 3, 3> tint_symbol_4(uint4 buffer[64], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  return matrix<float16_t, 3, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]));
+}
+
+Inner tint_symbol_3(uint4 buffer[64], uint offset) {
+  const Inner tint_symbol_7 = {tint_symbol_4(buffer, (offset + 0u))};
+  return tint_symbol_7;
+}
+
+typedef Inner tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[64], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_3(buffer, (offset + (i * 64u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_1(uint4 buffer[64], uint offset) {
+  const Outer tint_symbol_8 = {tint_symbol_2(buffer, (offset + 0u))};
+  return tint_symbol_8;
+}
+
+typedef Outer tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[64], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr_1[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 256u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const Outer l_a[4] = tint_symbol(a, 0u);
+  const Outer l_a_3 = tint_symbol_1(a, 768u);
+  const Inner l_a_3_a[4] = tint_symbol_2(a, 768u);
+  const Inner l_a_3_a_2 = tint_symbol_3(a, 896u);
+  const matrix<float16_t, 3, 3> l_a_3_a_2_m = tint_symbol_4(a, 896u);
+  uint2 ubo_load_6 = a[56].zw;
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  const vector<float16_t, 3> l_a_3_a_2_m_1 = vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]);
+  const float16_t l_a_3_a_2_m_1_0 = float16_t(f16tof32(((a[56].z) & 0xFFFF)));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000021CC01A0970(2,10-18): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f16/static_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/static_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..17bbfac
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/static_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,98 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct Inner {
+  f16mat3 m;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+};
+
+struct Inner_std140 {
+  f16vec3 m_0;
+  f16vec3 m_1;
+  f16vec3 m_2;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+};
+
+struct Outer {
+  Inner a[4];
+};
+
+struct Outer_std140 {
+  Inner_std140 a[4];
+};
+
+layout(binding = 0, std140) uniform a_block_std140_ubo {
+  Outer_std140 inner[4];
+} a;
+
+Inner conv_Inner(Inner_std140 val) {
+  return Inner(f16mat3(val.m_0, val.m_1, val.m_2), val.pad, val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.pad_7, val.pad_8, val.pad_9);
+}
+
+Inner[4] conv_arr4_Inner(Inner_std140 val[4]) {
+  Inner arr[4] = Inner[4](Inner(f16mat3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_Inner(val[i]);
+    }
+  }
+  return arr;
+}
+
+Outer conv_Outer(Outer_std140 val) {
+  return Outer(conv_arr4_Inner(val.a));
+}
+
+Outer[4] conv_arr4_Outer(Outer_std140 val[4]) {
+  Outer arr[4] = Outer[4](Outer(Inner[4](Inner(f16mat3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))), Outer(Inner[4](Inner(f16mat3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))), Outer(Inner[4](Inner(f16mat3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))), Outer(Inner[4](Inner(f16mat3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_Outer(val[i]);
+    }
+  }
+  return arr;
+}
+
+f16mat3 load_a_inner_3_a_2_m() {
+  return f16mat3(a.inner[3u].a[2u].m_0, a.inner[3u].a[2u].m_1, a.inner[3u].a[2u].m_2);
+}
+
+void f() {
+  Outer p_a[4] = conv_arr4_Outer(a.inner);
+  Outer p_a_3 = conv_Outer(a.inner[3u]);
+  Inner p_a_3_a[4] = conv_arr4_Inner(a.inner[3u].a);
+  Inner p_a_3_a_2 = conv_Inner(a.inner[3u].a[2u]);
+  f16mat3 p_a_3_a_2_m = load_a_inner_3_a_2_m();
+  f16vec3 p_a_3_a_2_m_1 = a.inner[3u].a[2u].m_1;
+  Outer l_a[4] = conv_arr4_Outer(a.inner);
+  Outer l_a_3 = conv_Outer(a.inner[3u]);
+  Inner l_a_3_a[4] = conv_arr4_Inner(a.inner[3u].a);
+  Inner l_a_3_a_2 = conv_Inner(a.inner[3u].a[2u]);
+  f16mat3 l_a_3_a_2_m = load_a_inner_3_a_2_m();
+  f16vec3 l_a_3_a_2_m_1 = a.inner[3u].a[2u].m_1;
+  float16_t l_a_3_a_2_m_1_0 = a.inner[3u].a[2u].m_1[0u];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f16/static_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/static_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..b7594a9
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/static_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,36 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct Inner {
+  /* 0x0000 */ half3x3 m;
+  /* 0x0018 */ tint_array<int8_t, 40> tint_pad;
+};
+
+struct Outer {
+  /* 0x0000 */ tint_array<Inner, 4> a;
+};
+
+kernel void f(const constant tint_array<Outer, 4>* tint_symbol [[buffer(0)]]) {
+  tint_array<Outer, 4> const l_a = *(tint_symbol);
+  Outer const l_a_3 = (*(tint_symbol))[3];
+  tint_array<Inner, 4> const l_a_3_a = (*(tint_symbol))[3].a;
+  Inner const l_a_3_a_2 = (*(tint_symbol))[3].a[2];
+  half3x3 const l_a_3_a_2_m = (*(tint_symbol))[3].a[2].m;
+  half3 const l_a_3_a_2_m_1 = (*(tint_symbol))[3].a[2].m[1];
+  half const l_a_3_a_2_m_1_0 = (*(tint_symbol))[3].a[2].m[1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f16/static_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/static_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..a51fd59
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/static_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,232 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 144
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %a_block_std140 "a_block_std140"
+               OpMemberName %a_block_std140 0 "inner"
+               OpName %Outer_std140 "Outer_std140"
+               OpMemberName %Outer_std140 0 "a"
+               OpName %Inner_std140 "Inner_std140"
+               OpMemberName %Inner_std140 0 "m_0"
+               OpMemberName %Inner_std140 1 "m_1"
+               OpMemberName %Inner_std140 2 "m_2"
+               OpName %a "a"
+               OpName %Inner "Inner"
+               OpMemberName %Inner 0 "m"
+               OpName %conv_Inner "conv_Inner"
+               OpName %val "val"
+               OpName %conv_arr4_Inner "conv_arr4_Inner"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index_1 "var_for_index_1"
+               OpName %Outer "Outer"
+               OpMemberName %Outer 0 "a"
+               OpName %conv_Outer "conv_Outer"
+               OpName %val_1 "val"
+               OpName %conv_arr4_Outer "conv_arr4_Outer"
+               OpName %val_2 "val"
+               OpName %arr_0 "arr"
+               OpName %i_0 "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_a_inner_3_a_2_m "load_a_inner_3_a_2_m"
+               OpName %f "f"
+               OpDecorate %a_block_std140 Block
+               OpMemberDecorate %a_block_std140 0 Offset 0
+               OpMemberDecorate %Outer_std140 0 Offset 0
+               OpMemberDecorate %Inner_std140 0 Offset 0
+               OpMemberDecorate %Inner_std140 1 Offset 8
+               OpMemberDecorate %Inner_std140 2 Offset 16
+               OpDecorate %_arr_Inner_std140_uint_4 ArrayStride 64
+               OpDecorate %_arr_Outer_std140_uint_4 ArrayStride 256
+               OpDecorate %a NonWritable
+               OpDecorate %a DescriptorSet 0
+               OpDecorate %a Binding 0
+               OpMemberDecorate %Inner 0 Offset 0
+               OpMemberDecorate %Inner 0 ColMajor
+               OpMemberDecorate %Inner 0 MatrixStride 8
+               OpDecorate %_arr_Inner_uint_4 ArrayStride 64
+               OpMemberDecorate %Outer 0 Offset 0
+               OpDecorate %_arr_Outer_uint_4 ArrayStride 256
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+%Inner_std140 = OpTypeStruct %v3half %v3half %v3half
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_Inner_std140_uint_4 = OpTypeArray %Inner_std140 %uint_4
+%Outer_std140 = OpTypeStruct %_arr_Inner_std140_uint_4
+%_arr_Outer_std140_uint_4 = OpTypeArray %Outer_std140 %uint_4
+%a_block_std140 = OpTypeStruct %_arr_Outer_std140_uint_4
+%_ptr_Uniform_a_block_std140 = OpTypePointer Uniform %a_block_std140
+          %a = OpVariable %_ptr_Uniform_a_block_std140 Uniform
+ %mat3v3half = OpTypeMatrix %v3half 3
+      %Inner = OpTypeStruct %mat3v3half
+         %12 = OpTypeFunction %Inner %Inner_std140
+%_arr_Inner_uint_4 = OpTypeArray %Inner %uint_4
+         %23 = OpTypeFunction %_arr_Inner_uint_4 %_arr_Inner_std140_uint_4
+%_ptr_Function__arr_Inner_uint_4 = OpTypePointer Function %_arr_Inner_uint_4
+         %30 = OpConstantNull %_arr_Inner_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %33 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_Inner_std140_uint_4 = OpTypePointer Function %_arr_Inner_std140_uint_4
+         %46 = OpConstantNull %_arr_Inner_std140_uint_4
+%_ptr_Function_Inner = OpTypePointer Function %Inner
+%_ptr_Function_Inner_std140 = OpTypePointer Function %Inner_std140
+     %uint_1 = OpConstant %uint 1
+      %Outer = OpTypeStruct %_arr_Inner_uint_4
+         %59 = OpTypeFunction %Outer %Outer_std140
+%_arr_Outer_uint_4 = OpTypeArray %Outer %uint_4
+         %67 = OpTypeFunction %_arr_Outer_uint_4 %_arr_Outer_std140_uint_4
+%_ptr_Function__arr_Outer_uint_4 = OpTypePointer Function %_arr_Outer_uint_4
+         %74 = OpConstantNull %_arr_Outer_uint_4
+%_ptr_Function__arr_Outer_std140_uint_4 = OpTypePointer Function %_arr_Outer_std140_uint_4
+         %87 = OpConstantNull %_arr_Outer_std140_uint_4
+%_ptr_Function_Outer = OpTypePointer Function %Outer
+%_ptr_Function_Outer_std140 = OpTypePointer Function %Outer_std140
+         %99 = OpTypeFunction %mat3v3half
+     %uint_0 = OpConstant %uint 0
+     %uint_3 = OpConstant %uint 3
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_Inner_std140 = OpTypePointer Uniform %Inner_std140
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+       %void = OpTypeVoid
+        %119 = OpTypeFunction %void
+%_ptr_Uniform__arr_Outer_std140_uint_4 = OpTypePointer Uniform %_arr_Outer_std140_uint_4
+%_ptr_Uniform_Outer_std140 = OpTypePointer Uniform %Outer_std140
+%_ptr_Uniform__arr_Inner_std140_uint_4 = OpTypePointer Uniform %_arr_Inner_std140_uint_4
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+ %conv_Inner = OpFunction %Inner None %12
+        %val = OpFunctionParameter %Inner_std140
+         %17 = OpLabel
+         %18 = OpCompositeExtract %v3half %val 0
+         %19 = OpCompositeExtract %v3half %val 1
+         %20 = OpCompositeExtract %v3half %val 2
+         %21 = OpCompositeConstruct %mat3v3half %18 %19 %20
+         %22 = OpCompositeConstruct %Inner %21
+               OpReturnValue %22
+               OpFunctionEnd
+%conv_arr4_Inner = OpFunction %_arr_Inner_uint_4 None %23
+      %val_0 = OpFunctionParameter %_arr_Inner_std140_uint_4
+         %27 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_Inner_uint_4 Function %30
+          %i = OpVariable %_ptr_Function_uint Function %33
+%var_for_index_1 = OpVariable %_ptr_Function__arr_Inner_std140_uint_4 Function %46
+               OpBranch %34
+         %34 = OpLabel
+               OpLoopMerge %35 %36 None
+               OpBranch %37
+         %37 = OpLabel
+         %39 = OpLoad %uint %i
+         %40 = OpULessThan %bool %39 %uint_4
+         %38 = OpLogicalNot %bool %40
+               OpSelectionMerge %42 None
+               OpBranchConditional %38 %43 %42
+         %43 = OpLabel
+               OpBranch %35
+         %42 = OpLabel
+               OpStore %var_for_index_1 %val_0
+         %47 = OpLoad %uint %i
+         %49 = OpAccessChain %_ptr_Function_Inner %arr %47
+         %51 = OpLoad %uint %i
+         %53 = OpAccessChain %_ptr_Function_Inner_std140 %var_for_index_1 %51
+         %54 = OpLoad %Inner_std140 %53
+         %50 = OpFunctionCall %Inner %conv_Inner %54
+               OpStore %49 %50
+               OpBranch %36
+         %36 = OpLabel
+         %55 = OpLoad %uint %i
+         %57 = OpIAdd %uint %55 %uint_1
+               OpStore %i %57
+               OpBranch %34
+         %35 = OpLabel
+         %58 = OpLoad %_arr_Inner_uint_4 %arr
+               OpReturnValue %58
+               OpFunctionEnd
+ %conv_Outer = OpFunction %Outer None %59
+      %val_1 = OpFunctionParameter %Outer_std140
+         %63 = OpLabel
+         %65 = OpCompositeExtract %_arr_Inner_std140_uint_4 %val_1 0
+         %64 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr4_Inner %65
+         %66 = OpCompositeConstruct %Outer %64
+               OpReturnValue %66
+               OpFunctionEnd
+%conv_arr4_Outer = OpFunction %_arr_Outer_uint_4 None %67
+      %val_2 = OpFunctionParameter %_arr_Outer_std140_uint_4
+         %71 = OpLabel
+      %arr_0 = OpVariable %_ptr_Function__arr_Outer_uint_4 Function %74
+        %i_0 = OpVariable %_ptr_Function_uint Function %33
+%var_for_index = OpVariable %_ptr_Function__arr_Outer_std140_uint_4 Function %87
+               OpBranch %76
+         %76 = OpLabel
+               OpLoopMerge %77 %78 None
+               OpBranch %79
+         %79 = OpLabel
+         %81 = OpLoad %uint %i_0
+         %82 = OpULessThan %bool %81 %uint_4
+         %80 = OpLogicalNot %bool %82
+               OpSelectionMerge %83 None
+               OpBranchConditional %80 %84 %83
+         %84 = OpLabel
+               OpBranch %77
+         %83 = OpLabel
+               OpStore %var_for_index %val_2
+         %88 = OpLoad %uint %i_0
+         %90 = OpAccessChain %_ptr_Function_Outer %arr_0 %88
+         %92 = OpLoad %uint %i_0
+         %94 = OpAccessChain %_ptr_Function_Outer_std140 %var_for_index %92
+         %95 = OpLoad %Outer_std140 %94
+         %91 = OpFunctionCall %Outer %conv_Outer %95
+               OpStore %90 %91
+               OpBranch %78
+         %78 = OpLabel
+         %96 = OpLoad %uint %i_0
+         %97 = OpIAdd %uint %96 %uint_1
+               OpStore %i_0 %97
+               OpBranch %76
+         %77 = OpLabel
+         %98 = OpLoad %_arr_Outer_uint_4 %arr_0
+               OpReturnValue %98
+               OpFunctionEnd
+%load_a_inner_3_a_2_m = OpFunction %mat3v3half None %99
+        %101 = OpLabel
+        %107 = OpAccessChain %_ptr_Uniform_Inner_std140 %a %uint_0 %uint_3 %uint_0 %uint_2
+        %110 = OpAccessChain %_ptr_Uniform_v3half %107 %uint_0
+        %111 = OpLoad %v3half %110
+        %113 = OpAccessChain %_ptr_Uniform_v3half %107 %uint_1
+        %114 = OpLoad %v3half %113
+        %116 = OpAccessChain %_ptr_Uniform_v3half %107 %uint_2
+        %117 = OpLoad %v3half %116
+        %118 = OpCompositeConstruct %mat3v3half %111 %114 %117
+               OpReturnValue %118
+               OpFunctionEnd
+          %f = OpFunction %void None %119
+        %122 = OpLabel
+        %125 = OpAccessChain %_ptr_Uniform__arr_Outer_std140_uint_4 %a %uint_0
+        %126 = OpLoad %_arr_Outer_std140_uint_4 %125
+        %123 = OpFunctionCall %_arr_Outer_uint_4 %conv_arr4_Outer %126
+        %129 = OpAccessChain %_ptr_Uniform_Outer_std140 %a %uint_0 %uint_3
+        %130 = OpLoad %Outer_std140 %129
+        %127 = OpFunctionCall %Outer %conv_Outer %130
+        %133 = OpAccessChain %_ptr_Uniform__arr_Inner_std140_uint_4 %a %uint_0 %uint_3 %uint_0
+        %134 = OpLoad %_arr_Inner_std140_uint_4 %133
+        %131 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr4_Inner %134
+        %136 = OpAccessChain %_ptr_Uniform_Inner_std140 %a %uint_0 %uint_3 %uint_0 %uint_2
+        %137 = OpLoad %Inner_std140 %136
+        %135 = OpFunctionCall %Inner %conv_Inner %137
+        %138 = OpFunctionCall %mat3v3half %load_a_inner_3_a_2_m
+        %139 = OpAccessChain %_ptr_Uniform_v3half %a %uint_0 %uint_3 %uint_0 %uint_2 %uint_1
+        %140 = OpLoad %v3half %139
+        %142 = OpAccessChain %_ptr_Uniform_half %a %uint_0 %uint_3 %uint_0 %uint_2 %uint_1 %33
+        %143 = OpLoad %half %142
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f16/static_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/static_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..e7b4e1e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/static_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,29 @@
+enable f16;
+
+struct Inner {
+  @size(64)
+  m : mat3x3<f16>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &(a);
+  let p_a_3 = &((*(p_a))[3]);
+  let p_a_3_a = &((*(p_a_3)).a);
+  let p_a_3_a_2 = &((*(p_a_3_a))[2]);
+  let p_a_3_a_2_m = &((*(p_a_3_a_2)).m);
+  let p_a_3_a_2_m_1 = &((*(p_a_3_a_2_m))[1]);
+  let l_a : array<Outer, 4> = *(p_a);
+  let l_a_3 : Outer = *(p_a_3);
+  let l_a_3_a : array<Inner, 4> = *(p_a_3_a);
+  let l_a_3_a_2 : Inner = *(p_a_3_a_2);
+  let l_a_3_a_2_m : mat3x3<f16> = *(p_a_3_a_2_m);
+  let l_a_3_a_2_m_1 : vec3<f16> = *(p_a_3_a_2_m_1);
+  let l_a_3_a_2_m_1_0 : f16 = (*(p_a_3_a_2_m_1))[0];
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_builtin.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_builtin.wgsl
new file mode 100644
index 0000000..a8f6d50
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_builtin.wgsl

@@ -0,0 +1,17 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat3x3<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    let t = transpose(u[2].m);
+    let l = length(u[0].m[1].zxy);
+    let a = abs(u[0].m[1].zxy.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_builtin.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_builtin.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..f79425f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_builtin.wgsl.expected.dxc.hlsl

@@ -0,0 +1,36 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+
+matrix<float16_t, 3, 3> tint_symbol(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  return matrix<float16_t, 3, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 3, 3> t = transpose(tint_symbol(u, 264u));
+  uint2 ubo_load_6 = u[1].xy;
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  const float16_t l = length(vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]).zxy);
+  uint2 ubo_load_7 = u[1].xy;
+  vector<float16_t, 2> ubo_load_7_xz = vector<float16_t, 2>(f16tof32(ubo_load_7 & 0xFFFF));
+  float16_t ubo_load_7_y = f16tof32(ubo_load_7[0] >> 16);
+  const float16_t a = abs(vector<float16_t, 3>(ubo_load_7_xz[0], ubo_load_7_y, ubo_load_7_xz[1]).zxy.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_builtin.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_builtin.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..bb722e2
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_builtin.wgsl.expected.fxc.hlsl

@@ -0,0 +1,41 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+
+matrix<float16_t, 3, 3> tint_symbol(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  return matrix<float16_t, 3, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 3, 3> t = transpose(tint_symbol(u, 264u));
+  uint2 ubo_load_6 = u[1].xy;
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  const float16_t l = length(vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]).zxy);
+  uint2 ubo_load_7 = u[1].xy;
+  vector<float16_t, 2> ubo_load_7_xz = vector<float16_t, 2>(f16tof32(ubo_load_7 & 0xFFFF));
+  float16_t ubo_load_7_y = f16tof32(ubo_load_7[0] >> 16);
+  const float16_t a = abs(vector<float16_t, 3>(ubo_load_7_xz[0], ubo_load_7_y, ubo_load_7_xz[1]).zxy.x);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001BFA9E0F690(5,8-16): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_builtin.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_builtin.wgsl.expected.glsl
new file mode 100644
index 0000000..fdc2b36
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_builtin.wgsl.expected.glsl

@@ -0,0 +1,84 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct S {
+  int before;
+  uint pad;
+  f16mat3 m;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  int after;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+};
+
+struct S_std140 {
+  int before;
+  uint pad;
+  f16vec3 m_0;
+  f16vec3 m_1;
+  f16vec3 m_2;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  int after;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  S_std140 inner[4];
+} u;
+
+f16mat3 load_u_inner_2_m() {
+  return f16mat3(u.inner[2u].m_0, u.inner[2u].m_1, u.inner[2u].m_2);
+}
+
+void f() {
+  f16mat3 t = transpose(load_u_inner_2_m());
+  float16_t l = length(u.inner[0u].m_1.zxy);
+  float16_t a = abs(u.inner[0u].m_1.zxy[0u]);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_builtin.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_builtin.wgsl.expected.msl
new file mode 100644
index 0000000..350c296
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_builtin.wgsl.expected.msl

@@ -0,0 +1,32 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
+  /* 0x0008 */ half3x3 m;
+  /* 0x0020 */ tint_array<int8_t, 32> tint_pad_1;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_2;
+};
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol [[buffer(0)]]) {
+  half3x3 const t = transpose((*(tint_symbol))[2].m);
+  half const l = length(half3((*(tint_symbol))[0].m[1]).zxy);
+  half const a = fabs(half3((*(tint_symbol))[0].m[1]).zxy[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_builtin.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_builtin.wgsl.expected.spvasm
new file mode 100644
index 0000000..30a0ddd
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_builtin.wgsl.expected.spvasm

@@ -0,0 +1,84 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 50
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+         %40 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "m_2"
+               OpMemberName %S_std140 4 "after"
+               OpName %u "u"
+               OpName %load_u_inner_2_m "load_u_inner_2_m"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 8
+               OpMemberDecorate %S_std140 2 Offset 16
+               OpMemberDecorate %S_std140 3 Offset 24
+               OpMemberDecorate %S_std140 4 Offset 64
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+        %int = OpTypeInt 32 1
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+   %S_std140 = OpTypeStruct %int %v3half %v3half %v3half %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat3v3half = OpTypeMatrix %v3half 3
+         %11 = OpTypeFunction %mat3v3half
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+     %uint_1 = OpConstant %uint 1
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+     %uint_3 = OpConstant %uint 3
+       %void = OpTypeVoid
+         %33 = OpTypeFunction %void
+         %41 = OpConstantNull %uint
+%load_u_inner_2_m = OpFunction %mat3v3half None %11
+         %14 = OpLabel
+         %19 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %23 = OpAccessChain %_ptr_Uniform_v3half %19 %uint_1
+         %24 = OpLoad %v3half %23
+         %26 = OpAccessChain %_ptr_Uniform_v3half %19 %uint_2
+         %27 = OpLoad %v3half %26
+         %30 = OpAccessChain %_ptr_Uniform_v3half %19 %uint_3
+         %31 = OpLoad %v3half %30
+         %32 = OpCompositeConstruct %mat3v3half %24 %27 %31
+               OpReturnValue %32
+               OpFunctionEnd
+          %f = OpFunction %void None %33
+         %36 = OpLabel
+         %38 = OpFunctionCall %mat3v3half %load_u_inner_2_m
+         %37 = OpTranspose %mat3v3half %38
+         %42 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0 %41 %uint_2
+         %43 = OpLoad %v3half %42
+         %44 = OpVectorShuffle %v3half %43 %43 2 0 1
+         %39 = OpExtInst %half %40 Length %44
+         %46 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0 %41 %uint_2
+         %47 = OpLoad %v3half %46
+         %48 = OpVectorShuffle %v3half %47 %47 2 0 1
+         %49 = OpCompositeExtract %half %48 0
+         %45 = OpExtInst %half %40 FAbs %49
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_builtin.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_builtin.wgsl.expected.wgsl
new file mode 100644
index 0000000..58666b8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_builtin.wgsl.expected.wgsl

@@ -0,0 +1,17 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat3x3<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let t = transpose(u[2].m);
+  let l = length(u[0].m[1].zxy);
+  let a = abs(u[0].m[1].zxy.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_fn.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_fn.wgsl
new file mode 100644
index 0000000..01fffdd
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_fn.wgsl

@@ -0,0 +1,25 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat3x3<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+fn a(a : array<S, 4>) {}
+fn b(s : S) {}
+fn c(m : mat3x3<f16>) {}
+fn d(v : vec3<f16>) {}
+fn e(f : f16) {}
+
+@compute @workgroup_size(1)
+fn f() {
+    a(u);
+    b(u[2]);
+    c(u[2].m);
+    d(u[0].m[1].zxy);
+    e(u[0].m[1].zxy.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_fn.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_fn.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..08bbf31
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_fn.wgsl.expected.dxc.hlsl

@@ -0,0 +1,77 @@
+struct S {
+  int before;
+  matrix<float16_t, 3, 3> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+
+void a(S a_1[4]) {
+}
+
+void b(S s) {
+}
+
+void c(matrix<float16_t, 3, 3> m) {
+}
+
+void d(vector<float16_t, 3> v) {
+}
+
+void e(float16_t f_1) {
+}
+
+matrix<float16_t, 3, 3> tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  return matrix<float16_t, 3, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]));
+}
+
+S tint_symbol_1(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_3 = ((offset + 0u)) / 4;
+  const uint scalar_offset_4 = ((offset + 64u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4]), tint_symbol_3(buffer, (offset + 8u)), asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(tint_symbol_1(u, 256u));
+  c(tint_symbol_3(u, 264u));
+  uint2 ubo_load_6 = u[1].xy;
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  d(vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]).zxy);
+  uint2 ubo_load_7 = u[1].xy;
+  vector<float16_t, 2> ubo_load_7_xz = vector<float16_t, 2>(f16tof32(ubo_load_7 & 0xFFFF));
+  float16_t ubo_load_7_y = f16tof32(ubo_load_7[0] >> 16);
+  e(vector<float16_t, 3>(ubo_load_7_xz[0], ubo_load_7_y, ubo_load_7_xz[1]).zxy.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_fn.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_fn.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..f1deae3
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_fn.wgsl.expected.fxc.hlsl

@@ -0,0 +1,82 @@
+SKIP: FAILED
+
+struct S {
+  int before;
+  matrix<float16_t, 3, 3> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+
+void a(S a_1[4]) {
+}
+
+void b(S s) {
+}
+
+void c(matrix<float16_t, 3, 3> m) {
+}
+
+void d(vector<float16_t, 3> v) {
+}
+
+void e(float16_t f_1) {
+}
+
+matrix<float16_t, 3, 3> tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  return matrix<float16_t, 3, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]));
+}
+
+S tint_symbol_1(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_3 = ((offset + 0u)) / 4;
+  const uint scalar_offset_4 = ((offset + 64u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4]), tint_symbol_3(buffer, (offset + 8u)), asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(tint_symbol_1(u, 256u));
+  c(tint_symbol_3(u, 264u));
+  uint2 ubo_load_6 = u[1].xy;
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  d(vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]).zxy);
+  uint2 ubo_load_7 = u[1].xy;
+  vector<float16_t, 2> ubo_load_7_xz = vector<float16_t, 2>(f16tof32(ubo_load_7 & 0xFFFF));
+  float16_t ubo_load_7_y = f16tof32(ubo_load_7[0] >> 16);
+  e(vector<float16_t, 3>(ubo_load_7_xz[0], ubo_load_7_y, ubo_load_7_xz[1]).zxy.x);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001A1CC720100(3,10-18): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_fn.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_fn.wgsl.expected.glsl
new file mode 100644
index 0000000..263e4168
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_fn.wgsl.expected.glsl

@@ -0,0 +1,115 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct S {
+  int before;
+  uint pad;
+  f16mat3 m;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  int after;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+};
+
+struct S_std140 {
+  int before;
+  uint pad;
+  f16vec3 m_0;
+  f16vec3 m_1;
+  f16vec3 m_2;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  int after;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  S_std140 inner[4];
+} u;
+
+void a(S a_1[4]) {
+}
+
+void b(S s) {
+}
+
+void c(f16mat3 m) {
+}
+
+void d(f16vec3 v) {
+}
+
+void e(float16_t f_1) {
+}
+
+S conv_S(S_std140 val) {
+  return S(val.before, val.pad, f16mat3(val.m_0, val.m_1, val.m_2), val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.pad_7, val.pad_8, val.after, val.pad_9, val.pad_10, val.pad_11, val.pad_12, val.pad_13, val.pad_14, val.pad_15, val.pad_16, val.pad_17, val.pad_18, val.pad_19, val.pad_20, val.pad_21, val.pad_22, val.pad_23);
+}
+
+S[4] conv_arr4_S(S_std140 val[4]) {
+  S arr[4] = S[4](S(0, 0u, f16mat3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_S(val[i]);
+    }
+  }
+  return arr;
+}
+
+f16mat3 load_u_inner_2_m() {
+  return f16mat3(u.inner[2u].m_0, u.inner[2u].m_1, u.inner[2u].m_2);
+}
+
+void f() {
+  a(conv_arr4_S(u.inner));
+  b(conv_S(u.inner[2u]));
+  c(load_u_inner_2_m());
+  d(u.inner[0u].m_1.zxy);
+  e(u.inner[0u].m_1.zxy[0u]);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_fn.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_fn.wgsl.expected.msl
new file mode 100644
index 0000000..89e6084
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_fn.wgsl.expected.msl

@@ -0,0 +1,49 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
+  /* 0x0008 */ half3x3 m;
+  /* 0x0020 */ tint_array<int8_t, 32> tint_pad_1;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_2;
+};
+
+void a(tint_array<S, 4> a_1) {
+}
+
+void b(S s) {
+}
+
+void c(half3x3 m) {
+}
+
+void d(half3 v) {
+}
+
+void e(half f_1) {
+}
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol [[buffer(0)]]) {
+  a(*(tint_symbol));
+  b((*(tint_symbol))[2]);
+  c((*(tint_symbol))[2].m);
+  d(half3((*(tint_symbol))[0].m[1]).zxy);
+  e(half3((*(tint_symbol))[0].m[1]).zxy[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_fn.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_fn.wgsl.expected.spvasm
new file mode 100644
index 0000000..4bd94cd
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_fn.wgsl.expected.spvasm

@@ -0,0 +1,210 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 124
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "m_2"
+               OpMemberName %S_std140 4 "after"
+               OpName %u "u"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %a "a"
+               OpName %a_1 "a_1"
+               OpName %b "b"
+               OpName %s "s"
+               OpName %c "c"
+               OpName %m "m"
+               OpName %d "d"
+               OpName %v "v"
+               OpName %e "e"
+               OpName %f_1 "f_1"
+               OpName %conv_S "conv_S"
+               OpName %val "val"
+               OpName %conv_arr4_S "conv_arr4_S"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_u_inner_2_m "load_u_inner_2_m"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 8
+               OpMemberDecorate %S_std140 2 Offset 16
+               OpMemberDecorate %S_std140 3 Offset 24
+               OpMemberDecorate %S_std140 4 Offset 64
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 8
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 8
+               OpMemberDecorate %S 2 Offset 64
+               OpDecorate %_arr_S_uint_4 ArrayStride 128
+        %int = OpTypeInt 32 1
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+   %S_std140 = OpTypeStruct %int %v3half %v3half %v3half %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+       %void = OpTypeVoid
+ %mat3v3half = OpTypeMatrix %v3half 3
+          %S = OpTypeStruct %int %mat3v3half %int
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+         %11 = OpTypeFunction %void %_arr_S_uint_4
+         %19 = OpTypeFunction %void %S
+         %23 = OpTypeFunction %void %mat3v3half
+         %27 = OpTypeFunction %void %v3half
+         %31 = OpTypeFunction %void %half
+         %35 = OpTypeFunction %S %S_std140
+         %46 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
+%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
+         %52 = OpConstantNull %_arr_S_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %55 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
+         %68 = OpConstantNull %_arr_S_std140_uint_4
+%_ptr_Function_S = OpTypePointer Function %S
+%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
+     %uint_1 = OpConstant %uint 1
+         %81 = OpTypeFunction %mat3v3half
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+     %uint_3 = OpConstant %uint 3
+        %101 = OpTypeFunction %void
+%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
+          %a = OpFunction %void None %11
+        %a_1 = OpFunctionParameter %_arr_S_uint_4
+         %18 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %b = OpFunction %void None %19
+          %s = OpFunctionParameter %S
+         %22 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %c = OpFunction %void None %23
+          %m = OpFunctionParameter %mat3v3half
+         %26 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %d = OpFunction %void None %27
+          %v = OpFunctionParameter %v3half
+         %30 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %e = OpFunction %void None %31
+        %f_1 = OpFunctionParameter %half
+         %34 = OpLabel
+               OpReturn
+               OpFunctionEnd
+     %conv_S = OpFunction %S None %35
+        %val = OpFunctionParameter %S_std140
+         %38 = OpLabel
+         %39 = OpCompositeExtract %int %val 0
+         %40 = OpCompositeExtract %v3half %val 1
+         %41 = OpCompositeExtract %v3half %val 2
+         %42 = OpCompositeExtract %v3half %val 3
+         %43 = OpCompositeConstruct %mat3v3half %40 %41 %42
+         %44 = OpCompositeExtract %int %val 4
+         %45 = OpCompositeConstruct %S %39 %43 %44
+               OpReturnValue %45
+               OpFunctionEnd
+%conv_arr4_S = OpFunction %_arr_S_uint_4 None %46
+      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
+         %49 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %52
+          %i = OpVariable %_ptr_Function_uint Function %55
+%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %68
+               OpBranch %56
+         %56 = OpLabel
+               OpLoopMerge %57 %58 None
+               OpBranch %59
+         %59 = OpLabel
+         %61 = OpLoad %uint %i
+         %62 = OpULessThan %bool %61 %uint_4
+         %60 = OpLogicalNot %bool %62
+               OpSelectionMerge %64 None
+               OpBranchConditional %60 %65 %64
+         %65 = OpLabel
+               OpBranch %57
+         %64 = OpLabel
+               OpStore %var_for_index %val_0
+         %69 = OpLoad %uint %i
+         %71 = OpAccessChain %_ptr_Function_S %arr %69
+         %73 = OpLoad %uint %i
+         %75 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %73
+         %76 = OpLoad %S_std140 %75
+         %72 = OpFunctionCall %S %conv_S %76
+               OpStore %71 %72
+               OpBranch %58
+         %58 = OpLabel
+         %77 = OpLoad %uint %i
+         %79 = OpIAdd %uint %77 %uint_1
+               OpStore %i %79
+               OpBranch %56
+         %57 = OpLabel
+         %80 = OpLoad %_arr_S_uint_4 %arr
+               OpReturnValue %80
+               OpFunctionEnd
+%load_u_inner_2_m = OpFunction %mat3v3half None %81
+         %83 = OpLabel
+         %88 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %91 = OpAccessChain %_ptr_Uniform_v3half %88 %uint_1
+         %92 = OpLoad %v3half %91
+         %94 = OpAccessChain %_ptr_Uniform_v3half %88 %uint_2
+         %95 = OpLoad %v3half %94
+         %98 = OpAccessChain %_ptr_Uniform_v3half %88 %uint_3
+         %99 = OpLoad %v3half %98
+        %100 = OpCompositeConstruct %mat3v3half %92 %95 %99
+               OpReturnValue %100
+               OpFunctionEnd
+          %f = OpFunction %void None %101
+        %103 = OpLabel
+        %107 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
+        %108 = OpLoad %_arr_S_std140_uint_4 %107
+        %105 = OpFunctionCall %_arr_S_uint_4 %conv_arr4_S %108
+        %104 = OpFunctionCall %void %a %105
+        %111 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+        %112 = OpLoad %S_std140 %111
+        %110 = OpFunctionCall %S %conv_S %112
+        %109 = OpFunctionCall %void %b %110
+        %114 = OpFunctionCall %mat3v3half %load_u_inner_2_m
+        %113 = OpFunctionCall %void %c %114
+        %116 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0 %55 %uint_2
+        %117 = OpLoad %v3half %116
+        %118 = OpVectorShuffle %v3half %117 %117 2 0 1
+        %115 = OpFunctionCall %void %d %118
+        %120 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0 %55 %uint_2
+        %121 = OpLoad %v3half %120
+        %122 = OpVectorShuffle %v3half %121 %121 2 0 1
+        %123 = OpCompositeExtract %half %122 0
+        %119 = OpFunctionCall %void %e %123
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_fn.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_fn.wgsl.expected.wgsl
new file mode 100644
index 0000000..786a6ba
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_fn.wgsl.expected.wgsl

@@ -0,0 +1,34 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat3x3<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+fn a(a : array<S, 4>) {
+}
+
+fn b(s : S) {
+}
+
+fn c(m : mat3x3<f16>) {
+}
+
+fn d(v : vec3<f16>) {
+}
+
+fn e(f : f16) {
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  a(u);
+  b(u[2]);
+  c(u[2].m);
+  d(u[0].m[1].zxy);
+  e(u[0].m[1].zxy.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_private.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_private.wgsl
new file mode 100644
index 0000000..ab176ca
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_private.wgsl

@@ -0,0 +1,19 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat3x3<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+var<private> p : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    p = u;
+    p[1] = u[2];
+    p[3].m = u[2].m;
+    p[1].m[0] = u[0].m[1].zxy;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_private.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_private.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..dffe336
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_private.wgsl.expected.dxc.hlsl

@@ -0,0 +1,59 @@
+struct S {
+  int before;
+  matrix<float16_t, 3, 3> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+static S p[4] = (S[4])0;
+
+matrix<float16_t, 3, 3> tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  return matrix<float16_t, 3, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]));
+}
+
+S tint_symbol_1(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_3 = ((offset + 0u)) / 4;
+  const uint scalar_offset_4 = ((offset + 64u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4]), tint_symbol_3(buffer, (offset + 8u)), asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = tint_symbol_1(u, 256u);
+  p[3].m = tint_symbol_3(u, 264u);
+  uint2 ubo_load_6 = u[1].xy;
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  p[1].m[0] = vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]).zxy;
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_private.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_private.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..7c9105a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_private.wgsl.expected.fxc.hlsl

@@ -0,0 +1,64 @@
+SKIP: FAILED
+
+struct S {
+  int before;
+  matrix<float16_t, 3, 3> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+static S p[4] = (S[4])0;
+
+matrix<float16_t, 3, 3> tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  return matrix<float16_t, 3, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]));
+}
+
+S tint_symbol_1(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_3 = ((offset + 0u)) / 4;
+  const uint scalar_offset_4 = ((offset + 64u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4]), tint_symbol_3(buffer, (offset + 8u)), asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = tint_symbol_1(u, 256u);
+  p[3].m = tint_symbol_3(u, 264u);
+  uint2 ubo_load_6 = u[1].xy;
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  p[1].m[0] = vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]).zxy;
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000025A3E276990(3,10-18): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_private.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_private.wgsl.expected.glsl
new file mode 100644
index 0000000..35c7f1a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_private.wgsl.expected.glsl

@@ -0,0 +1,100 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct S {
+  int before;
+  uint pad;
+  f16mat3 m;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  int after;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+};
+
+struct S_std140 {
+  int before;
+  uint pad;
+  f16vec3 m_0;
+  f16vec3 m_1;
+  f16vec3 m_2;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  int after;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  S_std140 inner[4];
+} u;
+
+S p[4] = S[4](S(0, 0u, f16mat3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+S conv_S(S_std140 val) {
+  return S(val.before, val.pad, f16mat3(val.m_0, val.m_1, val.m_2), val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.pad_7, val.pad_8, val.after, val.pad_9, val.pad_10, val.pad_11, val.pad_12, val.pad_13, val.pad_14, val.pad_15, val.pad_16, val.pad_17, val.pad_18, val.pad_19, val.pad_20, val.pad_21, val.pad_22, val.pad_23);
+}
+
+S[4] conv_arr4_S(S_std140 val[4]) {
+  S arr[4] = S[4](S(0, 0u, f16mat3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_S(val[i]);
+    }
+  }
+  return arr;
+}
+
+f16mat3 load_u_inner_2_m() {
+  return f16mat3(u.inner[2u].m_0, u.inner[2u].m_1, u.inner[2u].m_2);
+}
+
+void f() {
+  p = conv_arr4_S(u.inner);
+  p[1] = conv_S(u.inner[2u]);
+  p[3].m = load_u_inner_2_m();
+  p[1].m[0] = u.inner[0u].m_1.zxy;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_private.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_private.wgsl.expected.msl
new file mode 100644
index 0000000..ff2ae9b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_private.wgsl.expected.msl

@@ -0,0 +1,34 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
+  /* 0x0008 */ half3x3 m;
+  /* 0x0020 */ tint_array<int8_t, 32> tint_pad_1;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_2;
+};
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol_1 [[buffer(0)]]) {
+  thread tint_array<S, 4> tint_symbol = {};
+  tint_symbol = *(tint_symbol_1);
+  tint_symbol[1] = (*(tint_symbol_1))[2];
+  tint_symbol[3].m = (*(tint_symbol_1))[2].m;
+  tint_symbol[1].m[0] = half3((*(tint_symbol_1))[0].m[1]).zxy;
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_private.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_private.wgsl.expected.spvasm
new file mode 100644
index 0000000..26873c8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_private.wgsl.expected.spvasm

@@ -0,0 +1,177 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 106
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "m_2"
+               OpMemberName %S_std140 4 "after"
+               OpName %u "u"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %p "p"
+               OpName %conv_S "conv_S"
+               OpName %val "val"
+               OpName %conv_arr4_S "conv_arr4_S"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_u_inner_2_m "load_u_inner_2_m"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 8
+               OpMemberDecorate %S_std140 2 Offset 16
+               OpMemberDecorate %S_std140 3 Offset 24
+               OpMemberDecorate %S_std140 4 Offset 64
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 8
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 8
+               OpMemberDecorate %S 2 Offset 64
+               OpDecorate %_arr_S_uint_4 ArrayStride 128
+        %int = OpTypeInt 32 1
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+   %S_std140 = OpTypeStruct %int %v3half %v3half %v3half %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat3v3half = OpTypeMatrix %v3half 3
+          %S = OpTypeStruct %int %mat3v3half %int
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+%_ptr_Private__arr_S_uint_4 = OpTypePointer Private %_arr_S_uint_4
+         %16 = OpConstantNull %_arr_S_uint_4
+          %p = OpVariable %_ptr_Private__arr_S_uint_4 Private %16
+         %17 = OpTypeFunction %S %S_std140
+         %28 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
+%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %36 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
+         %49 = OpConstantNull %_arr_S_std140_uint_4
+%_ptr_Function_S = OpTypePointer Function %S
+%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
+     %uint_1 = OpConstant %uint 1
+         %62 = OpTypeFunction %mat3v3half
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+     %uint_3 = OpConstant %uint 3
+       %void = OpTypeVoid
+         %82 = OpTypeFunction %void
+%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
+      %int_1 = OpConstant %int 1
+%_ptr_Private_S = OpTypePointer Private %S
+      %int_3 = OpConstant %int 3
+%_ptr_Private_mat3v3half = OpTypePointer Private %mat3v3half
+        %100 = OpConstantNull %int
+%_ptr_Private_v3half = OpTypePointer Private %v3half
+     %conv_S = OpFunction %S None %17
+        %val = OpFunctionParameter %S_std140
+         %20 = OpLabel
+         %21 = OpCompositeExtract %int %val 0
+         %22 = OpCompositeExtract %v3half %val 1
+         %23 = OpCompositeExtract %v3half %val 2
+         %24 = OpCompositeExtract %v3half %val 3
+         %25 = OpCompositeConstruct %mat3v3half %22 %23 %24
+         %26 = OpCompositeExtract %int %val 4
+         %27 = OpCompositeConstruct %S %21 %25 %26
+               OpReturnValue %27
+               OpFunctionEnd
+%conv_arr4_S = OpFunction %_arr_S_uint_4 None %28
+      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
+         %31 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %16
+          %i = OpVariable %_ptr_Function_uint Function %36
+%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %49
+               OpBranch %37
+         %37 = OpLabel
+               OpLoopMerge %38 %39 None
+               OpBranch %40
+         %40 = OpLabel
+         %42 = OpLoad %uint %i
+         %43 = OpULessThan %bool %42 %uint_4
+         %41 = OpLogicalNot %bool %43
+               OpSelectionMerge %45 None
+               OpBranchConditional %41 %46 %45
+         %46 = OpLabel
+               OpBranch %38
+         %45 = OpLabel
+               OpStore %var_for_index %val_0
+         %50 = OpLoad %uint %i
+         %52 = OpAccessChain %_ptr_Function_S %arr %50
+         %54 = OpLoad %uint %i
+         %56 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %54
+         %57 = OpLoad %S_std140 %56
+         %53 = OpFunctionCall %S %conv_S %57
+               OpStore %52 %53
+               OpBranch %39
+         %39 = OpLabel
+         %58 = OpLoad %uint %i
+         %60 = OpIAdd %uint %58 %uint_1
+               OpStore %i %60
+               OpBranch %37
+         %38 = OpLabel
+         %61 = OpLoad %_arr_S_uint_4 %arr
+               OpReturnValue %61
+               OpFunctionEnd
+%load_u_inner_2_m = OpFunction %mat3v3half None %62
+         %64 = OpLabel
+         %69 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %72 = OpAccessChain %_ptr_Uniform_v3half %69 %uint_1
+         %73 = OpLoad %v3half %72
+         %75 = OpAccessChain %_ptr_Uniform_v3half %69 %uint_2
+         %76 = OpLoad %v3half %75
+         %79 = OpAccessChain %_ptr_Uniform_v3half %69 %uint_3
+         %80 = OpLoad %v3half %79
+         %81 = OpCompositeConstruct %mat3v3half %73 %76 %80
+               OpReturnValue %81
+               OpFunctionEnd
+          %f = OpFunction %void None %82
+         %85 = OpLabel
+         %88 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
+         %89 = OpLoad %_arr_S_std140_uint_4 %88
+         %86 = OpFunctionCall %_arr_S_uint_4 %conv_arr4_S %89
+               OpStore %p %86
+         %92 = OpAccessChain %_ptr_Private_S %p %int_1
+         %94 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %95 = OpLoad %S_std140 %94
+         %93 = OpFunctionCall %S %conv_S %95
+               OpStore %92 %93
+         %98 = OpAccessChain %_ptr_Private_mat3v3half %p %int_3 %uint_1
+         %99 = OpFunctionCall %mat3v3half %load_u_inner_2_m
+               OpStore %98 %99
+        %102 = OpAccessChain %_ptr_Private_v3half %p %int_1 %uint_1 %100
+        %103 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0 %36 %uint_2
+        %104 = OpLoad %v3half %103
+        %105 = OpVectorShuffle %v3half %104 %104 2 0 1
+               OpStore %102 %105
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_private.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_private.wgsl.expected.wgsl
new file mode 100644
index 0000000..d6f4bf6
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_private.wgsl.expected.wgsl

@@ -0,0 +1,20 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat3x3<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+var<private> p : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  p = u;
+  p[1] = u[2];
+  p[3].m = u[2].m;
+  p[1].m[0] = u[0].m[1].zxy;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_storage.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_storage.wgsl
new file mode 100644
index 0000000..290b0f1
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_storage.wgsl

@@ -0,0 +1,19 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat3x3<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    s = u;
+    s[1] = u[2];
+    s[3].m = u[2].m;
+    s[1].m[0] = u[0].m[1].zxy;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_storage.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_storage.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..403b78d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_storage.wgsl.expected.dxc.hlsl

@@ -0,0 +1,80 @@
+struct S {
+  int before;
+  matrix<float16_t, 3, 3> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol_3(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 3, 3> value) {
+  buffer.Store<vector<float16_t, 3> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 8u), value[1u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 16u), value[2u]);
+}
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, S value) {
+  buffer.Store((offset + 0u), asuint(value.before));
+  tint_symbol_3(buffer, (offset + 8u), value.m);
+  buffer.Store((offset + 64u), asuint(value.after));
+}
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, S value[4]) {
+  S array[4] = value;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      tint_symbol_1(buffer, (offset + (i * 128u)), array[i]);
+    }
+  }
+}
+
+matrix<float16_t, 3, 3> tint_symbol_8(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  return matrix<float16_t, 3, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]));
+}
+
+S tint_symbol_6(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_3 = ((offset + 0u)) / 4;
+  const uint scalar_offset_4 = ((offset + 64u)) / 4;
+  const S tint_symbol_10 = {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4]), tint_symbol_8(buffer, (offset + 8u)), asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4])};
+  return tint_symbol_10;
+}
+
+typedef S tint_symbol_5_ret[4];
+tint_symbol_5_ret tint_symbol_5(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_6(buffer, (offset + (i_1 * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_5(u, 0u));
+  tint_symbol_1(s, 128u, tint_symbol_6(u, 256u));
+  tint_symbol_3(s, 392u, tint_symbol_8(u, 264u));
+  uint2 ubo_load_6 = u[1].xy;
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  s.Store<vector<float16_t, 3> >(136u, vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]).zxy);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_storage.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_storage.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..c89fc84
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_storage.wgsl.expected.fxc.hlsl

@@ -0,0 +1,85 @@
+SKIP: FAILED
+
+struct S {
+  int before;
+  matrix<float16_t, 3, 3> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol_3(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 3, 3> value) {
+  buffer.Store<vector<float16_t, 3> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 8u), value[1u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 16u), value[2u]);
+}
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, S value) {
+  buffer.Store((offset + 0u), asuint(value.before));
+  tint_symbol_3(buffer, (offset + 8u), value.m);
+  buffer.Store((offset + 64u), asuint(value.after));
+}
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, S value[4]) {
+  S array[4] = value;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      tint_symbol_1(buffer, (offset + (i * 128u)), array[i]);
+    }
+  }
+}
+
+matrix<float16_t, 3, 3> tint_symbol_8(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  return matrix<float16_t, 3, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]));
+}
+
+S tint_symbol_6(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_3 = ((offset + 0u)) / 4;
+  const uint scalar_offset_4 = ((offset + 64u)) / 4;
+  const S tint_symbol_10 = {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4]), tint_symbol_8(buffer, (offset + 8u)), asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4])};
+  return tint_symbol_10;
+}
+
+typedef S tint_symbol_5_ret[4];
+tint_symbol_5_ret tint_symbol_5(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_6(buffer, (offset + (i_1 * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_5(u, 0u));
+  tint_symbol_1(s, 128u, tint_symbol_6(u, 256u));
+  tint_symbol_3(s, 392u, tint_symbol_8(u, 264u));
+  uint2 ubo_load_6 = u[1].xy;
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  s.Store<vector<float16_t, 3> >(136u, vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]).zxy);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001D5F9C4A510(3,10-18): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_storage.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_storage.wgsl.expected.glsl
new file mode 100644
index 0000000..dec449e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_storage.wgsl.expected.glsl

@@ -0,0 +1,103 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct S {
+  int before;
+  uint pad;
+  f16mat3 m;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  int after;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+};
+
+struct S_std140 {
+  int before;
+  uint pad;
+  f16vec3 m_0;
+  f16vec3 m_1;
+  f16vec3 m_2;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  int after;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  S_std140 inner[4];
+} u;
+
+layout(binding = 1, std430) buffer u_block_ssbo {
+  S inner[4];
+} s;
+
+S conv_S(S_std140 val) {
+  return S(val.before, val.pad, f16mat3(val.m_0, val.m_1, val.m_2), val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.pad_7, val.pad_8, val.after, val.pad_9, val.pad_10, val.pad_11, val.pad_12, val.pad_13, val.pad_14, val.pad_15, val.pad_16, val.pad_17, val.pad_18, val.pad_19, val.pad_20, val.pad_21, val.pad_22, val.pad_23);
+}
+
+S[4] conv_arr4_S(S_std140 val[4]) {
+  S arr[4] = S[4](S(0, 0u, f16mat3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_S(val[i]);
+    }
+  }
+  return arr;
+}
+
+f16mat3 load_u_inner_2_m() {
+  return f16mat3(u.inner[2u].m_0, u.inner[2u].m_1, u.inner[2u].m_2);
+}
+
+void f() {
+  s.inner = conv_arr4_S(u.inner);
+  s.inner[1] = conv_S(u.inner[2u]);
+  s.inner[3].m = load_u_inner_2_m();
+  s.inner[1].m[0] = u.inner[0u].m_1.zxy;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_storage.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_storage.wgsl.expected.msl
new file mode 100644
index 0000000..c1f26b0
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_storage.wgsl.expected.msl

@@ -0,0 +1,33 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
+  /* 0x0008 */ half3x3 m;
+  /* 0x0020 */ tint_array<int8_t, 32> tint_pad_1;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_2;
+};
+
+kernel void f(device tint_array<S, 4>* tint_symbol [[buffer(1)]], const constant tint_array<S, 4>* tint_symbol_1 [[buffer(0)]]) {
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[2];
+  (*(tint_symbol))[3].m = (*(tint_symbol_1))[2].m;
+  (*(tint_symbol))[1].m[0] = half3((*(tint_symbol_1))[0].m[1]).zxy;
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_storage.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_storage.wgsl.expected.spvasm
new file mode 100644
index 0000000..cdbee5f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_storage.wgsl.expected.spvasm

@@ -0,0 +1,186 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 109
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "m_2"
+               OpMemberName %S_std140 4 "after"
+               OpName %u "u"
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %s "s"
+               OpName %conv_S "conv_S"
+               OpName %val "val"
+               OpName %conv_arr4_S "conv_arr4_S"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_u_inner_2_m "load_u_inner_2_m"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 8
+               OpMemberDecorate %S_std140 2 Offset 16
+               OpMemberDecorate %S_std140 3 Offset 24
+               OpMemberDecorate %S_std140 4 Offset 64
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 8
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 8
+               OpMemberDecorate %S 2 Offset 64
+               OpDecorate %_arr_S_uint_4 ArrayStride 128
+               OpDecorate %s DescriptorSet 0
+               OpDecorate %s Binding 1
+        %int = OpTypeInt 32 1
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+   %S_std140 = OpTypeStruct %int %v3half %v3half %v3half %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat3v3half = OpTypeMatrix %v3half 3
+          %S = OpTypeStruct %int %mat3v3half %int
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+    %u_block = OpTypeStruct %_arr_S_uint_4
+%_ptr_StorageBuffer_u_block = OpTypePointer StorageBuffer %u_block
+          %s = OpVariable %_ptr_StorageBuffer_u_block StorageBuffer
+         %17 = OpTypeFunction %S %S_std140
+         %28 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
+%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
+         %34 = OpConstantNull %_arr_S_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %37 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
+         %50 = OpConstantNull %_arr_S_std140_uint_4
+%_ptr_Function_S = OpTypePointer Function %S
+%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
+     %uint_1 = OpConstant %uint 1
+         %63 = OpTypeFunction %mat3v3half
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+     %uint_3 = OpConstant %uint 3
+       %void = OpTypeVoid
+         %83 = OpTypeFunction %void
+%_ptr_StorageBuffer__arr_S_uint_4 = OpTypePointer StorageBuffer %_arr_S_uint_4
+%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
+      %int_1 = OpConstant %int 1
+%_ptr_StorageBuffer_S = OpTypePointer StorageBuffer %S
+      %int_3 = OpConstant %int 3
+%_ptr_StorageBuffer_mat3v3half = OpTypePointer StorageBuffer %mat3v3half
+        %103 = OpConstantNull %int
+%_ptr_StorageBuffer_v3half = OpTypePointer StorageBuffer %v3half
+     %conv_S = OpFunction %S None %17
+        %val = OpFunctionParameter %S_std140
+         %20 = OpLabel
+         %21 = OpCompositeExtract %int %val 0
+         %22 = OpCompositeExtract %v3half %val 1
+         %23 = OpCompositeExtract %v3half %val 2
+         %24 = OpCompositeExtract %v3half %val 3
+         %25 = OpCompositeConstruct %mat3v3half %22 %23 %24
+         %26 = OpCompositeExtract %int %val 4
+         %27 = OpCompositeConstruct %S %21 %25 %26
+               OpReturnValue %27
+               OpFunctionEnd
+%conv_arr4_S = OpFunction %_arr_S_uint_4 None %28
+      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
+         %31 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %34
+          %i = OpVariable %_ptr_Function_uint Function %37
+%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %50
+               OpBranch %38
+         %38 = OpLabel
+               OpLoopMerge %39 %40 None
+               OpBranch %41
+         %41 = OpLabel
+         %43 = OpLoad %uint %i
+         %44 = OpULessThan %bool %43 %uint_4
+         %42 = OpLogicalNot %bool %44
+               OpSelectionMerge %46 None
+               OpBranchConditional %42 %47 %46
+         %47 = OpLabel
+               OpBranch %39
+         %46 = OpLabel
+               OpStore %var_for_index %val_0
+         %51 = OpLoad %uint %i
+         %53 = OpAccessChain %_ptr_Function_S %arr %51
+         %55 = OpLoad %uint %i
+         %57 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %55
+         %58 = OpLoad %S_std140 %57
+         %54 = OpFunctionCall %S %conv_S %58
+               OpStore %53 %54
+               OpBranch %40
+         %40 = OpLabel
+         %59 = OpLoad %uint %i
+         %61 = OpIAdd %uint %59 %uint_1
+               OpStore %i %61
+               OpBranch %38
+         %39 = OpLabel
+         %62 = OpLoad %_arr_S_uint_4 %arr
+               OpReturnValue %62
+               OpFunctionEnd
+%load_u_inner_2_m = OpFunction %mat3v3half None %63
+         %65 = OpLabel
+         %70 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %73 = OpAccessChain %_ptr_Uniform_v3half %70 %uint_1
+         %74 = OpLoad %v3half %73
+         %76 = OpAccessChain %_ptr_Uniform_v3half %70 %uint_2
+         %77 = OpLoad %v3half %76
+         %80 = OpAccessChain %_ptr_Uniform_v3half %70 %uint_3
+         %81 = OpLoad %v3half %80
+         %82 = OpCompositeConstruct %mat3v3half %74 %77 %81
+               OpReturnValue %82
+               OpFunctionEnd
+          %f = OpFunction %void None %83
+         %86 = OpLabel
+         %88 = OpAccessChain %_ptr_StorageBuffer__arr_S_uint_4 %s %uint_0
+         %91 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
+         %92 = OpLoad %_arr_S_std140_uint_4 %91
+         %89 = OpFunctionCall %_arr_S_uint_4 %conv_arr4_S %92
+               OpStore %88 %89
+         %95 = OpAccessChain %_ptr_StorageBuffer_S %s %uint_0 %int_1
+         %97 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %98 = OpLoad %S_std140 %97
+         %96 = OpFunctionCall %S %conv_S %98
+               OpStore %95 %96
+        %101 = OpAccessChain %_ptr_StorageBuffer_mat3v3half %s %uint_0 %int_3 %uint_1
+        %102 = OpFunctionCall %mat3v3half %load_u_inner_2_m
+               OpStore %101 %102
+        %105 = OpAccessChain %_ptr_StorageBuffer_v3half %s %uint_0 %int_1 %uint_1 %103
+        %106 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0 %37 %uint_2
+        %107 = OpLoad %v3half %106
+        %108 = OpVectorShuffle %v3half %107 %107 2 0 1
+               OpStore %105 %108
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_storage.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_storage.wgsl.expected.wgsl
new file mode 100644
index 0000000..6a5bf6f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_storage.wgsl.expected.wgsl

@@ -0,0 +1,20 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat3x3<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  s = u;
+  s[1] = u[2];
+  s[3].m = u[2].m;
+  s[1].m[0] = u[0].m[1].zxy;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_workgroup.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_workgroup.wgsl
new file mode 100644
index 0000000..c8835b5
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_workgroup.wgsl

@@ -0,0 +1,19 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat3x3<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+var<workgroup> w : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    w = u;
+    w[1] = u[2];
+    w[3].m = u[2].m;
+    w[1].m[0] = u[0].m[1].zxy;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_workgroup.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_workgroup.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..10d0d09
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_workgroup.wgsl.expected.dxc.hlsl

@@ -0,0 +1,75 @@
+struct S {
+  int before;
+  matrix<float16_t, 3, 3> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+groupshared S w[4];
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+matrix<float16_t, 3, 3> tint_symbol_5(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  return matrix<float16_t, 3, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]));
+}
+
+S tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_3 = ((offset + 0u)) / 4;
+  const uint scalar_offset_4 = ((offset + 64u)) / 4;
+  const S tint_symbol_8 = {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4]), tint_symbol_5(buffer, (offset + 8u)), asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4])};
+  return tint_symbol_8;
+}
+
+typedef S tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 128u)));
+    }
+  }
+  return arr;
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      const uint i = idx;
+      const S tint_symbol_7 = (S)0;
+      w[i] = tint_symbol_7;
+    }
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = tint_symbol_3(u, 256u);
+  w[3].m = tint_symbol_5(u, 264u);
+  uint2 ubo_load_6 = u[1].xy;
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  w[1].m[0] = vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]).zxy;
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_workgroup.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_workgroup.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..b205f67
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_workgroup.wgsl.expected.fxc.hlsl

@@ -0,0 +1,80 @@
+SKIP: FAILED
+
+struct S {
+  int before;
+  matrix<float16_t, 3, 3> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+groupshared S w[4];
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+matrix<float16_t, 3, 3> tint_symbol_5(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  return matrix<float16_t, 3, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]));
+}
+
+S tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_3 = ((offset + 0u)) / 4;
+  const uint scalar_offset_4 = ((offset + 64u)) / 4;
+  const S tint_symbol_8 = {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4]), tint_symbol_5(buffer, (offset + 8u)), asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4])};
+  return tint_symbol_8;
+}
+
+typedef S tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 128u)));
+    }
+  }
+  return arr;
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      const uint i = idx;
+      const S tint_symbol_7 = (S)0;
+      w[i] = tint_symbol_7;
+    }
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = tint_symbol_3(u, 256u);
+  w[3].m = tint_symbol_5(u, 264u);
+  uint2 ubo_load_6 = u[1].xy;
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  w[1].m[0] = vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]).zxy;
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000021282D17B60(3,10-18): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_workgroup.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_workgroup.wgsl.expected.glsl
new file mode 100644
index 0000000..9a55a19
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_workgroup.wgsl.expected.glsl

@@ -0,0 +1,108 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct S {
+  int before;
+  uint pad;
+  f16mat3 m;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  int after;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+};
+
+struct S_std140 {
+  int before;
+  uint pad;
+  f16vec3 m_0;
+  f16vec3 m_1;
+  f16vec3 m_2;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  int after;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  S_std140 inner[4];
+} u;
+
+shared S w[4];
+S conv_S(S_std140 val) {
+  return S(val.before, val.pad, f16mat3(val.m_0, val.m_1, val.m_2), val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.pad_7, val.pad_8, val.after, val.pad_9, val.pad_10, val.pad_11, val.pad_12, val.pad_13, val.pad_14, val.pad_15, val.pad_16, val.pad_17, val.pad_18, val.pad_19, val.pad_20, val.pad_21, val.pad_22, val.pad_23);
+}
+
+S[4] conv_arr4_S(S_std140 val[4]) {
+  S arr[4] = S[4](S(0, 0u, f16mat3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_S(val[i]);
+    }
+  }
+  return arr;
+}
+
+f16mat3 load_u_inner_2_m() {
+  return f16mat3(u.inner[2u].m_0, u.inner[2u].m_1, u.inner[2u].m_2);
+}
+
+void f(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      uint i = idx;
+      S tint_symbol = S(0, 0u, f16mat3(f16vec3(0.0hf), f16vec3(0.0hf), f16vec3(0.0hf)), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u);
+      w[i] = tint_symbol;
+    }
+  }
+  barrier();
+  w = conv_arr4_S(u.inner);
+  w[1] = conv_S(u.inner[2u]);
+  w[3].m = load_u_inner_2_m();
+  w[1].m[0] = u.inner[0u].m_1.zxy;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f(gl_LocalInvocationIndex);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_workgroup.wgsl.expected.msl
new file mode 100644
index 0000000..0e181df
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_workgroup.wgsl.expected.msl

@@ -0,0 +1,48 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
+  /* 0x0008 */ half3x3 m;
+  /* 0x0020 */ tint_array<int8_t, 32> tint_pad_1;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_2;
+};
+
+struct tint_symbol_6 {
+  tint_array<S, 4> w;
+};
+
+void f_inner(uint local_invocation_index, threadgroup tint_array<S, 4>* const tint_symbol_1, const constant tint_array<S, 4>* const tint_symbol_2) {
+  for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+    uint const i = idx;
+    S const tint_symbol = S{};
+    (*(tint_symbol_1))[i] = tint_symbol;
+  }
+  threadgroup_barrier(mem_flags::mem_threadgroup);
+  *(tint_symbol_1) = *(tint_symbol_2);
+  (*(tint_symbol_1))[1] = (*(tint_symbol_2))[2];
+  (*(tint_symbol_1))[3].m = (*(tint_symbol_2))[2].m;
+  (*(tint_symbol_1))[1].m[0] = half3((*(tint_symbol_2))[0].m[1]).zxy;
+}
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol_5 [[buffer(0)]], threadgroup tint_symbol_6* tint_symbol_4 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
+  threadgroup tint_array<S, 4>* const tint_symbol_3 = &((*(tint_symbol_4)).w);
+  f_inner(local_invocation_index, tint_symbol_3, tint_symbol_5);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_workgroup.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_workgroup.wgsl.expected.spvasm
new file mode 100644
index 0000000..002fd8b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_workgroup.wgsl.expected.spvasm

@@ -0,0 +1,220 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 131
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f" %local_invocation_index_1
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %local_invocation_index_1 "local_invocation_index_1"
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "m_2"
+               OpMemberName %S_std140 4 "after"
+               OpName %u "u"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %w "w"
+               OpName %conv_S "conv_S"
+               OpName %val "val"
+               OpName %conv_arr4_S "conv_arr4_S"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_u_inner_2_m "load_u_inner_2_m"
+               OpName %f_inner "f_inner"
+               OpName %local_invocation_index "local_invocation_index"
+               OpName %idx "idx"
+               OpName %f "f"
+               OpDecorate %local_invocation_index_1 BuiltIn LocalInvocationIndex
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 8
+               OpMemberDecorate %S_std140 2 Offset 16
+               OpMemberDecorate %S_std140 3 Offset 24
+               OpMemberDecorate %S_std140 4 Offset 64
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 8
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 8
+               OpMemberDecorate %S 2 Offset 64
+               OpDecorate %_arr_S_uint_4 ArrayStride 128
+       %uint = OpTypeInt 32 0
+%_ptr_Input_uint = OpTypePointer Input %uint
+%local_invocation_index_1 = OpVariable %_ptr_Input_uint Input
+        %int = OpTypeInt 32 1
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+   %S_std140 = OpTypeStruct %int %v3half %v3half %v3half %int
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat3v3half = OpTypeMatrix %v3half 3
+          %S = OpTypeStruct %int %mat3v3half %int
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+%_ptr_Workgroup__arr_S_uint_4 = OpTypePointer Workgroup %_arr_S_uint_4
+          %w = OpVariable %_ptr_Workgroup__arr_S_uint_4 Workgroup
+         %18 = OpTypeFunction %S %S_std140
+         %29 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
+%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
+         %35 = OpConstantNull %_arr_S_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %38 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
+         %51 = OpConstantNull %_arr_S_std140_uint_4
+%_ptr_Function_S = OpTypePointer Function %S
+%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
+     %uint_1 = OpConstant %uint 1
+         %64 = OpTypeFunction %mat3v3half
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+     %uint_3 = OpConstant %uint 3
+       %void = OpTypeVoid
+         %84 = OpTypeFunction %void %uint
+%_ptr_Workgroup_S = OpTypePointer Workgroup %S
+        %102 = OpConstantNull %S
+   %uint_264 = OpConstant %uint 264
+%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
+      %int_1 = OpConstant %int 1
+      %int_3 = OpConstant %int 3
+%_ptr_Workgroup_mat3v3half = OpTypePointer Workgroup %mat3v3half
+        %120 = OpConstantNull %int
+%_ptr_Workgroup_v3half = OpTypePointer Workgroup %v3half
+        %126 = OpTypeFunction %void
+     %conv_S = OpFunction %S None %18
+        %val = OpFunctionParameter %S_std140
+         %21 = OpLabel
+         %22 = OpCompositeExtract %int %val 0
+         %23 = OpCompositeExtract %v3half %val 1
+         %24 = OpCompositeExtract %v3half %val 2
+         %25 = OpCompositeExtract %v3half %val 3
+         %26 = OpCompositeConstruct %mat3v3half %23 %24 %25
+         %27 = OpCompositeExtract %int %val 4
+         %28 = OpCompositeConstruct %S %22 %26 %27
+               OpReturnValue %28
+               OpFunctionEnd
+%conv_arr4_S = OpFunction %_arr_S_uint_4 None %29
+      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
+         %32 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %35
+          %i = OpVariable %_ptr_Function_uint Function %38
+%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %51
+               OpBranch %39
+         %39 = OpLabel
+               OpLoopMerge %40 %41 None
+               OpBranch %42
+         %42 = OpLabel
+         %44 = OpLoad %uint %i
+         %45 = OpULessThan %bool %44 %uint_4
+         %43 = OpLogicalNot %bool %45
+               OpSelectionMerge %47 None
+               OpBranchConditional %43 %48 %47
+         %48 = OpLabel
+               OpBranch %40
+         %47 = OpLabel
+               OpStore %var_for_index %val_0
+         %52 = OpLoad %uint %i
+         %54 = OpAccessChain %_ptr_Function_S %arr %52
+         %56 = OpLoad %uint %i
+         %58 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %56
+         %59 = OpLoad %S_std140 %58
+         %55 = OpFunctionCall %S %conv_S %59
+               OpStore %54 %55
+               OpBranch %41
+         %41 = OpLabel
+         %60 = OpLoad %uint %i
+         %62 = OpIAdd %uint %60 %uint_1
+               OpStore %i %62
+               OpBranch %39
+         %40 = OpLabel
+         %63 = OpLoad %_arr_S_uint_4 %arr
+               OpReturnValue %63
+               OpFunctionEnd
+%load_u_inner_2_m = OpFunction %mat3v3half None %64
+         %66 = OpLabel
+         %71 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %74 = OpAccessChain %_ptr_Uniform_v3half %71 %uint_1
+         %75 = OpLoad %v3half %74
+         %77 = OpAccessChain %_ptr_Uniform_v3half %71 %uint_2
+         %78 = OpLoad %v3half %77
+         %81 = OpAccessChain %_ptr_Uniform_v3half %71 %uint_3
+         %82 = OpLoad %v3half %81
+         %83 = OpCompositeConstruct %mat3v3half %75 %78 %82
+               OpReturnValue %83
+               OpFunctionEnd
+    %f_inner = OpFunction %void None %84
+%local_invocation_index = OpFunctionParameter %uint
+         %88 = OpLabel
+        %idx = OpVariable %_ptr_Function_uint Function %38
+               OpStore %idx %local_invocation_index
+               OpBranch %90
+         %90 = OpLabel
+               OpLoopMerge %91 %92 None
+               OpBranch %93
+         %93 = OpLabel
+         %95 = OpLoad %uint %idx
+         %96 = OpULessThan %bool %95 %uint_4
+         %94 = OpLogicalNot %bool %96
+               OpSelectionMerge %97 None
+               OpBranchConditional %94 %98 %97
+         %98 = OpLabel
+               OpBranch %91
+         %97 = OpLabel
+         %99 = OpLoad %uint %idx
+        %101 = OpAccessChain %_ptr_Workgroup_S %w %99
+               OpStore %101 %102
+               OpBranch %92
+         %92 = OpLabel
+        %103 = OpLoad %uint %idx
+        %104 = OpIAdd %uint %103 %uint_1
+               OpStore %idx %104
+               OpBranch %90
+         %91 = OpLabel
+               OpControlBarrier %uint_2 %uint_2 %uint_264
+        %109 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
+        %110 = OpLoad %_arr_S_std140_uint_4 %109
+        %107 = OpFunctionCall %_arr_S_uint_4 %conv_arr4_S %110
+               OpStore %w %107
+        %112 = OpAccessChain %_ptr_Workgroup_S %w %int_1
+        %114 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+        %115 = OpLoad %S_std140 %114
+        %113 = OpFunctionCall %S %conv_S %115
+               OpStore %112 %113
+        %118 = OpAccessChain %_ptr_Workgroup_mat3v3half %w %int_3 %uint_1
+        %119 = OpFunctionCall %mat3v3half %load_u_inner_2_m
+               OpStore %118 %119
+        %122 = OpAccessChain %_ptr_Workgroup_v3half %w %int_1 %uint_1 %120
+        %123 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0 %38 %uint_2
+        %124 = OpLoad %v3half %123
+        %125 = OpVectorShuffle %v3half %124 %124 2 0 1
+               OpStore %122 %125
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %126
+        %128 = OpLabel
+        %130 = OpLoad %uint %local_invocation_index_1
+        %129 = OpFunctionCall %void %f_inner %130
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_workgroup.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_workgroup.wgsl.expected.wgsl
new file mode 100644
index 0000000..32a68e8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_workgroup.wgsl.expected.wgsl

@@ -0,0 +1,20 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat3x3<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+var<workgroup> w : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  w = u;
+  w[1] = u[2];
+  w[3].m = u[2].m;
+  w[1].m[0] = u[0].m[1].zxy;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f32/dynamic_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/dynamic_index_via_ptr.wgsl
new file mode 100644
index 0000000..ed316bc
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/dynamic_index_via_ptr.wgsl

@@ -0,0 +1,32 @@
+struct Inner {
+  @size(64)
+  m : mat3x3<f32>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+var<private> counter = 0;
+fn i() -> i32 { counter++; return counter; }
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a           = &a;
+  let p_a_i         = &((*p_a)[i()]);
+  let p_a_i_a       = &((*p_a_i).a);
+  let p_a_i_a_i     = &((*p_a_i_a)[i()]);
+  let p_a_i_a_i_m   = &((*p_a_i_a_i).m);
+  let p_a_i_a_i_m_i = &((*p_a_i_a_i_m)[i()]);
+
+
+  let l_a             : array<Outer, 4> =  *p_a;
+  let l_a_i           : Outer           =  *p_a_i;
+  let l_a_i_a         : array<Inner, 4> =  *p_a_i_a;
+  let l_a_i_a_i       : Inner           =  *p_a_i_a_i;
+  let l_a_i_a_i_m     : mat3x3<f32>     =  *p_a_i_a_i_m;
+  let l_a_i_a_i_m_i   : vec3<f32>       =  *p_a_i_a_i_m_i;
+  let l_a_i_a_i_m_i_i : f32             = (*p_a_i_a_i_m_i)[i()];
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..debcd3f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,76 @@
+struct Inner {
+  float3x3 m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+float3x3 tint_symbol_8(uint4 buffer[64], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz));
+}
+
+Inner tint_symbol_7(uint4 buffer[64], uint offset) {
+  const Inner tint_symbol_11 = {tint_symbol_8(buffer, (offset + 0u))};
+  return tint_symbol_11;
+}
+
+typedef Inner tint_symbol_6_ret[4];
+tint_symbol_6_ret tint_symbol_6(uint4 buffer[64], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_7(buffer, (offset + (i_1 * 64u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_5(uint4 buffer[64], uint offset) {
+  const Outer tint_symbol_12 = {tint_symbol_6(buffer, (offset + 0u))};
+  return tint_symbol_12;
+}
+
+typedef Outer tint_symbol_4_ret[4];
+tint_symbol_4_ret tint_symbol_4(uint4 buffer[64], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    for(uint i_2 = 0u; (i_2 < 4u); i_2 = (i_2 + 1u)) {
+      arr_1[i_2] = tint_symbol_5(buffer, (offset + (i_2 * 256u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_a_i_save = i();
+  const int p_a_i_a_i_save = i();
+  const int p_a_i_a_i_m_i_save = i();
+  const Outer l_a[4] = tint_symbol_4(a, 0u);
+  const Outer l_a_i = tint_symbol_5(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a[4] = tint_symbol_6(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a_i = tint_symbol_7(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const float3x3 l_a_i_a_i_m = tint_symbol_8(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const uint scalar_offset_3 = ((((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))) + (16u * uint(p_a_i_a_i_m_i_save)))) / 4;
+  const float3 l_a_i_a_i_m_i = asfloat(a[scalar_offset_3 / 4].xyz);
+  const int tint_symbol = p_a_i_save;
+  const int tint_symbol_1 = p_a_i_a_i_save;
+  const int tint_symbol_2 = p_a_i_a_i_m_i_save;
+  const int tint_symbol_3 = i();
+  const uint scalar_offset_4 = (((((256u * uint(tint_symbol)) + (64u * uint(tint_symbol_1))) + (16u * uint(tint_symbol_2))) + (4u * uint(tint_symbol_3)))) / 4;
+  const float l_a_i_a_i_m_i_i = asfloat(a[scalar_offset_4 / 4][scalar_offset_4 % 4]);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..debcd3f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,76 @@
+struct Inner {
+  float3x3 m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+float3x3 tint_symbol_8(uint4 buffer[64], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz));
+}
+
+Inner tint_symbol_7(uint4 buffer[64], uint offset) {
+  const Inner tint_symbol_11 = {tint_symbol_8(buffer, (offset + 0u))};
+  return tint_symbol_11;
+}
+
+typedef Inner tint_symbol_6_ret[4];
+tint_symbol_6_ret tint_symbol_6(uint4 buffer[64], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_7(buffer, (offset + (i_1 * 64u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_5(uint4 buffer[64], uint offset) {
+  const Outer tint_symbol_12 = {tint_symbol_6(buffer, (offset + 0u))};
+  return tint_symbol_12;
+}
+
+typedef Outer tint_symbol_4_ret[4];
+tint_symbol_4_ret tint_symbol_4(uint4 buffer[64], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    for(uint i_2 = 0u; (i_2 < 4u); i_2 = (i_2 + 1u)) {
+      arr_1[i_2] = tint_symbol_5(buffer, (offset + (i_2 * 256u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_a_i_save = i();
+  const int p_a_i_a_i_save = i();
+  const int p_a_i_a_i_m_i_save = i();
+  const Outer l_a[4] = tint_symbol_4(a, 0u);
+  const Outer l_a_i = tint_symbol_5(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a[4] = tint_symbol_6(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a_i = tint_symbol_7(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const float3x3 l_a_i_a_i_m = tint_symbol_8(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const uint scalar_offset_3 = ((((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))) + (16u * uint(p_a_i_a_i_m_i_save)))) / 4;
+  const float3 l_a_i_a_i_m_i = asfloat(a[scalar_offset_3 / 4].xyz);
+  const int tint_symbol = p_a_i_save;
+  const int tint_symbol_1 = p_a_i_a_i_save;
+  const int tint_symbol_2 = p_a_i_a_i_m_i_save;
+  const int tint_symbol_3 = i();
+  const uint scalar_offset_4 = (((((256u * uint(tint_symbol)) + (64u * uint(tint_symbol_1))) + (16u * uint(tint_symbol_2))) + (4u * uint(tint_symbol_3)))) / 4;
+  const float l_a_i_a_i_m_i_i = asfloat(a[scalar_offset_4 / 4][scalar_offset_4 % 4]);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f32/dynamic_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/dynamic_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..1d273da
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/dynamic_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,46 @@
+#version 310 es
+
+struct Inner {
+  mat3 m;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+};
+
+struct Outer {
+  Inner a[4];
+};
+
+layout(binding = 0, std140) uniform a_block_ubo {
+  Outer inner[4];
+} a;
+
+int counter = 0;
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+void f() {
+  int tint_symbol = i();
+  int p_a_i_save = tint_symbol;
+  int tint_symbol_1 = i();
+  int p_a_i_a_i_save = tint_symbol_1;
+  int tint_symbol_2 = i();
+  int p_a_i_a_i_m_i_save = tint_symbol_2;
+  Outer l_a[4] = a.inner;
+  Outer l_a_i = a.inner[p_a_i_save];
+  Inner l_a_i_a[4] = a.inner[p_a_i_save].a;
+  Inner l_a_i_a_i = a.inner[p_a_i_save].a[p_a_i_a_i_save];
+  mat3 l_a_i_a_i_m = a.inner[p_a_i_save].a[p_a_i_a_i_save].m;
+  vec3 l_a_i_a_i_m_i = a.inner[p_a_i_save].a[p_a_i_a_i_save].m[p_a_i_a_i_m_i_save];
+  int tint_symbol_3 = i();
+  float l_a_i_a_i_m_i_i = a.inner[p_a_i_save].a[p_a_i_a_i_save].m[p_a_i_a_i_m_i_save][tint_symbol_3];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f32/dynamic_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/dynamic_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..ec3d690
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/dynamic_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,49 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct Inner {
+  /* 0x0000 */ float3x3 m;
+  /* 0x0030 */ tint_array<int8_t, 16> tint_pad;
+};
+
+struct Outer {
+  /* 0x0000 */ tint_array<Inner, 4> a;
+};
+
+int i() {
+  thread int tint_symbol_4 = 0;
+  tint_symbol_4 = as_type<int>((as_type<uint>(tint_symbol_4) + as_type<uint>(1)));
+  return tint_symbol_4;
+}
+
+kernel void f(const constant tint_array<Outer, 4>* tint_symbol_5 [[buffer(0)]]) {
+  int const tint_symbol = i();
+  int const p_a_i_save = tint_symbol;
+  int const tint_symbol_1 = i();
+  int const p_a_i_a_i_save = tint_symbol_1;
+  int const tint_symbol_2 = i();
+  int const p_a_i_a_i_m_i_save = tint_symbol_2;
+  tint_array<Outer, 4> const l_a = *(tint_symbol_5);
+  Outer const l_a_i = (*(tint_symbol_5))[p_a_i_save];
+  tint_array<Inner, 4> const l_a_i_a = (*(tint_symbol_5))[p_a_i_save].a;
+  Inner const l_a_i_a_i = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save];
+  float3x3 const l_a_i_a_i_m = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save].m;
+  float3 const l_a_i_a_i_m_i = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save].m[p_a_i_a_i_m_i_save];
+  int const tint_symbol_3 = i();
+  float const l_a_i_a_i_m_i_i = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save].m[p_a_i_a_i_m_i_save][tint_symbol_3];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f32/dynamic_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/dynamic_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..3c32545
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/dynamic_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,88 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 54
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %a_block "a_block"
+               OpMemberName %a_block 0 "inner"
+               OpName %Outer "Outer"
+               OpMemberName %Outer 0 "a"
+               OpName %Inner "Inner"
+               OpMemberName %Inner 0 "m"
+               OpName %a "a"
+               OpName %counter "counter"
+               OpName %i "i"
+               OpName %f "f"
+               OpDecorate %a_block Block
+               OpMemberDecorate %a_block 0 Offset 0
+               OpMemberDecorate %Outer 0 Offset 0
+               OpMemberDecorate %Inner 0 Offset 0
+               OpMemberDecorate %Inner 0 ColMajor
+               OpMemberDecorate %Inner 0 MatrixStride 16
+               OpDecorate %_arr_Inner_uint_4 ArrayStride 64
+               OpDecorate %_arr_Outer_uint_4 ArrayStride 256
+               OpDecorate %a NonWritable
+               OpDecorate %a DescriptorSet 0
+               OpDecorate %a Binding 0
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat3v3float = OpTypeMatrix %v3float 3
+      %Inner = OpTypeStruct %mat3v3float
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_Inner_uint_4 = OpTypeArray %Inner %uint_4
+      %Outer = OpTypeStruct %_arr_Inner_uint_4
+%_arr_Outer_uint_4 = OpTypeArray %Outer %uint_4
+    %a_block = OpTypeStruct %_arr_Outer_uint_4
+%_ptr_Uniform_a_block = OpTypePointer Uniform %a_block
+          %a = OpVariable %_ptr_Uniform_a_block Uniform
+        %int = OpTypeInt 32 1
+         %14 = OpConstantNull %int
+%_ptr_Private_int = OpTypePointer Private %int
+    %counter = OpVariable %_ptr_Private_int Private %14
+         %17 = OpTypeFunction %int
+      %int_1 = OpConstant %int 1
+       %void = OpTypeVoid
+         %24 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_Outer_uint_4 = OpTypePointer Uniform %_arr_Outer_uint_4
+%_ptr_Uniform_Outer = OpTypePointer Uniform %Outer
+%_ptr_Uniform__arr_Inner_uint_4 = OpTypePointer Uniform %_arr_Inner_uint_4
+%_ptr_Uniform_Inner = OpTypePointer Uniform %Inner
+%_ptr_Uniform_mat3v3float = OpTypePointer Uniform %mat3v3float
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+          %i = OpFunction %int None %17
+         %19 = OpLabel
+         %20 = OpLoad %int %counter
+         %22 = OpIAdd %int %20 %int_1
+               OpStore %counter %22
+         %23 = OpLoad %int %counter
+               OpReturnValue %23
+               OpFunctionEnd
+          %f = OpFunction %void None %24
+         %27 = OpLabel
+         %28 = OpFunctionCall %int %i
+         %29 = OpFunctionCall %int %i
+         %30 = OpFunctionCall %int %i
+         %33 = OpAccessChain %_ptr_Uniform__arr_Outer_uint_4 %a %uint_0
+         %34 = OpLoad %_arr_Outer_uint_4 %33
+         %36 = OpAccessChain %_ptr_Uniform_Outer %a %uint_0 %28
+         %37 = OpLoad %Outer %36
+         %39 = OpAccessChain %_ptr_Uniform__arr_Inner_uint_4 %a %uint_0 %28 %uint_0
+         %40 = OpLoad %_arr_Inner_uint_4 %39
+         %42 = OpAccessChain %_ptr_Uniform_Inner %a %uint_0 %28 %uint_0 %29
+         %43 = OpLoad %Inner %42
+         %45 = OpAccessChain %_ptr_Uniform_mat3v3float %a %uint_0 %28 %uint_0 %29 %uint_0
+         %46 = OpLoad %mat3v3float %45
+         %48 = OpAccessChain %_ptr_Uniform_v3float %a %uint_0 %28 %uint_0 %29 %uint_0 %30
+         %49 = OpLoad %v3float %48
+         %50 = OpFunctionCall %int %i
+         %52 = OpAccessChain %_ptr_Uniform_float %a %uint_0 %28 %uint_0 %29 %uint_0 %30 %50
+         %53 = OpLoad %float %52
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f32/dynamic_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/dynamic_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..028d0c2
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/dynamic_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,34 @@
+struct Inner {
+  @size(64)
+  m : mat3x3<f32>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+var<private> counter = 0;
+
+fn i() -> i32 {
+  counter++;
+  return counter;
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &(a);
+  let p_a_i = &((*(p_a))[i()]);
+  let p_a_i_a = &((*(p_a_i)).a);
+  let p_a_i_a_i = &((*(p_a_i_a))[i()]);
+  let p_a_i_a_i_m = &((*(p_a_i_a_i)).m);
+  let p_a_i_a_i_m_i = &((*(p_a_i_a_i_m))[i()]);
+  let l_a : array<Outer, 4> = *(p_a);
+  let l_a_i : Outer = *(p_a_i);
+  let l_a_i_a : array<Inner, 4> = *(p_a_i_a);
+  let l_a_i_a_i : Inner = *(p_a_i_a_i);
+  let l_a_i_a_i_m : mat3x3<f32> = *(p_a_i_a_i_m);
+  let l_a_i_a_i_m_i : vec3<f32> = *(p_a_i_a_i_m_i);
+  let l_a_i_a_i_m_i_i : f32 = (*(p_a_i_a_i_m_i))[i()];
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f32/static_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/static_index_via_ptr.wgsl
new file mode 100644
index 0000000..bd25e46
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/static_index_via_ptr.wgsl

@@ -0,0 +1,29 @@
+struct Inner {
+  @size(64)
+  m : mat3x3<f32>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &a;
+  let p_a_3 = &((*p_a)[3]);
+  let p_a_3_a = &((*p_a_3).a);
+  let p_a_3_a_2 = &((*p_a_3_a)[2]);
+  let p_a_3_a_2_m = &((*p_a_3_a_2).m);
+  let p_a_3_a_2_m_1 = &((*p_a_3_a_2_m)[1]);
+
+
+  let l_a             : array<Outer, 4> = *p_a;
+  let l_a_3           : Outer           = *p_a_3;
+  let l_a_3_a         : array<Inner, 4> = *p_a_3_a;
+  let l_a_3_a_2       : Inner           = *p_a_3_a_2;
+  let l_a_3_a_2_m     : mat3x3<f32>     = *p_a_3_a_2_m;
+  let l_a_3_a_2_m_1   : vec3<f32>       = *p_a_3_a_2_m_1;
+  let l_a_3_a_2_m_1_0 : f32             = (*p_a_3_a_2_m_1)[0];
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..13f6480
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,61 @@
+struct Inner {
+  float3x3 m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];
+};
+
+float3x3 tint_symbol_4(uint4 buffer[64], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz));
+}
+
+Inner tint_symbol_3(uint4 buffer[64], uint offset) {
+  const Inner tint_symbol_7 = {tint_symbol_4(buffer, (offset + 0u))};
+  return tint_symbol_7;
+}
+
+typedef Inner tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[64], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_3(buffer, (offset + (i * 64u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_1(uint4 buffer[64], uint offset) {
+  const Outer tint_symbol_8 = {tint_symbol_2(buffer, (offset + 0u))};
+  return tint_symbol_8;
+}
+
+typedef Outer tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[64], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr_1[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 256u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const Outer l_a[4] = tint_symbol(a, 0u);
+  const Outer l_a_3 = tint_symbol_1(a, 768u);
+  const Inner l_a_3_a[4] = tint_symbol_2(a, 768u);
+  const Inner l_a_3_a_2 = tint_symbol_3(a, 896u);
+  const float3x3 l_a_3_a_2_m = tint_symbol_4(a, 896u);
+  const float3 l_a_3_a_2_m_1 = asfloat(a[57].xyz);
+  const float l_a_3_a_2_m_1_0 = asfloat(a[57].x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..13f6480
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,61 @@
+struct Inner {
+  float3x3 m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];
+};
+
+float3x3 tint_symbol_4(uint4 buffer[64], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz));
+}
+
+Inner tint_symbol_3(uint4 buffer[64], uint offset) {
+  const Inner tint_symbol_7 = {tint_symbol_4(buffer, (offset + 0u))};
+  return tint_symbol_7;
+}
+
+typedef Inner tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[64], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_3(buffer, (offset + (i * 64u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_1(uint4 buffer[64], uint offset) {
+  const Outer tint_symbol_8 = {tint_symbol_2(buffer, (offset + 0u))};
+  return tint_symbol_8;
+}
+
+typedef Outer tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[64], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr_1[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 256u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const Outer l_a[4] = tint_symbol(a, 0u);
+  const Outer l_a_3 = tint_symbol_1(a, 768u);
+  const Inner l_a_3_a[4] = tint_symbol_2(a, 768u);
+  const Inner l_a_3_a_2 = tint_symbol_3(a, 896u);
+  const float3x3 l_a_3_a_2_m = tint_symbol_4(a, 896u);
+  const float3 l_a_3_a_2_m_1 = asfloat(a[57].xyz);
+  const float l_a_3_a_2_m_1_0 = asfloat(a[57].x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f32/static_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/static_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..ff47341
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/static_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,33 @@
+#version 310 es
+
+struct Inner {
+  mat3 m;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+};
+
+struct Outer {
+  Inner a[4];
+};
+
+layout(binding = 0, std140) uniform a_block_ubo {
+  Outer inner[4];
+} a;
+
+void f() {
+  Outer l_a[4] = a.inner;
+  Outer l_a_3 = a.inner[3];
+  Inner l_a_3_a[4] = a.inner[3].a;
+  Inner l_a_3_a_2 = a.inner[3].a[2];
+  mat3 l_a_3_a_2_m = a.inner[3].a[2].m;
+  vec3 l_a_3_a_2_m_1 = a.inner[3].a[2].m[1];
+  float l_a_3_a_2_m_1_0 = a.inner[3].a[2].m[1][0];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f32/static_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/static_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..4092ef5
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/static_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,36 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct Inner {
+  /* 0x0000 */ float3x3 m;
+  /* 0x0030 */ tint_array<int8_t, 16> tint_pad;
+};
+
+struct Outer {
+  /* 0x0000 */ tint_array<Inner, 4> a;
+};
+
+kernel void f(const constant tint_array<Outer, 4>* tint_symbol [[buffer(0)]]) {
+  tint_array<Outer, 4> const l_a = *(tint_symbol);
+  Outer const l_a_3 = (*(tint_symbol))[3];
+  tint_array<Inner, 4> const l_a_3_a = (*(tint_symbol))[3].a;
+  Inner const l_a_3_a_2 = (*(tint_symbol))[3].a[2];
+  float3x3 const l_a_3_a_2_m = (*(tint_symbol))[3].a[2].m;
+  float3 const l_a_3_a_2_m_1 = (*(tint_symbol))[3].a[2].m[1];
+  float const l_a_3_a_2_m_1_0 = (*(tint_symbol))[3].a[2].m[1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f32/static_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/static_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..0eeec1b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/static_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,73 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 44
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %a_block "a_block"
+               OpMemberName %a_block 0 "inner"
+               OpName %Outer "Outer"
+               OpMemberName %Outer 0 "a"
+               OpName %Inner "Inner"
+               OpMemberName %Inner 0 "m"
+               OpName %a "a"
+               OpName %f "f"
+               OpDecorate %a_block Block
+               OpMemberDecorate %a_block 0 Offset 0
+               OpMemberDecorate %Outer 0 Offset 0
+               OpMemberDecorate %Inner 0 Offset 0
+               OpMemberDecorate %Inner 0 ColMajor
+               OpMemberDecorate %Inner 0 MatrixStride 16
+               OpDecorate %_arr_Inner_uint_4 ArrayStride 64
+               OpDecorate %_arr_Outer_uint_4 ArrayStride 256
+               OpDecorate %a NonWritable
+               OpDecorate %a DescriptorSet 0
+               OpDecorate %a Binding 0
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat3v3float = OpTypeMatrix %v3float 3
+      %Inner = OpTypeStruct %mat3v3float
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_Inner_uint_4 = OpTypeArray %Inner %uint_4
+      %Outer = OpTypeStruct %_arr_Inner_uint_4
+%_arr_Outer_uint_4 = OpTypeArray %Outer %uint_4
+    %a_block = OpTypeStruct %_arr_Outer_uint_4
+%_ptr_Uniform_a_block = OpTypePointer Uniform %a_block
+          %a = OpVariable %_ptr_Uniform_a_block Uniform
+       %void = OpTypeVoid
+         %13 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_Outer_uint_4 = OpTypePointer Uniform %_arr_Outer_uint_4
+        %int = OpTypeInt 32 1
+      %int_3 = OpConstant %int 3
+%_ptr_Uniform_Outer = OpTypePointer Uniform %Outer
+%_ptr_Uniform__arr_Inner_uint_4 = OpTypePointer Uniform %_arr_Inner_uint_4
+      %int_2 = OpConstant %int 2
+%_ptr_Uniform_Inner = OpTypePointer Uniform %Inner
+%_ptr_Uniform_mat3v3float = OpTypePointer Uniform %mat3v3float
+      %int_1 = OpConstant %int 1
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+         %40 = OpConstantNull %int
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+          %f = OpFunction %void None %13
+         %16 = OpLabel
+         %19 = OpAccessChain %_ptr_Uniform__arr_Outer_uint_4 %a %uint_0
+         %20 = OpLoad %_arr_Outer_uint_4 %19
+         %24 = OpAccessChain %_ptr_Uniform_Outer %a %uint_0 %int_3
+         %25 = OpLoad %Outer %24
+         %27 = OpAccessChain %_ptr_Uniform__arr_Inner_uint_4 %a %uint_0 %int_3 %uint_0
+         %28 = OpLoad %_arr_Inner_uint_4 %27
+         %31 = OpAccessChain %_ptr_Uniform_Inner %a %uint_0 %int_3 %uint_0 %int_2
+         %32 = OpLoad %Inner %31
+         %34 = OpAccessChain %_ptr_Uniform_mat3v3float %a %uint_0 %int_3 %uint_0 %int_2 %uint_0
+         %35 = OpLoad %mat3v3float %34
+         %38 = OpAccessChain %_ptr_Uniform_v3float %a %uint_0 %int_3 %uint_0 %int_2 %uint_0 %int_1
+         %39 = OpLoad %v3float %38
+         %42 = OpAccessChain %_ptr_Uniform_float %a %uint_0 %int_3 %uint_0 %int_2 %uint_0 %int_1 %40
+         %43 = OpLoad %float %42
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f32/static_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/static_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..ae7fbe5
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/static_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,27 @@
+struct Inner {
+  @size(64)
+  m : mat3x3<f32>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &(a);
+  let p_a_3 = &((*(p_a))[3]);
+  let p_a_3_a = &((*(p_a_3)).a);
+  let p_a_3_a_2 = &((*(p_a_3_a))[2]);
+  let p_a_3_a_2_m = &((*(p_a_3_a_2)).m);
+  let p_a_3_a_2_m_1 = &((*(p_a_3_a_2_m))[1]);
+  let l_a : array<Outer, 4> = *(p_a);
+  let l_a_3 : Outer = *(p_a_3);
+  let l_a_3_a : array<Inner, 4> = *(p_a_3_a);
+  let l_a_3_a_2 : Inner = *(p_a_3_a_2);
+  let l_a_3_a_2_m : mat3x3<f32> = *(p_a_3_a_2_m);
+  let l_a_3_a_2_m_1 : vec3<f32> = *(p_a_3_a_2_m_1);
+  let l_a_3_a_2_m_1_0 : f32 = (*(p_a_3_a_2_m_1))[0];
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_builtin.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_builtin.wgsl
new file mode 100644
index 0000000..12d9206
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_builtin.wgsl

@@ -0,0 +1,15 @@
+struct S {
+  before : i32,
+  m : mat3x3<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    let t = transpose(u[2].m);
+    let l = length(u[0].m[1].zxy);
+    let a = abs(u[0].m[1].zxy.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_builtin.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_builtin.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..55e12dd
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_builtin.wgsl.expected.dxc.hlsl

@@ -0,0 +1,18 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+
+float3x3 tint_symbol(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float3x3 t = transpose(tint_symbol(u, 272u));
+  const float l = length(asfloat(u[2].xyz).zxy);
+  const float a = abs(asfloat(u[2].xyz).zxy.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_builtin.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_builtin.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..55e12dd
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_builtin.wgsl.expected.fxc.hlsl

@@ -0,0 +1,18 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+
+float3x3 tint_symbol(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float3x3 t = transpose(tint_symbol(u, 272u));
+  const float l = length(asfloat(u[2].xyz).zxy);
+  const float a = abs(asfloat(u[2].xyz).zxy.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_builtin.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_builtin.wgsl.expected.glsl
new file mode 100644
index 0000000..573743d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_builtin.wgsl.expected.glsl

@@ -0,0 +1,41 @@
+#version 310 es
+
+struct S {
+  int before;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  mat3 m;
+  int after;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+};
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  S inner[4];
+} u;
+
+void f() {
+  mat3 t = transpose(u.inner[2].m);
+  float l = length(u.inner[0].m[1].zxy);
+  float a = abs(u.inner[0].m[1].zxy.x);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_builtin.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_builtin.wgsl.expected.msl
new file mode 100644
index 0000000..08a4ad4
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_builtin.wgsl.expected.msl

@@ -0,0 +1,31 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 12> tint_pad;
+  /* 0x0010 */ float3x3 m;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_1;
+};
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol [[buffer(0)]]) {
+  float3x3 const t = transpose((*(tint_symbol))[2].m);
+  float const l = length(float3((*(tint_symbol))[0].m[1]).zxy);
+  float const a = fabs(float3((*(tint_symbol))[0].m[1]).zxy[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_builtin.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_builtin.wgsl.expected.spvasm
new file mode 100644
index 0000000..d52bc20
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_builtin.wgsl.expected.spvasm

@@ -0,0 +1,65 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 36
+; Schema: 0
+               OpCapability Shader
+         %24 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %u "u"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 16
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 16
+               OpMemberDecorate %S 2 Offset 64
+               OpDecorate %_arr_S_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+        %int = OpTypeInt 32 1
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat3v3float = OpTypeMatrix %v3float 3
+          %S = OpTypeStruct %int %mat3v3float %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+    %u_block = OpTypeStruct %_arr_S_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+       %void = OpTypeVoid
+         %12 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+      %int_2 = OpConstant %int 2
+     %uint_1 = OpConstant %uint 1
+%_ptr_Uniform_mat3v3float = OpTypePointer Uniform %mat3v3float
+         %25 = OpConstantNull %int
+      %int_1 = OpConstant %int 1
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+          %f = OpFunction %void None %12
+         %15 = OpLabel
+         %21 = OpAccessChain %_ptr_Uniform_mat3v3float %u %uint_0 %int_2 %uint_1
+         %22 = OpLoad %mat3v3float %21
+         %16 = OpTranspose %mat3v3float %22
+         %28 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %25 %uint_1 %int_1
+         %29 = OpLoad %v3float %28
+         %30 = OpVectorShuffle %v3float %29 %29 2 0 1
+         %23 = OpExtInst %float %24 Length %30
+         %32 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %25 %uint_1 %int_1
+         %33 = OpLoad %v3float %32
+         %34 = OpVectorShuffle %v3float %33 %33 2 0 1
+         %35 = OpCompositeExtract %float %34 0
+         %31 = OpExtInst %float %24 FAbs %35
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_builtin.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_builtin.wgsl.expected.wgsl
new file mode 100644
index 0000000..7ef5711
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_builtin.wgsl.expected.wgsl

@@ -0,0 +1,15 @@
+struct S {
+  before : i32,
+  m : mat3x3<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let t = transpose(u[2].m);
+  let l = length(u[0].m[1].zxy);
+  let a = abs(u[0].m[1].zxy.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_fn.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_fn.wgsl
new file mode 100644
index 0000000..902ca19
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_fn.wgsl

@@ -0,0 +1,23 @@
+struct S {
+  before : i32,
+  m : mat3x3<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+fn a(a : array<S, 4>) {}
+fn b(s : S) {}
+fn c(m : mat3x3<f32>) {}
+fn d(v : vec3<f32>) {}
+fn e(f : f32) {}
+
+@compute @workgroup_size(1)
+fn f() {
+    a(u);
+    b(u[2]);
+    c(u[2].m);
+    d(u[0].m[1].zxy);
+    e(u[0].m[1].zxy.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_fn.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_fn.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..112cec2
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_fn.wgsl.expected.dxc.hlsl

@@ -0,0 +1,59 @@
+struct S {
+  int before;
+  float3x3 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+
+void a(S a_1[4]) {
+}
+
+void b(S s) {
+}
+
+void c(float3x3 m) {
+}
+
+void d(float3 v) {
+}
+
+void e(float f_1) {
+}
+
+float3x3 tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz));
+}
+
+S tint_symbol_1(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_3 = ((offset + 0u)) / 4;
+  const uint scalar_offset_4 = ((offset + 64u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4]), tint_symbol_3(buffer, (offset + 16u)), asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(tint_symbol_1(u, 256u));
+  c(tint_symbol_3(u, 272u));
+  d(asfloat(u[2].xyz).zxy);
+  e(asfloat(u[2].xyz).zxy.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_fn.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_fn.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..112cec2
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_fn.wgsl.expected.fxc.hlsl

@@ -0,0 +1,59 @@
+struct S {
+  int before;
+  float3x3 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+
+void a(S a_1[4]) {
+}
+
+void b(S s) {
+}
+
+void c(float3x3 m) {
+}
+
+void d(float3 v) {
+}
+
+void e(float f_1) {
+}
+
+float3x3 tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz));
+}
+
+S tint_symbol_1(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_3 = ((offset + 0u)) / 4;
+  const uint scalar_offset_4 = ((offset + 64u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4]), tint_symbol_3(buffer, (offset + 16u)), asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(tint_symbol_1(u, 256u));
+  c(tint_symbol_3(u, 272u));
+  d(asfloat(u[2].xyz).zxy);
+  e(asfloat(u[2].xyz).zxy.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_fn.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_fn.wgsl.expected.glsl
new file mode 100644
index 0000000..8697e87
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_fn.wgsl.expected.glsl

@@ -0,0 +1,58 @@
+#version 310 es
+
+struct S {
+  int before;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  mat3 m;
+  int after;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+};
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  S inner[4];
+} u;
+
+void a(S a_1[4]) {
+}
+
+void b(S s) {
+}
+
+void c(mat3 m) {
+}
+
+void d(vec3 v) {
+}
+
+void e(float f_1) {
+}
+
+void f() {
+  a(u.inner);
+  b(u.inner[2]);
+  c(u.inner[2].m);
+  d(u.inner[0].m[1].zxy);
+  e(u.inner[0].m[1].zxy.x);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_fn.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_fn.wgsl.expected.msl
new file mode 100644
index 0000000..bd1b6bd
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_fn.wgsl.expected.msl

@@ -0,0 +1,48 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 12> tint_pad;
+  /* 0x0010 */ float3x3 m;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_1;
+};
+
+void a(tint_array<S, 4> a_1) {
+}
+
+void b(S s) {
+}
+
+void c(float3x3 m) {
+}
+
+void d(float3 v) {
+}
+
+void e(float f_1) {
+}
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol [[buffer(0)]]) {
+  a(*(tint_symbol));
+  b((*(tint_symbol))[2]);
+  c((*(tint_symbol))[2].m);
+  d(float3((*(tint_symbol))[0].m[1]).zxy);
+  e(float3((*(tint_symbol))[0].m[1]).zxy[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_fn.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_fn.wgsl.expected.spvasm
new file mode 100644
index 0000000..2438c29
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_fn.wgsl.expected.spvasm

@@ -0,0 +1,112 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 63
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %u "u"
+               OpName %a "a"
+               OpName %a_1 "a_1"
+               OpName %b "b"
+               OpName %s "s"
+               OpName %c "c"
+               OpName %m "m"
+               OpName %d "d"
+               OpName %v "v"
+               OpName %e "e"
+               OpName %f_1 "f_1"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 16
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 16
+               OpMemberDecorate %S 2 Offset 64
+               OpDecorate %_arr_S_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+        %int = OpTypeInt 32 1
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat3v3float = OpTypeMatrix %v3float 3
+          %S = OpTypeStruct %int %mat3v3float %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+    %u_block = OpTypeStruct %_arr_S_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+       %void = OpTypeVoid
+         %12 = OpTypeFunction %void %_arr_S_uint_4
+         %17 = OpTypeFunction %void %S
+         %21 = OpTypeFunction %void %mat3v3float
+         %25 = OpTypeFunction %void %v3float
+         %29 = OpTypeFunction %void %float
+         %33 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_S_uint_4 = OpTypePointer Uniform %_arr_S_uint_4
+      %int_2 = OpConstant %int 2
+%_ptr_Uniform_S = OpTypePointer Uniform %S
+     %uint_1 = OpConstant %uint 1
+%_ptr_Uniform_mat3v3float = OpTypePointer Uniform %mat3v3float
+         %52 = OpConstantNull %int
+      %int_1 = OpConstant %int 1
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+          %a = OpFunction %void None %12
+        %a_1 = OpFunctionParameter %_arr_S_uint_4
+         %16 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %b = OpFunction %void None %17
+          %s = OpFunctionParameter %S
+         %20 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %c = OpFunction %void None %21
+          %m = OpFunctionParameter %mat3v3float
+         %24 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %d = OpFunction %void None %25
+          %v = OpFunctionParameter %v3float
+         %28 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %e = OpFunction %void None %29
+        %f_1 = OpFunctionParameter %float
+         %32 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %33
+         %35 = OpLabel
+         %39 = OpAccessChain %_ptr_Uniform__arr_S_uint_4 %u %uint_0
+         %40 = OpLoad %_arr_S_uint_4 %39
+         %36 = OpFunctionCall %void %a %40
+         %44 = OpAccessChain %_ptr_Uniform_S %u %uint_0 %int_2
+         %45 = OpLoad %S %44
+         %41 = OpFunctionCall %void %b %45
+         %49 = OpAccessChain %_ptr_Uniform_mat3v3float %u %uint_0 %int_2 %uint_1
+         %50 = OpLoad %mat3v3float %49
+         %46 = OpFunctionCall %void %c %50
+         %55 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %52 %uint_1 %int_1
+         %56 = OpLoad %v3float %55
+         %57 = OpVectorShuffle %v3float %56 %56 2 0 1
+         %51 = OpFunctionCall %void %d %57
+         %59 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %52 %uint_1 %int_1
+         %60 = OpLoad %v3float %59
+         %61 = OpVectorShuffle %v3float %60 %60 2 0 1
+         %62 = OpCompositeExtract %float %61 0
+         %58 = OpFunctionCall %void %e %62
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_fn.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_fn.wgsl.expected.wgsl
new file mode 100644
index 0000000..e323489
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_fn.wgsl.expected.wgsl

@@ -0,0 +1,32 @@
+struct S {
+  before : i32,
+  m : mat3x3<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+fn a(a : array<S, 4>) {
+}
+
+fn b(s : S) {
+}
+
+fn c(m : mat3x3<f32>) {
+}
+
+fn d(v : vec3<f32>) {
+}
+
+fn e(f : f32) {
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  a(u);
+  b(u[2]);
+  c(u[2].m);
+  d(u[0].m[1].zxy);
+  e(u[0].m[1].zxy.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_private.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_private.wgsl
new file mode 100644
index 0000000..32735dd
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_private.wgsl

@@ -0,0 +1,17 @@
+struct S {
+  before : i32,
+  m : mat3x3<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+var<private> p : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    p = u;
+    p[1] = u[2];
+    p[3].m = u[2].m;
+    p[1].m[0] = u[0].m[1].zxy;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_private.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_private.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..1645513
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_private.wgsl.expected.dxc.hlsl

@@ -0,0 +1,44 @@
+struct S {
+  int before;
+  float3x3 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+static S p[4] = (S[4])0;
+
+float3x3 tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz));
+}
+
+S tint_symbol_1(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_3 = ((offset + 0u)) / 4;
+  const uint scalar_offset_4 = ((offset + 64u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4]), tint_symbol_3(buffer, (offset + 16u)), asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = tint_symbol_1(u, 256u);
+  p[3].m = tint_symbol_3(u, 272u);
+  p[1].m[0] = asfloat(u[2].xyz).zxy;
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_private.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_private.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..1645513
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_private.wgsl.expected.fxc.hlsl

@@ -0,0 +1,44 @@
+struct S {
+  int before;
+  float3x3 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+static S p[4] = (S[4])0;
+
+float3x3 tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz));
+}
+
+S tint_symbol_1(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_3 = ((offset + 0u)) / 4;
+  const uint scalar_offset_4 = ((offset + 64u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4]), tint_symbol_3(buffer, (offset + 16u)), asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = tint_symbol_1(u, 256u);
+  p[3].m = tint_symbol_3(u, 272u);
+  p[1].m[0] = asfloat(u[2].xyz).zxy;
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_private.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_private.wgsl.expected.glsl
new file mode 100644
index 0000000..d8b2808
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_private.wgsl.expected.glsl

@@ -0,0 +1,43 @@
+#version 310 es
+
+struct S {
+  int before;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  mat3 m;
+  int after;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+};
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  S inner[4];
+} u;
+
+S p[4] = S[4](S(0, 0u, 0u, 0u, mat3(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, 0u, 0u, mat3(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, 0u, 0u, mat3(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, 0u, 0u, mat3(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+void f() {
+  p = u.inner;
+  p[1] = u.inner[2];
+  p[3].m = u.inner[2].m;
+  p[1].m[0] = u.inner[0].m[1].zxy;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_private.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_private.wgsl.expected.msl
new file mode 100644
index 0000000..7e169af
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_private.wgsl.expected.msl

@@ -0,0 +1,33 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 12> tint_pad;
+  /* 0x0010 */ float3x3 m;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_1;
+};
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol_1 [[buffer(0)]]) {
+  thread tint_array<S, 4> tint_symbol = {};
+  tint_symbol = *(tint_symbol_1);
+  tint_symbol[1] = (*(tint_symbol_1))[2];
+  tint_symbol[3].m = (*(tint_symbol_1))[2].m;
+  tint_symbol[1].m[0] = float3((*(tint_symbol_1))[0].m[1]).zxy;
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_private.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_private.wgsl.expected.spvasm
new file mode 100644
index 0000000..7950fc0
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_private.wgsl.expected.spvasm

@@ -0,0 +1,78 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 44
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %u "u"
+               OpName %p "p"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 16
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 16
+               OpMemberDecorate %S 2 Offset 64
+               OpDecorate %_arr_S_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+        %int = OpTypeInt 32 1
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat3v3float = OpTypeMatrix %v3float 3
+          %S = OpTypeStruct %int %mat3v3float %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+    %u_block = OpTypeStruct %_arr_S_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%_ptr_Private__arr_S_uint_4 = OpTypePointer Private %_arr_S_uint_4
+         %14 = OpConstantNull %_arr_S_uint_4
+          %p = OpVariable %_ptr_Private__arr_S_uint_4 Private %14
+       %void = OpTypeVoid
+         %15 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_S_uint_4 = OpTypePointer Uniform %_arr_S_uint_4
+      %int_1 = OpConstant %int 1
+%_ptr_Private_S = OpTypePointer Private %S
+      %int_2 = OpConstant %int 2
+%_ptr_Uniform_S = OpTypePointer Uniform %S
+      %int_3 = OpConstant %int 3
+     %uint_1 = OpConstant %uint 1
+%_ptr_Private_mat3v3float = OpTypePointer Private %mat3v3float
+%_ptr_Uniform_mat3v3float = OpTypePointer Uniform %mat3v3float
+         %37 = OpConstantNull %int
+%_ptr_Private_v3float = OpTypePointer Private %v3float
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+          %f = OpFunction %void None %15
+         %18 = OpLabel
+         %21 = OpAccessChain %_ptr_Uniform__arr_S_uint_4 %u %uint_0
+         %22 = OpLoad %_arr_S_uint_4 %21
+               OpStore %p %22
+         %25 = OpAccessChain %_ptr_Private_S %p %int_1
+         %28 = OpAccessChain %_ptr_Uniform_S %u %uint_0 %int_2
+         %29 = OpLoad %S %28
+               OpStore %25 %29
+         %33 = OpAccessChain %_ptr_Private_mat3v3float %p %int_3 %uint_1
+         %35 = OpAccessChain %_ptr_Uniform_mat3v3float %u %uint_0 %int_2 %uint_1
+         %36 = OpLoad %mat3v3float %35
+               OpStore %33 %36
+         %39 = OpAccessChain %_ptr_Private_v3float %p %int_1 %uint_1 %37
+         %41 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %37 %uint_1 %int_1
+         %42 = OpLoad %v3float %41
+         %43 = OpVectorShuffle %v3float %42 %42 2 0 1
+               OpStore %39 %43
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_private.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_private.wgsl.expected.wgsl
new file mode 100644
index 0000000..0783f5a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_private.wgsl.expected.wgsl

@@ -0,0 +1,18 @@
+struct S {
+  before : i32,
+  m : mat3x3<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+var<private> p : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  p = u;
+  p[1] = u[2];
+  p[3].m = u[2].m;
+  p[1].m[0] = u[0].m[1].zxy;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_storage.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_storage.wgsl
new file mode 100644
index 0000000..35b7c90
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_storage.wgsl

@@ -0,0 +1,17 @@
+struct S {
+  before : i32,
+  m : mat3x3<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    s = u;
+    s[1] = u[2];
+    s[3].m = u[2].m;
+    s[1].m[0] = u[0].m[1].zxy;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_storage.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_storage.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..51fb0e8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_storage.wgsl.expected.dxc.hlsl

@@ -0,0 +1,65 @@
+struct S {
+  int before;
+  float3x3 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol_3(RWByteAddressBuffer buffer, uint offset, float3x3 value) {
+  buffer.Store3((offset + 0u), asuint(value[0u]));
+  buffer.Store3((offset + 16u), asuint(value[1u]));
+  buffer.Store3((offset + 32u), asuint(value[2u]));
+}
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, S value) {
+  buffer.Store((offset + 0u), asuint(value.before));
+  tint_symbol_3(buffer, (offset + 16u), value.m);
+  buffer.Store((offset + 64u), asuint(value.after));
+}
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, S value[4]) {
+  S array[4] = value;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      tint_symbol_1(buffer, (offset + (i * 128u)), array[i]);
+    }
+  }
+}
+
+float3x3 tint_symbol_8(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz));
+}
+
+S tint_symbol_6(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_3 = ((offset + 0u)) / 4;
+  const uint scalar_offset_4 = ((offset + 64u)) / 4;
+  const S tint_symbol_10 = {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4]), tint_symbol_8(buffer, (offset + 16u)), asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4])};
+  return tint_symbol_10;
+}
+
+typedef S tint_symbol_5_ret[4];
+tint_symbol_5_ret tint_symbol_5(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_6(buffer, (offset + (i_1 * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_5(u, 0u));
+  tint_symbol_1(s, 128u, tint_symbol_6(u, 256u));
+  tint_symbol_3(s, 400u, tint_symbol_8(u, 272u));
+  s.Store3(144u, asuint(asfloat(u[2].xyz).zxy));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_storage.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_storage.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..51fb0e8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_storage.wgsl.expected.fxc.hlsl

@@ -0,0 +1,65 @@
+struct S {
+  int before;
+  float3x3 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol_3(RWByteAddressBuffer buffer, uint offset, float3x3 value) {
+  buffer.Store3((offset + 0u), asuint(value[0u]));
+  buffer.Store3((offset + 16u), asuint(value[1u]));
+  buffer.Store3((offset + 32u), asuint(value[2u]));
+}
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, S value) {
+  buffer.Store((offset + 0u), asuint(value.before));
+  tint_symbol_3(buffer, (offset + 16u), value.m);
+  buffer.Store((offset + 64u), asuint(value.after));
+}
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, S value[4]) {
+  S array[4] = value;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      tint_symbol_1(buffer, (offset + (i * 128u)), array[i]);
+    }
+  }
+}
+
+float3x3 tint_symbol_8(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz));
+}
+
+S tint_symbol_6(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_3 = ((offset + 0u)) / 4;
+  const uint scalar_offset_4 = ((offset + 64u)) / 4;
+  const S tint_symbol_10 = {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4]), tint_symbol_8(buffer, (offset + 16u)), asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4])};
+  return tint_symbol_10;
+}
+
+typedef S tint_symbol_5_ret[4];
+tint_symbol_5_ret tint_symbol_5(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_6(buffer, (offset + (i_1 * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_5(u, 0u));
+  tint_symbol_1(s, 128u, tint_symbol_6(u, 256u));
+  tint_symbol_3(s, 400u, tint_symbol_8(u, 272u));
+  s.Store3(144u, asuint(asfloat(u[2].xyz).zxy));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_storage.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_storage.wgsl.expected.glsl
new file mode 100644
index 0000000..5f1ebf0
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_storage.wgsl.expected.glsl

@@ -0,0 +1,46 @@
+#version 310 es
+
+struct S {
+  int before;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  mat3 m;
+  int after;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+};
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  S inner[4];
+} u;
+
+layout(binding = 1, std430) buffer u_block_ssbo {
+  S inner[4];
+} s;
+
+void f() {
+  s.inner = u.inner;
+  s.inner[1] = u.inner[2];
+  s.inner[3].m = u.inner[2].m;
+  s.inner[1].m[0] = u.inner[0].m[1].zxy;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_storage.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_storage.wgsl.expected.msl
new file mode 100644
index 0000000..6ffd304
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_storage.wgsl.expected.msl

@@ -0,0 +1,32 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 12> tint_pad;
+  /* 0x0010 */ float3x3 m;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_1;
+};
+
+kernel void f(device tint_array<S, 4>* tint_symbol [[buffer(1)]], const constant tint_array<S, 4>* tint_symbol_1 [[buffer(0)]]) {
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[2];
+  (*(tint_symbol))[3].m = (*(tint_symbol_1))[2].m;
+  (*(tint_symbol))[1].m[0] = float3((*(tint_symbol_1))[0].m[1]).zxy;
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_storage.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_storage.wgsl.expected.spvasm
new file mode 100644
index 0000000..a1b2958
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_storage.wgsl.expected.spvasm

@@ -0,0 +1,81 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 45
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %u "u"
+               OpName %s "s"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 16
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 16
+               OpMemberDecorate %S 2 Offset 64
+               OpDecorate %_arr_S_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpDecorate %s DescriptorSet 0
+               OpDecorate %s Binding 1
+        %int = OpTypeInt 32 1
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat3v3float = OpTypeMatrix %v3float 3
+          %S = OpTypeStruct %int %mat3v3float %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+    %u_block = OpTypeStruct %_arr_S_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%_ptr_StorageBuffer_u_block = OpTypePointer StorageBuffer %u_block
+          %s = OpVariable %_ptr_StorageBuffer_u_block StorageBuffer
+       %void = OpTypeVoid
+         %14 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_StorageBuffer__arr_S_uint_4 = OpTypePointer StorageBuffer %_arr_S_uint_4
+%_ptr_Uniform__arr_S_uint_4 = OpTypePointer Uniform %_arr_S_uint_4
+      %int_1 = OpConstant %int 1
+%_ptr_StorageBuffer_S = OpTypePointer StorageBuffer %S
+      %int_2 = OpConstant %int 2
+%_ptr_Uniform_S = OpTypePointer Uniform %S
+      %int_3 = OpConstant %int 3
+     %uint_1 = OpConstant %uint 1
+%_ptr_StorageBuffer_mat3v3float = OpTypePointer StorageBuffer %mat3v3float
+%_ptr_Uniform_mat3v3float = OpTypePointer Uniform %mat3v3float
+         %38 = OpConstantNull %int
+%_ptr_StorageBuffer_v3float = OpTypePointer StorageBuffer %v3float
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+          %f = OpFunction %void None %14
+         %17 = OpLabel
+         %20 = OpAccessChain %_ptr_StorageBuffer__arr_S_uint_4 %s %uint_0
+         %22 = OpAccessChain %_ptr_Uniform__arr_S_uint_4 %u %uint_0
+         %23 = OpLoad %_arr_S_uint_4 %22
+               OpStore %20 %23
+         %26 = OpAccessChain %_ptr_StorageBuffer_S %s %uint_0 %int_1
+         %29 = OpAccessChain %_ptr_Uniform_S %u %uint_0 %int_2
+         %30 = OpLoad %S %29
+               OpStore %26 %30
+         %34 = OpAccessChain %_ptr_StorageBuffer_mat3v3float %s %uint_0 %int_3 %uint_1
+         %36 = OpAccessChain %_ptr_Uniform_mat3v3float %u %uint_0 %int_2 %uint_1
+         %37 = OpLoad %mat3v3float %36
+               OpStore %34 %37
+         %40 = OpAccessChain %_ptr_StorageBuffer_v3float %s %uint_0 %int_1 %uint_1 %38
+         %42 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %38 %uint_1 %int_1
+         %43 = OpLoad %v3float %42
+         %44 = OpVectorShuffle %v3float %43 %43 2 0 1
+               OpStore %40 %44
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_storage.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_storage.wgsl.expected.wgsl
new file mode 100644
index 0000000..843821b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_storage.wgsl.expected.wgsl

@@ -0,0 +1,18 @@
+struct S {
+  before : i32,
+  m : mat3x3<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  s = u;
+  s[1] = u[2];
+  s[3].m = u[2].m;
+  s[1].m[0] = u[0].m[1].zxy;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_workgroup.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_workgroup.wgsl
new file mode 100644
index 0000000..49368e1
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_workgroup.wgsl

@@ -0,0 +1,17 @@
+struct S {
+  before : i32,
+  m : mat3x3<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+var<workgroup> w : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    w = u;
+    w[1] = u[2];
+    w[3].m = u[2].m;
+    w[1].m[0] = u[0].m[1].zxy;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_workgroup.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_workgroup.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..e50deb1
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_workgroup.wgsl.expected.dxc.hlsl

@@ -0,0 +1,60 @@
+struct S {
+  int before;
+  float3x3 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+groupshared S w[4];
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+float3x3 tint_symbol_5(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz));
+}
+
+S tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_3 = ((offset + 0u)) / 4;
+  const uint scalar_offset_4 = ((offset + 64u)) / 4;
+  const S tint_symbol_8 = {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4]), tint_symbol_5(buffer, (offset + 16u)), asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4])};
+  return tint_symbol_8;
+}
+
+typedef S tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 128u)));
+    }
+  }
+  return arr;
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      const uint i = idx;
+      const S tint_symbol_7 = (S)0;
+      w[i] = tint_symbol_7;
+    }
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = tint_symbol_3(u, 256u);
+  w[3].m = tint_symbol_5(u, 272u);
+  w[1].m[0] = asfloat(u[2].xyz).zxy;
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_workgroup.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_workgroup.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..e50deb1
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_workgroup.wgsl.expected.fxc.hlsl

@@ -0,0 +1,60 @@
+struct S {
+  int before;
+  float3x3 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+groupshared S w[4];
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+float3x3 tint_symbol_5(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz));
+}
+
+S tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_3 = ((offset + 0u)) / 4;
+  const uint scalar_offset_4 = ((offset + 64u)) / 4;
+  const S tint_symbol_8 = {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4]), tint_symbol_5(buffer, (offset + 16u)), asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4])};
+  return tint_symbol_8;
+}
+
+typedef S tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 128u)));
+    }
+  }
+  return arr;
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      const uint i = idx;
+      const S tint_symbol_7 = (S)0;
+      w[i] = tint_symbol_7;
+    }
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = tint_symbol_3(u, 256u);
+  w[3].m = tint_symbol_5(u, 272u);
+  w[1].m[0] = asfloat(u[2].xyz).zxy;
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_workgroup.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_workgroup.wgsl.expected.glsl
new file mode 100644
index 0000000..c1a89c6
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_workgroup.wgsl.expected.glsl

@@ -0,0 +1,51 @@
+#version 310 es
+
+struct S {
+  int before;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  mat3 m;
+  int after;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+};
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  S inner[4];
+} u;
+
+shared S w[4];
+void f(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      uint i = idx;
+      S tint_symbol = S(0, 0u, 0u, 0u, mat3(vec3(0.0f), vec3(0.0f), vec3(0.0f)), 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u);
+      w[i] = tint_symbol;
+    }
+  }
+  barrier();
+  w = u.inner;
+  w[1] = u.inner[2];
+  w[3].m = u.inner[2].m;
+  w[1].m[0] = u.inner[0].m[1].zxy;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f(gl_LocalInvocationIndex);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_workgroup.wgsl.expected.msl
new file mode 100644
index 0000000..1fddc70
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_workgroup.wgsl.expected.msl

@@ -0,0 +1,47 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 12> tint_pad;
+  /* 0x0010 */ float3x3 m;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_1;
+};
+
+struct tint_symbol_6 {
+  tint_array<S, 4> w;
+};
+
+void f_inner(uint local_invocation_index, threadgroup tint_array<S, 4>* const tint_symbol_1, const constant tint_array<S, 4>* const tint_symbol_2) {
+  for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+    uint const i = idx;
+    S const tint_symbol = S{};
+    (*(tint_symbol_1))[i] = tint_symbol;
+  }
+  threadgroup_barrier(mem_flags::mem_threadgroup);
+  *(tint_symbol_1) = *(tint_symbol_2);
+  (*(tint_symbol_1))[1] = (*(tint_symbol_2))[2];
+  (*(tint_symbol_1))[3].m = (*(tint_symbol_2))[2].m;
+  (*(tint_symbol_1))[1].m[0] = float3((*(tint_symbol_2))[0].m[1]).zxy;
+}
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol_5 [[buffer(0)]], threadgroup tint_symbol_6* tint_symbol_4 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
+  threadgroup tint_array<S, 4>* const tint_symbol_3 = &((*(tint_symbol_4)).w);
+  f_inner(local_invocation_index, tint_symbol_3, tint_symbol_5);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_workgroup.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_workgroup.wgsl.expected.spvasm
new file mode 100644
index 0000000..7922ffb
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_workgroup.wgsl.expected.spvasm

@@ -0,0 +1,124 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 72
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f" %local_invocation_index_1
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %local_invocation_index_1 "local_invocation_index_1"
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %u "u"
+               OpName %w "w"
+               OpName %f_inner "f_inner"
+               OpName %local_invocation_index "local_invocation_index"
+               OpName %idx "idx"
+               OpName %f "f"
+               OpDecorate %local_invocation_index_1 BuiltIn LocalInvocationIndex
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 16
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 16
+               OpMemberDecorate %S 2 Offset 64
+               OpDecorate %_arr_S_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %uint = OpTypeInt 32 0
+%_ptr_Input_uint = OpTypePointer Input %uint
+%local_invocation_index_1 = OpVariable %_ptr_Input_uint Input
+        %int = OpTypeInt 32 1
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat3v3float = OpTypeMatrix %v3float 3
+          %S = OpTypeStruct %int %mat3v3float %int
+     %uint_4 = OpConstant %uint 4
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+    %u_block = OpTypeStruct %_arr_S_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%_ptr_Workgroup__arr_S_uint_4 = OpTypePointer Workgroup %_arr_S_uint_4
+          %w = OpVariable %_ptr_Workgroup__arr_S_uint_4 Workgroup
+       %void = OpTypeVoid
+         %16 = OpTypeFunction %void %uint
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %23 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Workgroup_S = OpTypePointer Workgroup %S
+         %37 = OpConstantNull %S
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+   %uint_264 = OpConstant %uint 264
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_S_uint_4 = OpTypePointer Uniform %_arr_S_uint_4
+      %int_1 = OpConstant %int 1
+      %int_2 = OpConstant %int 2
+%_ptr_Uniform_S = OpTypePointer Uniform %S
+      %int_3 = OpConstant %int 3
+%_ptr_Workgroup_mat3v3float = OpTypePointer Workgroup %mat3v3float
+%_ptr_Uniform_mat3v3float = OpTypePointer Uniform %mat3v3float
+         %60 = OpConstantNull %int
+%_ptr_Workgroup_v3float = OpTypePointer Workgroup %v3float
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+         %67 = OpTypeFunction %void
+    %f_inner = OpFunction %void None %16
+%local_invocation_index = OpFunctionParameter %uint
+         %20 = OpLabel
+        %idx = OpVariable %_ptr_Function_uint Function %23
+               OpStore %idx %local_invocation_index
+               OpBranch %24
+         %24 = OpLabel
+               OpLoopMerge %25 %26 None
+               OpBranch %27
+         %27 = OpLabel
+         %29 = OpLoad %uint %idx
+         %30 = OpULessThan %bool %29 %uint_4
+         %28 = OpLogicalNot %bool %30
+               OpSelectionMerge %32 None
+               OpBranchConditional %28 %33 %32
+         %33 = OpLabel
+               OpBranch %25
+         %32 = OpLabel
+         %34 = OpLoad %uint %idx
+         %36 = OpAccessChain %_ptr_Workgroup_S %w %34
+               OpStore %36 %37
+               OpBranch %26
+         %26 = OpLabel
+         %38 = OpLoad %uint %idx
+         %40 = OpIAdd %uint %38 %uint_1
+               OpStore %idx %40
+               OpBranch %24
+         %25 = OpLabel
+               OpControlBarrier %uint_2 %uint_2 %uint_264
+         %46 = OpAccessChain %_ptr_Uniform__arr_S_uint_4 %u %uint_0
+         %47 = OpLoad %_arr_S_uint_4 %46
+               OpStore %w %47
+         %49 = OpAccessChain %_ptr_Workgroup_S %w %int_1
+         %52 = OpAccessChain %_ptr_Uniform_S %u %uint_0 %int_2
+         %53 = OpLoad %S %52
+               OpStore %49 %53
+         %56 = OpAccessChain %_ptr_Workgroup_mat3v3float %w %int_3 %uint_1
+         %58 = OpAccessChain %_ptr_Uniform_mat3v3float %u %uint_0 %int_2 %uint_1
+         %59 = OpLoad %mat3v3float %58
+               OpStore %56 %59
+         %62 = OpAccessChain %_ptr_Workgroup_v3float %w %int_1 %uint_1 %60
+         %64 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %60 %uint_1 %int_1
+         %65 = OpLoad %v3float %64
+         %66 = OpVectorShuffle %v3float %65 %65 2 0 1
+               OpStore %62 %66
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %67
+         %69 = OpLabel
+         %71 = OpLoad %uint %local_invocation_index_1
+         %70 = OpFunctionCall %void %f_inner %71
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_workgroup.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_workgroup.wgsl.expected.wgsl
new file mode 100644
index 0000000..1488b07
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_workgroup.wgsl.expected.wgsl

@@ -0,0 +1,18 @@
+struct S {
+  before : i32,
+  m : mat3x3<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+var<workgroup> w : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  w = u;
+  w[1] = u[2];
+  w[3].m = u[2].m;
+  w[1].m[0] = u[0].m[1].zxy;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f16/dynamic_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/dynamic_index_via_ptr.wgsl
new file mode 100644
index 0000000..353b7ab
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/dynamic_index_via_ptr.wgsl

@@ -0,0 +1,34 @@
+enable f16;
+
+struct Inner {
+  @size(64)
+  m : mat3x4<f16>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+var<private> counter = 0;
+fn i() -> i32 { counter++; return counter; }
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a           = &a;
+  let p_a_i         = &((*p_a)[i()]);
+  let p_a_i_a       = &((*p_a_i).a);
+  let p_a_i_a_i     = &((*p_a_i_a)[i()]);
+  let p_a_i_a_i_m   = &((*p_a_i_a_i).m);
+  let p_a_i_a_i_m_i = &((*p_a_i_a_i_m)[i()]);
+
+
+  let l_a             : array<Outer, 4> =  *p_a;
+  let l_a_i           : Outer           =  *p_a_i;
+  let l_a_i_a         : array<Inner, 4> =  *p_a_i_a;
+  let l_a_i_a_i       : Inner           =  *p_a_i_a_i;
+  let l_a_i_a_i_m     : mat3x4<f16>     =  *p_a_i_a_i_m;
+  let l_a_i_a_i_m_i   : vec4<f16>       =  *p_a_i_a_i_m_i;
+  let l_a_i_a_i_m_i_i : f16             = (*p_a_i_a_i_m_i)[i()];
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..7487aeb
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,93 @@
+struct Inner {
+  matrix<float16_t, 3, 4> m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+matrix<float16_t, 3, 4> tint_symbol_8(uint4 buffer[64], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  return matrix<float16_t, 3, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]));
+}
+
+Inner tint_symbol_7(uint4 buffer[64], uint offset) {
+  const Inner tint_symbol_11 = {tint_symbol_8(buffer, (offset + 0u))};
+  return tint_symbol_11;
+}
+
+typedef Inner tint_symbol_6_ret[4];
+tint_symbol_6_ret tint_symbol_6(uint4 buffer[64], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_7(buffer, (offset + (i_1 * 64u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_5(uint4 buffer[64], uint offset) {
+  const Outer tint_symbol_12 = {tint_symbol_6(buffer, (offset + 0u))};
+  return tint_symbol_12;
+}
+
+typedef Outer tint_symbol_4_ret[4];
+tint_symbol_4_ret tint_symbol_4(uint4 buffer[64], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    for(uint i_2 = 0u; (i_2 < 4u); i_2 = (i_2 + 1u)) {
+      arr_1[i_2] = tint_symbol_5(buffer, (offset + (i_2 * 256u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_a_i_save = i();
+  const int p_a_i_a_i_save = i();
+  const int p_a_i_a_i_m_i_save = i();
+  const Outer l_a[4] = tint_symbol_4(a, 0u);
+  const Outer l_a_i = tint_symbol_5(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a[4] = tint_symbol_6(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a_i = tint_symbol_7(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const matrix<float16_t, 3, 4> l_a_i_a_i_m = tint_symbol_8(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const uint scalar_offset_3 = ((((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))) + (8u * uint(p_a_i_a_i_m_i_save)))) / 4;
+  uint4 ubo_load_7 = a[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  const vector<float16_t, 4> l_a_i_a_i_m_i = vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]);
+  const int tint_symbol = p_a_i_save;
+  const int tint_symbol_1 = p_a_i_a_i_save;
+  const int tint_symbol_2 = p_a_i_a_i_m_i_save;
+  const int tint_symbol_3 = i();
+  const uint scalar_offset_bytes = (((((256u * uint(tint_symbol)) + (64u * uint(tint_symbol_1))) + (8u * uint(tint_symbol_2))) + (2u * uint(tint_symbol_3))));
+  const uint scalar_offset_index = scalar_offset_bytes / 4;
+  const float16_t l_a_i_a_i_m_i_i = float16_t(f16tof32(((a[scalar_offset_index / 4][scalar_offset_index % 4] >> (scalar_offset_bytes % 4 == 0 ? 0 : 16)) & 0xFFFF)));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..cf6d6ac
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,98 @@
+SKIP: FAILED
+
+struct Inner {
+  matrix<float16_t, 3, 4> m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+matrix<float16_t, 3, 4> tint_symbol_8(uint4 buffer[64], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  return matrix<float16_t, 3, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]));
+}
+
+Inner tint_symbol_7(uint4 buffer[64], uint offset) {
+  const Inner tint_symbol_11 = {tint_symbol_8(buffer, (offset + 0u))};
+  return tint_symbol_11;
+}
+
+typedef Inner tint_symbol_6_ret[4];
+tint_symbol_6_ret tint_symbol_6(uint4 buffer[64], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_7(buffer, (offset + (i_1 * 64u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_5(uint4 buffer[64], uint offset) {
+  const Outer tint_symbol_12 = {tint_symbol_6(buffer, (offset + 0u))};
+  return tint_symbol_12;
+}
+
+typedef Outer tint_symbol_4_ret[4];
+tint_symbol_4_ret tint_symbol_4(uint4 buffer[64], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    for(uint i_2 = 0u; (i_2 < 4u); i_2 = (i_2 + 1u)) {
+      arr_1[i_2] = tint_symbol_5(buffer, (offset + (i_2 * 256u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_a_i_save = i();
+  const int p_a_i_a_i_save = i();
+  const int p_a_i_a_i_m_i_save = i();
+  const Outer l_a[4] = tint_symbol_4(a, 0u);
+  const Outer l_a_i = tint_symbol_5(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a[4] = tint_symbol_6(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a_i = tint_symbol_7(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const matrix<float16_t, 3, 4> l_a_i_a_i_m = tint_symbol_8(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const uint scalar_offset_3 = ((((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))) + (8u * uint(p_a_i_a_i_m_i_save)))) / 4;
+  uint4 ubo_load_7 = a[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  const vector<float16_t, 4> l_a_i_a_i_m_i = vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]);
+  const int tint_symbol = p_a_i_save;
+  const int tint_symbol_1 = p_a_i_a_i_save;
+  const int tint_symbol_2 = p_a_i_a_i_m_i_save;
+  const int tint_symbol_3 = i();
+  const uint scalar_offset_bytes = (((((256u * uint(tint_symbol)) + (64u * uint(tint_symbol_1))) + (8u * uint(tint_symbol_2))) + (2u * uint(tint_symbol_3))));
+  const uint scalar_offset_index = scalar_offset_bytes / 4;
+  const float16_t l_a_i_a_i_m_i_i = float16_t(f16tof32(((a[scalar_offset_index / 4][scalar_offset_index % 4] >> (scalar_offset_bytes % 4 == 0 ? 0 : 16)) & 0xFFFF)));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001C15A92C5C0(2,10-18): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f16/dynamic_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/dynamic_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..5b0a6bc
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/dynamic_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,152 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct Inner {
+  f16mat3x4 m;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+};
+
+struct Inner_std140 {
+  f16vec4 m_0;
+  f16vec4 m_1;
+  f16vec4 m_2;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+};
+
+struct Outer {
+  Inner a[4];
+};
+
+struct Outer_std140 {
+  Inner_std140 a[4];
+};
+
+layout(binding = 0, std140) uniform a_block_std140_ubo {
+  Outer_std140 inner[4];
+} a;
+
+int counter = 0;
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+Inner conv_Inner(Inner_std140 val) {
+  return Inner(f16mat3x4(val.m_0, val.m_1, val.m_2), val.pad, val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.pad_7, val.pad_8, val.pad_9);
+}
+
+Inner[4] conv_arr4_Inner(Inner_std140 val[4]) {
+  Inner arr[4] = Inner[4](Inner(f16mat3x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_Inner(val[i]);
+    }
+  }
+  return arr;
+}
+
+Outer conv_Outer(Outer_std140 val) {
+  return Outer(conv_arr4_Inner(val.a));
+}
+
+Outer[4] conv_arr4_Outer(Outer_std140 val[4]) {
+  Outer arr[4] = Outer[4](Outer(Inner[4](Inner(f16mat3x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))), Outer(Inner[4](Inner(f16mat3x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))), Outer(Inner[4](Inner(f16mat3x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))), Outer(Inner[4](Inner(f16mat3x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 
0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_Outer(val[i]);
+    }
+  }
+  return arr;
+}
+
+f16mat3x4 load_a_inner_p0_a_p1_m(uint p0, uint p1) {
+  uint s_save = p0;
+  uint s_save_1 = p1;
+  return f16mat3x4(a.inner[s_save].a[s_save_1].m_0, a.inner[s_save].a[s_save_1].m_1, a.inner[s_save].a[s_save_1].m_2);
+}
+
+f16vec4 load_a_inner_p0_a_p1_m_p2(uint p0, uint p1, uint p2) {
+  switch(p2) {
+    case 0u: {
+      return a.inner[p0].a[p1].m_0;
+      break;
+    }
+    case 1u: {
+      return a.inner[p0].a[p1].m_1;
+      break;
+    }
+    case 2u: {
+      return a.inner[p0].a[p1].m_2;
+      break;
+    }
+    default: {
+      return f16vec4(0.0hf);
+      break;
+    }
+  }
+}
+
+float16_t load_a_inner_p0_a_p1_m_p2_p3(uint p0, uint p1, uint p2, uint p3) {
+  switch(p2) {
+    case 0u: {
+      return a.inner[p0].a[p1].m_0[p3];
+      break;
+    }
+    case 1u: {
+      return a.inner[p0].a[p1].m_1[p3];
+      break;
+    }
+    case 2u: {
+      return a.inner[p0].a[p1].m_2[p3];
+      break;
+    }
+    default: {
+      return 0.0hf;
+      break;
+    }
+  }
+}
+
+void f() {
+  Outer p_a[4] = conv_arr4_Outer(a.inner);
+  int tint_symbol = i();
+  Outer p_a_i = conv_Outer(a.inner[tint_symbol]);
+  Inner p_a_i_a[4] = conv_arr4_Inner(a.inner[tint_symbol].a);
+  int tint_symbol_1 = i();
+  Inner p_a_i_a_i = conv_Inner(a.inner[tint_symbol].a[tint_symbol_1]);
+  f16mat3x4 p_a_i_a_i_m = load_a_inner_p0_a_p1_m(uint(tint_symbol), uint(tint_symbol_1));
+  int tint_symbol_2 = i();
+  f16vec4 p_a_i_a_i_m_i = load_a_inner_p0_a_p1_m_p2(uint(tint_symbol), uint(tint_symbol_1), uint(tint_symbol_2));
+  Outer l_a[4] = conv_arr4_Outer(a.inner);
+  Outer l_a_i = conv_Outer(a.inner[tint_symbol]);
+  Inner l_a_i_a[4] = conv_arr4_Inner(a.inner[tint_symbol].a);
+  Inner l_a_i_a_i = conv_Inner(a.inner[tint_symbol].a[tint_symbol_1]);
+  f16mat3x4 l_a_i_a_i_m = load_a_inner_p0_a_p1_m(uint(tint_symbol), uint(tint_symbol_1));
+  f16vec4 l_a_i_a_i_m_i = load_a_inner_p0_a_p1_m_p2(uint(tint_symbol), uint(tint_symbol_1), uint(tint_symbol_2));
+  int tint_symbol_3 = i();
+  float16_t l_a_i_a_i_m_i_i = load_a_inner_p0_a_p1_m_p2_p3(uint(tint_symbol), uint(tint_symbol_1), uint(tint_symbol_2), uint(tint_symbol_3));
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f16/dynamic_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/dynamic_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..eed6f51
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/dynamic_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,49 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct Inner {
+  /* 0x0000 */ half3x4 m;
+  /* 0x0018 */ tint_array<int8_t, 40> tint_pad;
+};
+
+struct Outer {
+  /* 0x0000 */ tint_array<Inner, 4> a;
+};
+
+int i() {
+  thread int tint_symbol_4 = 0;
+  tint_symbol_4 = as_type<int>((as_type<uint>(tint_symbol_4) + as_type<uint>(1)));
+  return tint_symbol_4;
+}
+
+kernel void f(const constant tint_array<Outer, 4>* tint_symbol_5 [[buffer(0)]]) {
+  int const tint_symbol = i();
+  int const p_a_i_save = tint_symbol;
+  int const tint_symbol_1 = i();
+  int const p_a_i_a_i_save = tint_symbol_1;
+  int const tint_symbol_2 = i();
+  int const p_a_i_a_i_m_i_save = tint_symbol_2;
+  tint_array<Outer, 4> const l_a = *(tint_symbol_5);
+  Outer const l_a_i = (*(tint_symbol_5))[p_a_i_save];
+  tint_array<Inner, 4> const l_a_i_a = (*(tint_symbol_5))[p_a_i_save].a;
+  Inner const l_a_i_a_i = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save];
+  half3x4 const l_a_i_a_i_m = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save].m;
+  half4 const l_a_i_a_i_m_i = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save].m[p_a_i_a_i_m_i_save];
+  int const tint_symbol_3 = i();
+  half const l_a_i_a_i_m_i_i = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save].m[p_a_i_a_i_m_i_save][tint_symbol_3];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f16/dynamic_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/dynamic_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..4f50ed0
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/dynamic_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,324 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 204
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %a_block_std140 "a_block_std140"
+               OpMemberName %a_block_std140 0 "inner"
+               OpName %Outer_std140 "Outer_std140"
+               OpMemberName %Outer_std140 0 "a"
+               OpName %Inner_std140 "Inner_std140"
+               OpMemberName %Inner_std140 0 "m_0"
+               OpMemberName %Inner_std140 1 "m_1"
+               OpMemberName %Inner_std140 2 "m_2"
+               OpName %a "a"
+               OpName %counter "counter"
+               OpName %i "i"
+               OpName %Inner "Inner"
+               OpMemberName %Inner 0 "m"
+               OpName %conv_Inner "conv_Inner"
+               OpName %val "val"
+               OpName %conv_arr4_Inner "conv_arr4_Inner"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i_0 "i"
+               OpName %var_for_index_1 "var_for_index_1"
+               OpName %Outer "Outer"
+               OpMemberName %Outer 0 "a"
+               OpName %conv_Outer "conv_Outer"
+               OpName %val_1 "val"
+               OpName %conv_arr4_Outer "conv_arr4_Outer"
+               OpName %val_2 "val"
+               OpName %arr_0 "arr"
+               OpName %i_1 "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_a_inner_p0_a_p1_m "load_a_inner_p0_a_p1_m"
+               OpName %p0 "p0"
+               OpName %p1 "p1"
+               OpName %load_a_inner_p0_a_p1_m_p2 "load_a_inner_p0_a_p1_m_p2"
+               OpName %p0_0 "p0"
+               OpName %p1_0 "p1"
+               OpName %p2 "p2"
+               OpName %load_a_inner_p0_a_p1_m_p2_p3 "load_a_inner_p0_a_p1_m_p2_p3"
+               OpName %p0_1 "p0"
+               OpName %p1_1 "p1"
+               OpName %p2_0 "p2"
+               OpName %p3 "p3"
+               OpName %f "f"
+               OpDecorate %a_block_std140 Block
+               OpMemberDecorate %a_block_std140 0 Offset 0
+               OpMemberDecorate %Outer_std140 0 Offset 0
+               OpMemberDecorate %Inner_std140 0 Offset 0
+               OpMemberDecorate %Inner_std140 1 Offset 8
+               OpMemberDecorate %Inner_std140 2 Offset 16
+               OpDecorate %_arr_Inner_std140_uint_4 ArrayStride 64
+               OpDecorate %_arr_Outer_std140_uint_4 ArrayStride 256
+               OpDecorate %a NonWritable
+               OpDecorate %a DescriptorSet 0
+               OpDecorate %a Binding 0
+               OpMemberDecorate %Inner 0 Offset 0
+               OpMemberDecorate %Inner 0 ColMajor
+               OpMemberDecorate %Inner 0 MatrixStride 8
+               OpDecorate %_arr_Inner_uint_4 ArrayStride 64
+               OpMemberDecorate %Outer 0 Offset 0
+               OpDecorate %_arr_Outer_uint_4 ArrayStride 256
+       %half = OpTypeFloat 16
+     %v4half = OpTypeVector %half 4
+%Inner_std140 = OpTypeStruct %v4half %v4half %v4half
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_Inner_std140_uint_4 = OpTypeArray %Inner_std140 %uint_4
+%Outer_std140 = OpTypeStruct %_arr_Inner_std140_uint_4
+%_arr_Outer_std140_uint_4 = OpTypeArray %Outer_std140 %uint_4
+%a_block_std140 = OpTypeStruct %_arr_Outer_std140_uint_4
+%_ptr_Uniform_a_block_std140 = OpTypePointer Uniform %a_block_std140
+          %a = OpVariable %_ptr_Uniform_a_block_std140 Uniform
+        %int = OpTypeInt 32 1
+         %13 = OpConstantNull %int
+%_ptr_Private_int = OpTypePointer Private %int
+    %counter = OpVariable %_ptr_Private_int Private %13
+         %16 = OpTypeFunction %int
+      %int_1 = OpConstant %int 1
+ %mat3v4half = OpTypeMatrix %v4half 3
+      %Inner = OpTypeStruct %mat3v4half
+         %23 = OpTypeFunction %Inner %Inner_std140
+%_arr_Inner_uint_4 = OpTypeArray %Inner %uint_4
+         %34 = OpTypeFunction %_arr_Inner_uint_4 %_arr_Inner_std140_uint_4
+%_ptr_Function__arr_Inner_uint_4 = OpTypePointer Function %_arr_Inner_uint_4
+         %41 = OpConstantNull %_arr_Inner_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %44 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_Inner_std140_uint_4 = OpTypePointer Function %_arr_Inner_std140_uint_4
+         %57 = OpConstantNull %_arr_Inner_std140_uint_4
+%_ptr_Function_Inner = OpTypePointer Function %Inner
+%_ptr_Function_Inner_std140 = OpTypePointer Function %Inner_std140
+     %uint_1 = OpConstant %uint 1
+      %Outer = OpTypeStruct %_arr_Inner_uint_4
+         %70 = OpTypeFunction %Outer %Outer_std140
+%_arr_Outer_uint_4 = OpTypeArray %Outer %uint_4
+         %78 = OpTypeFunction %_arr_Outer_uint_4 %_arr_Outer_std140_uint_4
+%_ptr_Function__arr_Outer_uint_4 = OpTypePointer Function %_arr_Outer_uint_4
+         %85 = OpConstantNull %_arr_Outer_uint_4
+%_ptr_Function__arr_Outer_std140_uint_4 = OpTypePointer Function %_arr_Outer_std140_uint_4
+         %98 = OpConstantNull %_arr_Outer_std140_uint_4
+%_ptr_Function_Outer = OpTypePointer Function %Outer
+%_ptr_Function_Outer_std140 = OpTypePointer Function %Outer_std140
+        %110 = OpTypeFunction %mat3v4half %uint %uint
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_Inner_std140 = OpTypePointer Uniform %Inner_std140
+%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half
+     %uint_2 = OpConstant %uint 2
+        %131 = OpTypeFunction %v4half %uint %uint %uint
+        %148 = OpConstantNull %v4half
+        %149 = OpTypeFunction %half %uint %uint %uint %uint
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+        %168 = OpConstantNull %half
+       %void = OpTypeVoid
+        %169 = OpTypeFunction %void
+%_ptr_Uniform__arr_Outer_std140_uint_4 = OpTypePointer Uniform %_arr_Outer_std140_uint_4
+%_ptr_Uniform_Outer_std140 = OpTypePointer Uniform %Outer_std140
+%_ptr_Uniform__arr_Inner_std140_uint_4 = OpTypePointer Uniform %_arr_Inner_std140_uint_4
+          %i = OpFunction %int None %16
+         %18 = OpLabel
+         %19 = OpLoad %int %counter
+         %21 = OpIAdd %int %19 %int_1
+               OpStore %counter %21
+         %22 = OpLoad %int %counter
+               OpReturnValue %22
+               OpFunctionEnd
+ %conv_Inner = OpFunction %Inner None %23
+        %val = OpFunctionParameter %Inner_std140
+         %28 = OpLabel
+         %29 = OpCompositeExtract %v4half %val 0
+         %30 = OpCompositeExtract %v4half %val 1
+         %31 = OpCompositeExtract %v4half %val 2
+         %32 = OpCompositeConstruct %mat3v4half %29 %30 %31
+         %33 = OpCompositeConstruct %Inner %32
+               OpReturnValue %33
+               OpFunctionEnd
+%conv_arr4_Inner = OpFunction %_arr_Inner_uint_4 None %34
+      %val_0 = OpFunctionParameter %_arr_Inner_std140_uint_4
+         %38 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_Inner_uint_4 Function %41
+        %i_0 = OpVariable %_ptr_Function_uint Function %44
+%var_for_index_1 = OpVariable %_ptr_Function__arr_Inner_std140_uint_4 Function %57
+               OpBranch %45
+         %45 = OpLabel
+               OpLoopMerge %46 %47 None
+               OpBranch %48
+         %48 = OpLabel
+         %50 = OpLoad %uint %i_0
+         %51 = OpULessThan %bool %50 %uint_4
+         %49 = OpLogicalNot %bool %51
+               OpSelectionMerge %53 None
+               OpBranchConditional %49 %54 %53
+         %54 = OpLabel
+               OpBranch %46
+         %53 = OpLabel
+               OpStore %var_for_index_1 %val_0
+         %58 = OpLoad %uint %i_0
+         %60 = OpAccessChain %_ptr_Function_Inner %arr %58
+         %62 = OpLoad %uint %i_0
+         %64 = OpAccessChain %_ptr_Function_Inner_std140 %var_for_index_1 %62
+         %65 = OpLoad %Inner_std140 %64
+         %61 = OpFunctionCall %Inner %conv_Inner %65
+               OpStore %60 %61
+               OpBranch %47
+         %47 = OpLabel
+         %66 = OpLoad %uint %i_0
+         %68 = OpIAdd %uint %66 %uint_1
+               OpStore %i_0 %68
+               OpBranch %45
+         %46 = OpLabel
+         %69 = OpLoad %_arr_Inner_uint_4 %arr
+               OpReturnValue %69
+               OpFunctionEnd
+ %conv_Outer = OpFunction %Outer None %70
+      %val_1 = OpFunctionParameter %Outer_std140
+         %74 = OpLabel
+         %76 = OpCompositeExtract %_arr_Inner_std140_uint_4 %val_1 0
+         %75 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr4_Inner %76
+         %77 = OpCompositeConstruct %Outer %75
+               OpReturnValue %77
+               OpFunctionEnd
+%conv_arr4_Outer = OpFunction %_arr_Outer_uint_4 None %78
+      %val_2 = OpFunctionParameter %_arr_Outer_std140_uint_4
+         %82 = OpLabel
+      %arr_0 = OpVariable %_ptr_Function__arr_Outer_uint_4 Function %85
+        %i_1 = OpVariable %_ptr_Function_uint Function %44
+%var_for_index = OpVariable %_ptr_Function__arr_Outer_std140_uint_4 Function %98
+               OpBranch %87
+         %87 = OpLabel
+               OpLoopMerge %88 %89 None
+               OpBranch %90
+         %90 = OpLabel
+         %92 = OpLoad %uint %i_1
+         %93 = OpULessThan %bool %92 %uint_4
+         %91 = OpLogicalNot %bool %93
+               OpSelectionMerge %94 None
+               OpBranchConditional %91 %95 %94
+         %95 = OpLabel
+               OpBranch %88
+         %94 = OpLabel
+               OpStore %var_for_index %val_2
+         %99 = OpLoad %uint %i_1
+        %101 = OpAccessChain %_ptr_Function_Outer %arr_0 %99
+        %103 = OpLoad %uint %i_1
+        %105 = OpAccessChain %_ptr_Function_Outer_std140 %var_for_index %103
+        %106 = OpLoad %Outer_std140 %105
+        %102 = OpFunctionCall %Outer %conv_Outer %106
+               OpStore %101 %102
+               OpBranch %89
+         %89 = OpLabel
+        %107 = OpLoad %uint %i_1
+        %108 = OpIAdd %uint %107 %uint_1
+               OpStore %i_1 %108
+               OpBranch %87
+         %88 = OpLabel
+        %109 = OpLoad %_arr_Outer_uint_4 %arr_0
+               OpReturnValue %109
+               OpFunctionEnd
+%load_a_inner_p0_a_p1_m = OpFunction %mat3v4half None %110
+         %p0 = OpFunctionParameter %uint
+         %p1 = OpFunctionParameter %uint
+        %114 = OpLabel
+        %118 = OpAccessChain %_ptr_Uniform_Inner_std140 %a %uint_0 %p0 %uint_0 %p1
+        %121 = OpAccessChain %_ptr_Uniform_v4half %118 %uint_0
+        %122 = OpLoad %v4half %121
+        %124 = OpAccessChain %_ptr_Uniform_v4half %118 %uint_1
+        %125 = OpLoad %v4half %124
+        %128 = OpAccessChain %_ptr_Uniform_v4half %118 %uint_2
+        %129 = OpLoad %v4half %128
+        %130 = OpCompositeConstruct %mat3v4half %122 %125 %129
+               OpReturnValue %130
+               OpFunctionEnd
+%load_a_inner_p0_a_p1_m_p2 = OpFunction %v4half None %131
+       %p0_0 = OpFunctionParameter %uint
+       %p1_0 = OpFunctionParameter %uint
+         %p2 = OpFunctionParameter %uint
+        %136 = OpLabel
+               OpSelectionMerge %137 None
+               OpSwitch %p2 %138 0 %139 1 %140 2 %141
+        %139 = OpLabel
+        %142 = OpAccessChain %_ptr_Uniform_v4half %a %uint_0 %p0_0 %uint_0 %p1_0 %uint_0
+        %143 = OpLoad %v4half %142
+               OpReturnValue %143
+        %140 = OpLabel
+        %144 = OpAccessChain %_ptr_Uniform_v4half %a %uint_0 %p0_0 %uint_0 %p1_0 %uint_1
+        %145 = OpLoad %v4half %144
+               OpReturnValue %145
+        %141 = OpLabel
+        %146 = OpAccessChain %_ptr_Uniform_v4half %a %uint_0 %p0_0 %uint_0 %p1_0 %uint_2
+        %147 = OpLoad %v4half %146
+               OpReturnValue %147
+        %138 = OpLabel
+               OpReturnValue %148
+        %137 = OpLabel
+               OpReturnValue %148
+               OpFunctionEnd
+%load_a_inner_p0_a_p1_m_p2_p3 = OpFunction %half None %149
+       %p0_1 = OpFunctionParameter %uint
+       %p1_1 = OpFunctionParameter %uint
+       %p2_0 = OpFunctionParameter %uint
+         %p3 = OpFunctionParameter %uint
+        %155 = OpLabel
+               OpSelectionMerge %156 None
+               OpSwitch %p2_0 %157 0 %158 1 %159 2 %160
+        %158 = OpLabel
+        %162 = OpAccessChain %_ptr_Uniform_half %a %uint_0 %p0_1 %uint_0 %p1_1 %uint_0 %p3
+        %163 = OpLoad %half %162
+               OpReturnValue %163
+        %159 = OpLabel
+        %164 = OpAccessChain %_ptr_Uniform_half %a %uint_0 %p0_1 %uint_0 %p1_1 %uint_1 %p3
+        %165 = OpLoad %half %164
+               OpReturnValue %165
+        %160 = OpLabel
+        %166 = OpAccessChain %_ptr_Uniform_half %a %uint_0 %p0_1 %uint_0 %p1_1 %uint_2 %p3
+        %167 = OpLoad %half %166
+               OpReturnValue %167
+        %157 = OpLabel
+               OpReturnValue %168
+        %156 = OpLabel
+               OpReturnValue %168
+               OpFunctionEnd
+          %f = OpFunction %void None %169
+        %172 = OpLabel
+        %173 = OpFunctionCall %int %i
+        %174 = OpFunctionCall %int %i
+        %175 = OpFunctionCall %int %i
+        %178 = OpAccessChain %_ptr_Uniform__arr_Outer_std140_uint_4 %a %uint_0
+        %179 = OpLoad %_arr_Outer_std140_uint_4 %178
+        %176 = OpFunctionCall %_arr_Outer_uint_4 %conv_arr4_Outer %179
+        %182 = OpAccessChain %_ptr_Uniform_Outer_std140 %a %uint_0 %173
+        %183 = OpLoad %Outer_std140 %182
+        %180 = OpFunctionCall %Outer %conv_Outer %183
+        %186 = OpAccessChain %_ptr_Uniform__arr_Inner_std140_uint_4 %a %uint_0 %173 %uint_0
+        %187 = OpLoad %_arr_Inner_std140_uint_4 %186
+        %184 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr4_Inner %187
+        %189 = OpAccessChain %_ptr_Uniform_Inner_std140 %a %uint_0 %173 %uint_0 %174
+        %190 = OpLoad %Inner_std140 %189
+        %188 = OpFunctionCall %Inner %conv_Inner %190
+        %192 = OpBitcast %uint %173
+        %193 = OpBitcast %uint %174
+        %191 = OpFunctionCall %mat3v4half %load_a_inner_p0_a_p1_m %192 %193
+        %195 = OpBitcast %uint %173
+        %196 = OpBitcast %uint %174
+        %197 = OpBitcast %uint %175
+        %194 = OpFunctionCall %v4half %load_a_inner_p0_a_p1_m_p2 %195 %196 %197
+        %198 = OpFunctionCall %int %i
+        %200 = OpBitcast %uint %173
+        %201 = OpBitcast %uint %174
+        %202 = OpBitcast %uint %175
+        %203 = OpBitcast %uint %198
+        %199 = OpFunctionCall %half %load_a_inner_p0_a_p1_m_p2_p3 %200 %201 %202 %203
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f16/dynamic_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/dynamic_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..f439e90
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/dynamic_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,36 @@
+enable f16;
+
+struct Inner {
+  @size(64)
+  m : mat3x4<f16>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+var<private> counter = 0;
+
+fn i() -> i32 {
+  counter++;
+  return counter;
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &(a);
+  let p_a_i = &((*(p_a))[i()]);
+  let p_a_i_a = &((*(p_a_i)).a);
+  let p_a_i_a_i = &((*(p_a_i_a))[i()]);
+  let p_a_i_a_i_m = &((*(p_a_i_a_i)).m);
+  let p_a_i_a_i_m_i = &((*(p_a_i_a_i_m))[i()]);
+  let l_a : array<Outer, 4> = *(p_a);
+  let l_a_i : Outer = *(p_a_i);
+  let l_a_i_a : array<Inner, 4> = *(p_a_i_a);
+  let l_a_i_a_i : Inner = *(p_a_i_a_i);
+  let l_a_i_a_i_m : mat3x4<f16> = *(p_a_i_a_i_m);
+  let l_a_i_a_i_m_i : vec4<f16> = *(p_a_i_a_i_m_i);
+  let l_a_i_a_i_m_i_i : f16 = (*(p_a_i_a_i_m_i))[i()];
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f16/static_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/static_index_via_ptr.wgsl
new file mode 100644
index 0000000..bb5b51e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/static_index_via_ptr.wgsl

@@ -0,0 +1,31 @@
+enable f16;
+
+struct Inner {
+  @size(64)
+  m : mat3x4<f16>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &a;
+  let p_a_3 = &((*p_a)[3]);
+  let p_a_3_a = &((*p_a_3).a);
+  let p_a_3_a_2 = &((*p_a_3_a)[2]);
+  let p_a_3_a_2_m = &((*p_a_3_a_2).m);
+  let p_a_3_a_2_m_1 = &((*p_a_3_a_2_m)[1]);
+
+
+  let l_a             : array<Outer, 4> = *p_a;
+  let l_a_3           : Outer           = *p_a_3;
+  let l_a_3_a         : array<Inner, 4> = *p_a_3_a;
+  let l_a_3_a_2       : Inner           = *p_a_3_a_2;
+  let l_a_3_a_2_m     : mat3x4<f16>     = *p_a_3_a_2_m;
+  let l_a_3_a_2_m_1   : vec4<f16>       = *p_a_3_a_2_m_1;
+  let l_a_3_a_2_m_1_0 : f16             = (*p_a_3_a_2_m_1)[0];
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..356bff3
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,76 @@
+struct Inner {
+  matrix<float16_t, 3, 4> m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];
+};
+
+matrix<float16_t, 3, 4> tint_symbol_4(uint4 buffer[64], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  return matrix<float16_t, 3, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]));
+}
+
+Inner tint_symbol_3(uint4 buffer[64], uint offset) {
+  const Inner tint_symbol_7 = {tint_symbol_4(buffer, (offset + 0u))};
+  return tint_symbol_7;
+}
+
+typedef Inner tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[64], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_3(buffer, (offset + (i * 64u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_1(uint4 buffer[64], uint offset) {
+  const Outer tint_symbol_8 = {tint_symbol_2(buffer, (offset + 0u))};
+  return tint_symbol_8;
+}
+
+typedef Outer tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[64], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr_1[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 256u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const Outer l_a[4] = tint_symbol(a, 0u);
+  const Outer l_a_3 = tint_symbol_1(a, 768u);
+  const Inner l_a_3_a[4] = tint_symbol_2(a, 768u);
+  const Inner l_a_3_a_2 = tint_symbol_3(a, 896u);
+  const matrix<float16_t, 3, 4> l_a_3_a_2_m = tint_symbol_4(a, 896u);
+  uint2 ubo_load_6 = a[56].zw;
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  const vector<float16_t, 4> l_a_3_a_2_m_1 = vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]);
+  const float16_t l_a_3_a_2_m_1_0 = float16_t(f16tof32(((a[56].z) & 0xFFFF)));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..681fb5e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,81 @@
+SKIP: FAILED
+
+struct Inner {
+  matrix<float16_t, 3, 4> m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];
+};
+
+matrix<float16_t, 3, 4> tint_symbol_4(uint4 buffer[64], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  return matrix<float16_t, 3, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]));
+}
+
+Inner tint_symbol_3(uint4 buffer[64], uint offset) {
+  const Inner tint_symbol_7 = {tint_symbol_4(buffer, (offset + 0u))};
+  return tint_symbol_7;
+}
+
+typedef Inner tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[64], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_3(buffer, (offset + (i * 64u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_1(uint4 buffer[64], uint offset) {
+  const Outer tint_symbol_8 = {tint_symbol_2(buffer, (offset + 0u))};
+  return tint_symbol_8;
+}
+
+typedef Outer tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[64], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr_1[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 256u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const Outer l_a[4] = tint_symbol(a, 0u);
+  const Outer l_a_3 = tint_symbol_1(a, 768u);
+  const Inner l_a_3_a[4] = tint_symbol_2(a, 768u);
+  const Inner l_a_3_a_2 = tint_symbol_3(a, 896u);
+  const matrix<float16_t, 3, 4> l_a_3_a_2_m = tint_symbol_4(a, 896u);
+  uint2 ubo_load_6 = a[56].zw;
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  const vector<float16_t, 4> l_a_3_a_2_m_1 = vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]);
+  const float16_t l_a_3_a_2_m_1_0 = float16_t(f16tof32(((a[56].z) & 0xFFFF)));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000002CC6184FF70(2,10-18): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f16/static_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/static_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..185039b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/static_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,98 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct Inner {
+  f16mat3x4 m;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+};
+
+struct Inner_std140 {
+  f16vec4 m_0;
+  f16vec4 m_1;
+  f16vec4 m_2;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+};
+
+struct Outer {
+  Inner a[4];
+};
+
+struct Outer_std140 {
+  Inner_std140 a[4];
+};
+
+layout(binding = 0, std140) uniform a_block_std140_ubo {
+  Outer_std140 inner[4];
+} a;
+
+Inner conv_Inner(Inner_std140 val) {
+  return Inner(f16mat3x4(val.m_0, val.m_1, val.m_2), val.pad, val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.pad_7, val.pad_8, val.pad_9);
+}
+
+Inner[4] conv_arr4_Inner(Inner_std140 val[4]) {
+  Inner arr[4] = Inner[4](Inner(f16mat3x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_Inner(val[i]);
+    }
+  }
+  return arr;
+}
+
+Outer conv_Outer(Outer_std140 val) {
+  return Outer(conv_arr4_Inner(val.a));
+}
+
+Outer[4] conv_arr4_Outer(Outer_std140 val[4]) {
+  Outer arr[4] = Outer[4](Outer(Inner[4](Inner(f16mat3x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))), Outer(Inner[4](Inner(f16mat3x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))), Outer(Inner[4](Inner(f16mat3x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))), Outer(Inner[4](Inner(f16mat3x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 
0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat3x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_Outer(val[i]);
+    }
+  }
+  return arr;
+}
+
+f16mat3x4 load_a_inner_3_a_2_m() {
+  return f16mat3x4(a.inner[3u].a[2u].m_0, a.inner[3u].a[2u].m_1, a.inner[3u].a[2u].m_2);
+}
+
+void f() {
+  Outer p_a[4] = conv_arr4_Outer(a.inner);
+  Outer p_a_3 = conv_Outer(a.inner[3u]);
+  Inner p_a_3_a[4] = conv_arr4_Inner(a.inner[3u].a);
+  Inner p_a_3_a_2 = conv_Inner(a.inner[3u].a[2u]);
+  f16mat3x4 p_a_3_a_2_m = load_a_inner_3_a_2_m();
+  f16vec4 p_a_3_a_2_m_1 = a.inner[3u].a[2u].m_1;
+  Outer l_a[4] = conv_arr4_Outer(a.inner);
+  Outer l_a_3 = conv_Outer(a.inner[3u]);
+  Inner l_a_3_a[4] = conv_arr4_Inner(a.inner[3u].a);
+  Inner l_a_3_a_2 = conv_Inner(a.inner[3u].a[2u]);
+  f16mat3x4 l_a_3_a_2_m = load_a_inner_3_a_2_m();
+  f16vec4 l_a_3_a_2_m_1 = a.inner[3u].a[2u].m_1;
+  float16_t l_a_3_a_2_m_1_0 = a.inner[3u].a[2u].m_1[0u];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f16/static_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/static_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..7d2bfd1
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/static_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,36 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct Inner {
+  /* 0x0000 */ half3x4 m;
+  /* 0x0018 */ tint_array<int8_t, 40> tint_pad;
+};
+
+struct Outer {
+  /* 0x0000 */ tint_array<Inner, 4> a;
+};
+
+kernel void f(const constant tint_array<Outer, 4>* tint_symbol [[buffer(0)]]) {
+  tint_array<Outer, 4> const l_a = *(tint_symbol);
+  Outer const l_a_3 = (*(tint_symbol))[3];
+  tint_array<Inner, 4> const l_a_3_a = (*(tint_symbol))[3].a;
+  Inner const l_a_3_a_2 = (*(tint_symbol))[3].a[2];
+  half3x4 const l_a_3_a_2_m = (*(tint_symbol))[3].a[2].m;
+  half4 const l_a_3_a_2_m_1 = (*(tint_symbol))[3].a[2].m[1];
+  half const l_a_3_a_2_m_1_0 = (*(tint_symbol))[3].a[2].m[1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f16/static_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/static_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..dce6859
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/static_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,232 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 144
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %a_block_std140 "a_block_std140"
+               OpMemberName %a_block_std140 0 "inner"
+               OpName %Outer_std140 "Outer_std140"
+               OpMemberName %Outer_std140 0 "a"
+               OpName %Inner_std140 "Inner_std140"
+               OpMemberName %Inner_std140 0 "m_0"
+               OpMemberName %Inner_std140 1 "m_1"
+               OpMemberName %Inner_std140 2 "m_2"
+               OpName %a "a"
+               OpName %Inner "Inner"
+               OpMemberName %Inner 0 "m"
+               OpName %conv_Inner "conv_Inner"
+               OpName %val "val"
+               OpName %conv_arr4_Inner "conv_arr4_Inner"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index_1 "var_for_index_1"
+               OpName %Outer "Outer"
+               OpMemberName %Outer 0 "a"
+               OpName %conv_Outer "conv_Outer"
+               OpName %val_1 "val"
+               OpName %conv_arr4_Outer "conv_arr4_Outer"
+               OpName %val_2 "val"
+               OpName %arr_0 "arr"
+               OpName %i_0 "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_a_inner_3_a_2_m "load_a_inner_3_a_2_m"
+               OpName %f "f"
+               OpDecorate %a_block_std140 Block
+               OpMemberDecorate %a_block_std140 0 Offset 0
+               OpMemberDecorate %Outer_std140 0 Offset 0
+               OpMemberDecorate %Inner_std140 0 Offset 0
+               OpMemberDecorate %Inner_std140 1 Offset 8
+               OpMemberDecorate %Inner_std140 2 Offset 16
+               OpDecorate %_arr_Inner_std140_uint_4 ArrayStride 64
+               OpDecorate %_arr_Outer_std140_uint_4 ArrayStride 256
+               OpDecorate %a NonWritable
+               OpDecorate %a DescriptorSet 0
+               OpDecorate %a Binding 0
+               OpMemberDecorate %Inner 0 Offset 0
+               OpMemberDecorate %Inner 0 ColMajor
+               OpMemberDecorate %Inner 0 MatrixStride 8
+               OpDecorate %_arr_Inner_uint_4 ArrayStride 64
+               OpMemberDecorate %Outer 0 Offset 0
+               OpDecorate %_arr_Outer_uint_4 ArrayStride 256
+       %half = OpTypeFloat 16
+     %v4half = OpTypeVector %half 4
+%Inner_std140 = OpTypeStruct %v4half %v4half %v4half
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_Inner_std140_uint_4 = OpTypeArray %Inner_std140 %uint_4
+%Outer_std140 = OpTypeStruct %_arr_Inner_std140_uint_4
+%_arr_Outer_std140_uint_4 = OpTypeArray %Outer_std140 %uint_4
+%a_block_std140 = OpTypeStruct %_arr_Outer_std140_uint_4
+%_ptr_Uniform_a_block_std140 = OpTypePointer Uniform %a_block_std140
+          %a = OpVariable %_ptr_Uniform_a_block_std140 Uniform
+ %mat3v4half = OpTypeMatrix %v4half 3
+      %Inner = OpTypeStruct %mat3v4half
+         %12 = OpTypeFunction %Inner %Inner_std140
+%_arr_Inner_uint_4 = OpTypeArray %Inner %uint_4
+         %23 = OpTypeFunction %_arr_Inner_uint_4 %_arr_Inner_std140_uint_4
+%_ptr_Function__arr_Inner_uint_4 = OpTypePointer Function %_arr_Inner_uint_4
+         %30 = OpConstantNull %_arr_Inner_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %33 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_Inner_std140_uint_4 = OpTypePointer Function %_arr_Inner_std140_uint_4
+         %46 = OpConstantNull %_arr_Inner_std140_uint_4
+%_ptr_Function_Inner = OpTypePointer Function %Inner
+%_ptr_Function_Inner_std140 = OpTypePointer Function %Inner_std140
+     %uint_1 = OpConstant %uint 1
+      %Outer = OpTypeStruct %_arr_Inner_uint_4
+         %59 = OpTypeFunction %Outer %Outer_std140
+%_arr_Outer_uint_4 = OpTypeArray %Outer %uint_4
+         %67 = OpTypeFunction %_arr_Outer_uint_4 %_arr_Outer_std140_uint_4
+%_ptr_Function__arr_Outer_uint_4 = OpTypePointer Function %_arr_Outer_uint_4
+         %74 = OpConstantNull %_arr_Outer_uint_4
+%_ptr_Function__arr_Outer_std140_uint_4 = OpTypePointer Function %_arr_Outer_std140_uint_4
+         %87 = OpConstantNull %_arr_Outer_std140_uint_4
+%_ptr_Function_Outer = OpTypePointer Function %Outer
+%_ptr_Function_Outer_std140 = OpTypePointer Function %Outer_std140
+         %99 = OpTypeFunction %mat3v4half
+     %uint_0 = OpConstant %uint 0
+     %uint_3 = OpConstant %uint 3
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_Inner_std140 = OpTypePointer Uniform %Inner_std140
+%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half
+       %void = OpTypeVoid
+        %119 = OpTypeFunction %void
+%_ptr_Uniform__arr_Outer_std140_uint_4 = OpTypePointer Uniform %_arr_Outer_std140_uint_4
+%_ptr_Uniform_Outer_std140 = OpTypePointer Uniform %Outer_std140
+%_ptr_Uniform__arr_Inner_std140_uint_4 = OpTypePointer Uniform %_arr_Inner_std140_uint_4
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+ %conv_Inner = OpFunction %Inner None %12
+        %val = OpFunctionParameter %Inner_std140
+         %17 = OpLabel
+         %18 = OpCompositeExtract %v4half %val 0
+         %19 = OpCompositeExtract %v4half %val 1
+         %20 = OpCompositeExtract %v4half %val 2
+         %21 = OpCompositeConstruct %mat3v4half %18 %19 %20
+         %22 = OpCompositeConstruct %Inner %21
+               OpReturnValue %22
+               OpFunctionEnd
+%conv_arr4_Inner = OpFunction %_arr_Inner_uint_4 None %23
+      %val_0 = OpFunctionParameter %_arr_Inner_std140_uint_4
+         %27 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_Inner_uint_4 Function %30
+          %i = OpVariable %_ptr_Function_uint Function %33
+%var_for_index_1 = OpVariable %_ptr_Function__arr_Inner_std140_uint_4 Function %46
+               OpBranch %34
+         %34 = OpLabel
+               OpLoopMerge %35 %36 None
+               OpBranch %37
+         %37 = OpLabel
+         %39 = OpLoad %uint %i
+         %40 = OpULessThan %bool %39 %uint_4
+         %38 = OpLogicalNot %bool %40
+               OpSelectionMerge %42 None
+               OpBranchConditional %38 %43 %42
+         %43 = OpLabel
+               OpBranch %35
+         %42 = OpLabel
+               OpStore %var_for_index_1 %val_0
+         %47 = OpLoad %uint %i
+         %49 = OpAccessChain %_ptr_Function_Inner %arr %47
+         %51 = OpLoad %uint %i
+         %53 = OpAccessChain %_ptr_Function_Inner_std140 %var_for_index_1 %51
+         %54 = OpLoad %Inner_std140 %53
+         %50 = OpFunctionCall %Inner %conv_Inner %54
+               OpStore %49 %50
+               OpBranch %36
+         %36 = OpLabel
+         %55 = OpLoad %uint %i
+         %57 = OpIAdd %uint %55 %uint_1
+               OpStore %i %57
+               OpBranch %34
+         %35 = OpLabel
+         %58 = OpLoad %_arr_Inner_uint_4 %arr
+               OpReturnValue %58
+               OpFunctionEnd
+ %conv_Outer = OpFunction %Outer None %59
+      %val_1 = OpFunctionParameter %Outer_std140
+         %63 = OpLabel
+         %65 = OpCompositeExtract %_arr_Inner_std140_uint_4 %val_1 0
+         %64 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr4_Inner %65
+         %66 = OpCompositeConstruct %Outer %64
+               OpReturnValue %66
+               OpFunctionEnd
+%conv_arr4_Outer = OpFunction %_arr_Outer_uint_4 None %67
+      %val_2 = OpFunctionParameter %_arr_Outer_std140_uint_4
+         %71 = OpLabel
+      %arr_0 = OpVariable %_ptr_Function__arr_Outer_uint_4 Function %74
+        %i_0 = OpVariable %_ptr_Function_uint Function %33
+%var_for_index = OpVariable %_ptr_Function__arr_Outer_std140_uint_4 Function %87
+               OpBranch %76
+         %76 = OpLabel
+               OpLoopMerge %77 %78 None
+               OpBranch %79
+         %79 = OpLabel
+         %81 = OpLoad %uint %i_0
+         %82 = OpULessThan %bool %81 %uint_4
+         %80 = OpLogicalNot %bool %82
+               OpSelectionMerge %83 None
+               OpBranchConditional %80 %84 %83
+         %84 = OpLabel
+               OpBranch %77
+         %83 = OpLabel
+               OpStore %var_for_index %val_2
+         %88 = OpLoad %uint %i_0
+         %90 = OpAccessChain %_ptr_Function_Outer %arr_0 %88
+         %92 = OpLoad %uint %i_0
+         %94 = OpAccessChain %_ptr_Function_Outer_std140 %var_for_index %92
+         %95 = OpLoad %Outer_std140 %94
+         %91 = OpFunctionCall %Outer %conv_Outer %95
+               OpStore %90 %91
+               OpBranch %78
+         %78 = OpLabel
+         %96 = OpLoad %uint %i_0
+         %97 = OpIAdd %uint %96 %uint_1
+               OpStore %i_0 %97
+               OpBranch %76
+         %77 = OpLabel
+         %98 = OpLoad %_arr_Outer_uint_4 %arr_0
+               OpReturnValue %98
+               OpFunctionEnd
+%load_a_inner_3_a_2_m = OpFunction %mat3v4half None %99
+        %101 = OpLabel
+        %107 = OpAccessChain %_ptr_Uniform_Inner_std140 %a %uint_0 %uint_3 %uint_0 %uint_2
+        %110 = OpAccessChain %_ptr_Uniform_v4half %107 %uint_0
+        %111 = OpLoad %v4half %110
+        %113 = OpAccessChain %_ptr_Uniform_v4half %107 %uint_1
+        %114 = OpLoad %v4half %113
+        %116 = OpAccessChain %_ptr_Uniform_v4half %107 %uint_2
+        %117 = OpLoad %v4half %116
+        %118 = OpCompositeConstruct %mat3v4half %111 %114 %117
+               OpReturnValue %118
+               OpFunctionEnd
+          %f = OpFunction %void None %119
+        %122 = OpLabel
+        %125 = OpAccessChain %_ptr_Uniform__arr_Outer_std140_uint_4 %a %uint_0
+        %126 = OpLoad %_arr_Outer_std140_uint_4 %125
+        %123 = OpFunctionCall %_arr_Outer_uint_4 %conv_arr4_Outer %126
+        %129 = OpAccessChain %_ptr_Uniform_Outer_std140 %a %uint_0 %uint_3
+        %130 = OpLoad %Outer_std140 %129
+        %127 = OpFunctionCall %Outer %conv_Outer %130
+        %133 = OpAccessChain %_ptr_Uniform__arr_Inner_std140_uint_4 %a %uint_0 %uint_3 %uint_0
+        %134 = OpLoad %_arr_Inner_std140_uint_4 %133
+        %131 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr4_Inner %134
+        %136 = OpAccessChain %_ptr_Uniform_Inner_std140 %a %uint_0 %uint_3 %uint_0 %uint_2
+        %137 = OpLoad %Inner_std140 %136
+        %135 = OpFunctionCall %Inner %conv_Inner %137
+        %138 = OpFunctionCall %mat3v4half %load_a_inner_3_a_2_m
+        %139 = OpAccessChain %_ptr_Uniform_v4half %a %uint_0 %uint_3 %uint_0 %uint_2 %uint_1
+        %140 = OpLoad %v4half %139
+        %142 = OpAccessChain %_ptr_Uniform_half %a %uint_0 %uint_3 %uint_0 %uint_2 %uint_1 %33
+        %143 = OpLoad %half %142
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f16/static_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/static_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..f22e235
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/static_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,29 @@
+enable f16;
+
+struct Inner {
+  @size(64)
+  m : mat3x4<f16>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &(a);
+  let p_a_3 = &((*(p_a))[3]);
+  let p_a_3_a = &((*(p_a_3)).a);
+  let p_a_3_a_2 = &((*(p_a_3_a))[2]);
+  let p_a_3_a_2_m = &((*(p_a_3_a_2)).m);
+  let p_a_3_a_2_m_1 = &((*(p_a_3_a_2_m))[1]);
+  let l_a : array<Outer, 4> = *(p_a);
+  let l_a_3 : Outer = *(p_a_3);
+  let l_a_3_a : array<Inner, 4> = *(p_a_3_a);
+  let l_a_3_a_2 : Inner = *(p_a_3_a_2);
+  let l_a_3_a_2_m : mat3x4<f16> = *(p_a_3_a_2_m);
+  let l_a_3_a_2_m_1 : vec4<f16> = *(p_a_3_a_2_m_1);
+  let l_a_3_a_2_m_1_0 : f16 = (*(p_a_3_a_2_m_1))[0];
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_builtin.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_builtin.wgsl
new file mode 100644
index 0000000..8888b64
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_builtin.wgsl

@@ -0,0 +1,17 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat3x4<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    let t = transpose(u[2].m);
+    let l = length(u[0].m[1].ywxz);
+    let a = abs(u[0].m[1].ywxz.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_builtin.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_builtin.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..b0b62a9
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_builtin.wgsl.expected.dxc.hlsl

@@ -0,0 +1,36 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+
+matrix<float16_t, 3, 4> tint_symbol(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  return matrix<float16_t, 3, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 4, 3> t = transpose(tint_symbol(u, 264u));
+  uint2 ubo_load_6 = u[1].xy;
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  const float16_t l = length(vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]).ywxz);
+  uint2 ubo_load_7 = u[1].xy;
+  vector<float16_t, 2> ubo_load_7_xz = vector<float16_t, 2>(f16tof32(ubo_load_7 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_7_yw = vector<float16_t, 2>(f16tof32(ubo_load_7 >> 16));
+  const float16_t a = abs(vector<float16_t, 4>(ubo_load_7_xz[0], ubo_load_7_yw[0], ubo_load_7_xz[1], ubo_load_7_yw[1]).ywxz.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_builtin.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_builtin.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..2568b7a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_builtin.wgsl.expected.fxc.hlsl

@@ -0,0 +1,41 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+
+matrix<float16_t, 3, 4> tint_symbol(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  return matrix<float16_t, 3, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 4, 3> t = transpose(tint_symbol(u, 264u));
+  uint2 ubo_load_6 = u[1].xy;
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  const float16_t l = length(vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]).ywxz);
+  uint2 ubo_load_7 = u[1].xy;
+  vector<float16_t, 2> ubo_load_7_xz = vector<float16_t, 2>(f16tof32(ubo_load_7 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_7_yw = vector<float16_t, 2>(f16tof32(ubo_load_7 >> 16));
+  const float16_t a = abs(vector<float16_t, 4>(ubo_load_7_xz[0], ubo_load_7_yw[0], ubo_load_7_xz[1], ubo_load_7_yw[1]).ywxz.x);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000002683960F9A0(5,8-16): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_builtin.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_builtin.wgsl.expected.glsl
new file mode 100644
index 0000000..afb0d15
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_builtin.wgsl.expected.glsl

@@ -0,0 +1,84 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct S {
+  int before;
+  uint pad;
+  f16mat3x4 m;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  int after;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+};
+
+struct S_std140 {
+  int before;
+  uint pad;
+  f16vec4 m_0;
+  f16vec4 m_1;
+  f16vec4 m_2;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  int after;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  S_std140 inner[4];
+} u;
+
+f16mat3x4 load_u_inner_2_m() {
+  return f16mat3x4(u.inner[2u].m_0, u.inner[2u].m_1, u.inner[2u].m_2);
+}
+
+void f() {
+  f16mat4x3 t = transpose(load_u_inner_2_m());
+  float16_t l = length(u.inner[0u].m_1.ywxz);
+  float16_t a = abs(u.inner[0u].m_1.ywxz[0u]);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_builtin.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_builtin.wgsl.expected.msl
new file mode 100644
index 0000000..0c0b9e8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_builtin.wgsl.expected.msl

@@ -0,0 +1,32 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
+  /* 0x0008 */ half3x4 m;
+  /* 0x0020 */ tint_array<int8_t, 32> tint_pad_1;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_2;
+};
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol [[buffer(0)]]) {
+  half4x3 const t = transpose((*(tint_symbol))[2].m);
+  half const l = length(half4((*(tint_symbol))[0].m[1]).ywxz);
+  half const a = fabs(half4((*(tint_symbol))[0].m[1]).ywxz[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_builtin.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_builtin.wgsl.expected.spvasm
new file mode 100644
index 0000000..1d01a7c
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_builtin.wgsl.expected.spvasm

@@ -0,0 +1,86 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 52
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+         %42 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "m_2"
+               OpMemberName %S_std140 4 "after"
+               OpName %u "u"
+               OpName %load_u_inner_2_m "load_u_inner_2_m"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 8
+               OpMemberDecorate %S_std140 2 Offset 16
+               OpMemberDecorate %S_std140 3 Offset 24
+               OpMemberDecorate %S_std140 4 Offset 64
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+        %int = OpTypeInt 32 1
+       %half = OpTypeFloat 16
+     %v4half = OpTypeVector %half 4
+   %S_std140 = OpTypeStruct %int %v4half %v4half %v4half %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat3v4half = OpTypeMatrix %v4half 3
+         %11 = OpTypeFunction %mat3v4half
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+     %uint_1 = OpConstant %uint 1
+%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half
+     %uint_3 = OpConstant %uint 3
+       %void = OpTypeVoid
+         %33 = OpTypeFunction %void
+     %v3half = OpTypeVector %half 3
+ %mat4v3half = OpTypeMatrix %v3half 4
+         %43 = OpConstantNull %uint
+%load_u_inner_2_m = OpFunction %mat3v4half None %11
+         %14 = OpLabel
+         %19 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %23 = OpAccessChain %_ptr_Uniform_v4half %19 %uint_1
+         %24 = OpLoad %v4half %23
+         %26 = OpAccessChain %_ptr_Uniform_v4half %19 %uint_2
+         %27 = OpLoad %v4half %26
+         %30 = OpAccessChain %_ptr_Uniform_v4half %19 %uint_3
+         %31 = OpLoad %v4half %30
+         %32 = OpCompositeConstruct %mat3v4half %24 %27 %31
+               OpReturnValue %32
+               OpFunctionEnd
+          %f = OpFunction %void None %33
+         %36 = OpLabel
+         %40 = OpFunctionCall %mat3v4half %load_u_inner_2_m
+         %37 = OpTranspose %mat4v3half %40
+         %44 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0 %43 %uint_2
+         %45 = OpLoad %v4half %44
+         %46 = OpVectorShuffle %v4half %45 %45 1 3 0 2
+         %41 = OpExtInst %half %42 Length %46
+         %48 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0 %43 %uint_2
+         %49 = OpLoad %v4half %48
+         %50 = OpVectorShuffle %v4half %49 %49 1 3 0 2
+         %51 = OpCompositeExtract %half %50 0
+         %47 = OpExtInst %half %42 FAbs %51
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_builtin.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_builtin.wgsl.expected.wgsl
new file mode 100644
index 0000000..c598c75
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_builtin.wgsl.expected.wgsl

@@ -0,0 +1,17 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat3x4<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let t = transpose(u[2].m);
+  let l = length(u[0].m[1].ywxz);
+  let a = abs(u[0].m[1].ywxz.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_fn.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_fn.wgsl
new file mode 100644
index 0000000..3161686
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_fn.wgsl

@@ -0,0 +1,25 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat3x4<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+fn a(a : array<S, 4>) {}
+fn b(s : S) {}
+fn c(m : mat3x4<f16>) {}
+fn d(v : vec4<f16>) {}
+fn e(f : f16) {}
+
+@compute @workgroup_size(1)
+fn f() {
+    a(u);
+    b(u[2]);
+    c(u[2].m);
+    d(u[0].m[1].ywxz);
+    e(u[0].m[1].ywxz.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_fn.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_fn.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..a190fc0
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_fn.wgsl.expected.dxc.hlsl

@@ -0,0 +1,77 @@
+struct S {
+  int before;
+  matrix<float16_t, 3, 4> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+
+void a(S a_1[4]) {
+}
+
+void b(S s) {
+}
+
+void c(matrix<float16_t, 3, 4> m) {
+}
+
+void d(vector<float16_t, 4> v) {
+}
+
+void e(float16_t f_1) {
+}
+
+matrix<float16_t, 3, 4> tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  return matrix<float16_t, 3, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]));
+}
+
+S tint_symbol_1(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_3 = ((offset + 0u)) / 4;
+  const uint scalar_offset_4 = ((offset + 64u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4]), tint_symbol_3(buffer, (offset + 8u)), asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(tint_symbol_1(u, 256u));
+  c(tint_symbol_3(u, 264u));
+  uint2 ubo_load_6 = u[1].xy;
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  d(vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]).ywxz);
+  uint2 ubo_load_7 = u[1].xy;
+  vector<float16_t, 2> ubo_load_7_xz = vector<float16_t, 2>(f16tof32(ubo_load_7 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_7_yw = vector<float16_t, 2>(f16tof32(ubo_load_7 >> 16));
+  e(vector<float16_t, 4>(ubo_load_7_xz[0], ubo_load_7_yw[0], ubo_load_7_xz[1], ubo_load_7_yw[1]).ywxz.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_fn.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_fn.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..21d911e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_fn.wgsl.expected.fxc.hlsl

@@ -0,0 +1,82 @@
+SKIP: FAILED
+
+struct S {
+  int before;
+  matrix<float16_t, 3, 4> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+
+void a(S a_1[4]) {
+}
+
+void b(S s) {
+}
+
+void c(matrix<float16_t, 3, 4> m) {
+}
+
+void d(vector<float16_t, 4> v) {
+}
+
+void e(float16_t f_1) {
+}
+
+matrix<float16_t, 3, 4> tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  return matrix<float16_t, 3, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]));
+}
+
+S tint_symbol_1(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_3 = ((offset + 0u)) / 4;
+  const uint scalar_offset_4 = ((offset + 64u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4]), tint_symbol_3(buffer, (offset + 8u)), asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(tint_symbol_1(u, 256u));
+  c(tint_symbol_3(u, 264u));
+  uint2 ubo_load_6 = u[1].xy;
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  d(vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]).ywxz);
+  uint2 ubo_load_7 = u[1].xy;
+  vector<float16_t, 2> ubo_load_7_xz = vector<float16_t, 2>(f16tof32(ubo_load_7 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_7_yw = vector<float16_t, 2>(f16tof32(ubo_load_7 >> 16));
+  e(vector<float16_t, 4>(ubo_load_7_xz[0], ubo_load_7_yw[0], ubo_load_7_xz[1], ubo_load_7_yw[1]).ywxz.x);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x00000278FC792580(3,10-18): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_fn.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_fn.wgsl.expected.glsl
new file mode 100644
index 0000000..d66d7ae
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_fn.wgsl.expected.glsl

@@ -0,0 +1,115 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct S {
+  int before;
+  uint pad;
+  f16mat3x4 m;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  int after;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+};
+
+struct S_std140 {
+  int before;
+  uint pad;
+  f16vec4 m_0;
+  f16vec4 m_1;
+  f16vec4 m_2;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  int after;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  S_std140 inner[4];
+} u;
+
+void a(S a_1[4]) {
+}
+
+void b(S s) {
+}
+
+void c(f16mat3x4 m) {
+}
+
+void d(f16vec4 v) {
+}
+
+void e(float16_t f_1) {
+}
+
+S conv_S(S_std140 val) {
+  return S(val.before, val.pad, f16mat3x4(val.m_0, val.m_1, val.m_2), val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.pad_7, val.pad_8, val.after, val.pad_9, val.pad_10, val.pad_11, val.pad_12, val.pad_13, val.pad_14, val.pad_15, val.pad_16, val.pad_17, val.pad_18, val.pad_19, val.pad_20, val.pad_21, val.pad_22, val.pad_23);
+}
+
+S[4] conv_arr4_S(S_std140 val[4]) {
+  S arr[4] = S[4](S(0, 0u, f16mat3x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat3x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat3x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat3x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_S(val[i]);
+    }
+  }
+  return arr;
+}
+
+f16mat3x4 load_u_inner_2_m() {
+  return f16mat3x4(u.inner[2u].m_0, u.inner[2u].m_1, u.inner[2u].m_2);
+}
+
+void f() {
+  a(conv_arr4_S(u.inner));
+  b(conv_S(u.inner[2u]));
+  c(load_u_inner_2_m());
+  d(u.inner[0u].m_1.ywxz);
+  e(u.inner[0u].m_1.ywxz[0u]);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_fn.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_fn.wgsl.expected.msl
new file mode 100644
index 0000000..abf6c24
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_fn.wgsl.expected.msl

@@ -0,0 +1,49 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
+  /* 0x0008 */ half3x4 m;
+  /* 0x0020 */ tint_array<int8_t, 32> tint_pad_1;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_2;
+};
+
+void a(tint_array<S, 4> a_1) {
+}
+
+void b(S s) {
+}
+
+void c(half3x4 m) {
+}
+
+void d(half4 v) {
+}
+
+void e(half f_1) {
+}
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol [[buffer(0)]]) {
+  a(*(tint_symbol));
+  b((*(tint_symbol))[2]);
+  c((*(tint_symbol))[2].m);
+  d(half4((*(tint_symbol))[0].m[1]).ywxz);
+  e(half4((*(tint_symbol))[0].m[1]).ywxz[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_fn.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_fn.wgsl.expected.spvasm
new file mode 100644
index 0000000..296c26c
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_fn.wgsl.expected.spvasm

@@ -0,0 +1,210 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 124
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "m_2"
+               OpMemberName %S_std140 4 "after"
+               OpName %u "u"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %a "a"
+               OpName %a_1 "a_1"
+               OpName %b "b"
+               OpName %s "s"
+               OpName %c "c"
+               OpName %m "m"
+               OpName %d "d"
+               OpName %v "v"
+               OpName %e "e"
+               OpName %f_1 "f_1"
+               OpName %conv_S "conv_S"
+               OpName %val "val"
+               OpName %conv_arr4_S "conv_arr4_S"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_u_inner_2_m "load_u_inner_2_m"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 8
+               OpMemberDecorate %S_std140 2 Offset 16
+               OpMemberDecorate %S_std140 3 Offset 24
+               OpMemberDecorate %S_std140 4 Offset 64
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 8
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 8
+               OpMemberDecorate %S 2 Offset 64
+               OpDecorate %_arr_S_uint_4 ArrayStride 128
+        %int = OpTypeInt 32 1
+       %half = OpTypeFloat 16
+     %v4half = OpTypeVector %half 4
+   %S_std140 = OpTypeStruct %int %v4half %v4half %v4half %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+       %void = OpTypeVoid
+ %mat3v4half = OpTypeMatrix %v4half 3
+          %S = OpTypeStruct %int %mat3v4half %int
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+         %11 = OpTypeFunction %void %_arr_S_uint_4
+         %19 = OpTypeFunction %void %S
+         %23 = OpTypeFunction %void %mat3v4half
+         %27 = OpTypeFunction %void %v4half
+         %31 = OpTypeFunction %void %half
+         %35 = OpTypeFunction %S %S_std140
+         %46 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
+%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
+         %52 = OpConstantNull %_arr_S_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %55 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
+         %68 = OpConstantNull %_arr_S_std140_uint_4
+%_ptr_Function_S = OpTypePointer Function %S
+%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
+     %uint_1 = OpConstant %uint 1
+         %81 = OpTypeFunction %mat3v4half
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half
+     %uint_3 = OpConstant %uint 3
+        %101 = OpTypeFunction %void
+%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
+          %a = OpFunction %void None %11
+        %a_1 = OpFunctionParameter %_arr_S_uint_4
+         %18 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %b = OpFunction %void None %19
+          %s = OpFunctionParameter %S
+         %22 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %c = OpFunction %void None %23
+          %m = OpFunctionParameter %mat3v4half
+         %26 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %d = OpFunction %void None %27
+          %v = OpFunctionParameter %v4half
+         %30 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %e = OpFunction %void None %31
+        %f_1 = OpFunctionParameter %half
+         %34 = OpLabel
+               OpReturn
+               OpFunctionEnd
+     %conv_S = OpFunction %S None %35
+        %val = OpFunctionParameter %S_std140
+         %38 = OpLabel
+         %39 = OpCompositeExtract %int %val 0
+         %40 = OpCompositeExtract %v4half %val 1
+         %41 = OpCompositeExtract %v4half %val 2
+         %42 = OpCompositeExtract %v4half %val 3
+         %43 = OpCompositeConstruct %mat3v4half %40 %41 %42
+         %44 = OpCompositeExtract %int %val 4
+         %45 = OpCompositeConstruct %S %39 %43 %44
+               OpReturnValue %45
+               OpFunctionEnd
+%conv_arr4_S = OpFunction %_arr_S_uint_4 None %46
+      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
+         %49 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %52
+          %i = OpVariable %_ptr_Function_uint Function %55
+%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %68
+               OpBranch %56
+         %56 = OpLabel
+               OpLoopMerge %57 %58 None
+               OpBranch %59
+         %59 = OpLabel
+         %61 = OpLoad %uint %i
+         %62 = OpULessThan %bool %61 %uint_4
+         %60 = OpLogicalNot %bool %62
+               OpSelectionMerge %64 None
+               OpBranchConditional %60 %65 %64
+         %65 = OpLabel
+               OpBranch %57
+         %64 = OpLabel
+               OpStore %var_for_index %val_0
+         %69 = OpLoad %uint %i
+         %71 = OpAccessChain %_ptr_Function_S %arr %69
+         %73 = OpLoad %uint %i
+         %75 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %73
+         %76 = OpLoad %S_std140 %75
+         %72 = OpFunctionCall %S %conv_S %76
+               OpStore %71 %72
+               OpBranch %58
+         %58 = OpLabel
+         %77 = OpLoad %uint %i
+         %79 = OpIAdd %uint %77 %uint_1
+               OpStore %i %79
+               OpBranch %56
+         %57 = OpLabel
+         %80 = OpLoad %_arr_S_uint_4 %arr
+               OpReturnValue %80
+               OpFunctionEnd
+%load_u_inner_2_m = OpFunction %mat3v4half None %81
+         %83 = OpLabel
+         %88 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %91 = OpAccessChain %_ptr_Uniform_v4half %88 %uint_1
+         %92 = OpLoad %v4half %91
+         %94 = OpAccessChain %_ptr_Uniform_v4half %88 %uint_2
+         %95 = OpLoad %v4half %94
+         %98 = OpAccessChain %_ptr_Uniform_v4half %88 %uint_3
+         %99 = OpLoad %v4half %98
+        %100 = OpCompositeConstruct %mat3v4half %92 %95 %99
+               OpReturnValue %100
+               OpFunctionEnd
+          %f = OpFunction %void None %101
+        %103 = OpLabel
+        %107 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
+        %108 = OpLoad %_arr_S_std140_uint_4 %107
+        %105 = OpFunctionCall %_arr_S_uint_4 %conv_arr4_S %108
+        %104 = OpFunctionCall %void %a %105
+        %111 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+        %112 = OpLoad %S_std140 %111
+        %110 = OpFunctionCall %S %conv_S %112
+        %109 = OpFunctionCall %void %b %110
+        %114 = OpFunctionCall %mat3v4half %load_u_inner_2_m
+        %113 = OpFunctionCall %void %c %114
+        %116 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0 %55 %uint_2
+        %117 = OpLoad %v4half %116
+        %118 = OpVectorShuffle %v4half %117 %117 1 3 0 2
+        %115 = OpFunctionCall %void %d %118
+        %120 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0 %55 %uint_2
+        %121 = OpLoad %v4half %120
+        %122 = OpVectorShuffle %v4half %121 %121 1 3 0 2
+        %123 = OpCompositeExtract %half %122 0
+        %119 = OpFunctionCall %void %e %123
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_fn.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_fn.wgsl.expected.wgsl
new file mode 100644
index 0000000..431ed93
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_fn.wgsl.expected.wgsl

@@ -0,0 +1,34 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat3x4<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+fn a(a : array<S, 4>) {
+}
+
+fn b(s : S) {
+}
+
+fn c(m : mat3x4<f16>) {
+}
+
+fn d(v : vec4<f16>) {
+}
+
+fn e(f : f16) {
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  a(u);
+  b(u[2]);
+  c(u[2].m);
+  d(u[0].m[1].ywxz);
+  e(u[0].m[1].ywxz.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_private.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_private.wgsl
new file mode 100644
index 0000000..d857256
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_private.wgsl

@@ -0,0 +1,19 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat3x4<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+var<private> p : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    p = u;
+    p[1] = u[2];
+    p[3].m = u[2].m;
+    p[1].m[0] = u[0].m[1].ywxz;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_private.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_private.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..60c2c7a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_private.wgsl.expected.dxc.hlsl

@@ -0,0 +1,59 @@
+struct S {
+  int before;
+  matrix<float16_t, 3, 4> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+static S p[4] = (S[4])0;
+
+matrix<float16_t, 3, 4> tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  return matrix<float16_t, 3, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]));
+}
+
+S tint_symbol_1(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_3 = ((offset + 0u)) / 4;
+  const uint scalar_offset_4 = ((offset + 64u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4]), tint_symbol_3(buffer, (offset + 8u)), asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = tint_symbol_1(u, 256u);
+  p[3].m = tint_symbol_3(u, 264u);
+  uint2 ubo_load_6 = u[1].xy;
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  p[1].m[0] = vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]).ywxz;
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_private.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_private.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..5da97a8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_private.wgsl.expected.fxc.hlsl

@@ -0,0 +1,64 @@
+SKIP: FAILED
+
+struct S {
+  int before;
+  matrix<float16_t, 3, 4> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+static S p[4] = (S[4])0;
+
+matrix<float16_t, 3, 4> tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  return matrix<float16_t, 3, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]));
+}
+
+S tint_symbol_1(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_3 = ((offset + 0u)) / 4;
+  const uint scalar_offset_4 = ((offset + 64u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4]), tint_symbol_3(buffer, (offset + 8u)), asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = tint_symbol_1(u, 256u);
+  p[3].m = tint_symbol_3(u, 264u);
+  uint2 ubo_load_6 = u[1].xy;
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  p[1].m[0] = vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]).ywxz;
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000002605021E560(3,10-18): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_private.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_private.wgsl.expected.glsl
new file mode 100644
index 0000000..3d4a68d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_private.wgsl.expected.glsl

@@ -0,0 +1,100 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct S {
+  int before;
+  uint pad;
+  f16mat3x4 m;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  int after;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+};
+
+struct S_std140 {
+  int before;
+  uint pad;
+  f16vec4 m_0;
+  f16vec4 m_1;
+  f16vec4 m_2;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  int after;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  S_std140 inner[4];
+} u;
+
+S p[4] = S[4](S(0, 0u, f16mat3x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat3x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat3x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat3x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+S conv_S(S_std140 val) {
+  return S(val.before, val.pad, f16mat3x4(val.m_0, val.m_1, val.m_2), val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.pad_7, val.pad_8, val.after, val.pad_9, val.pad_10, val.pad_11, val.pad_12, val.pad_13, val.pad_14, val.pad_15, val.pad_16, val.pad_17, val.pad_18, val.pad_19, val.pad_20, val.pad_21, val.pad_22, val.pad_23);
+}
+
+S[4] conv_arr4_S(S_std140 val[4]) {
+  S arr[4] = S[4](S(0, 0u, f16mat3x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat3x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat3x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat3x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_S(val[i]);
+    }
+  }
+  return arr;
+}
+
+f16mat3x4 load_u_inner_2_m() {
+  return f16mat3x4(u.inner[2u].m_0, u.inner[2u].m_1, u.inner[2u].m_2);
+}
+
+void f() {
+  p = conv_arr4_S(u.inner);
+  p[1] = conv_S(u.inner[2u]);
+  p[3].m = load_u_inner_2_m();
+  p[1].m[0] = u.inner[0u].m_1.ywxz;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_private.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_private.wgsl.expected.msl
new file mode 100644
index 0000000..23983c5
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_private.wgsl.expected.msl

@@ -0,0 +1,34 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
+  /* 0x0008 */ half3x4 m;
+  /* 0x0020 */ tint_array<int8_t, 32> tint_pad_1;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_2;
+};
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol_1 [[buffer(0)]]) {
+  thread tint_array<S, 4> tint_symbol = {};
+  tint_symbol = *(tint_symbol_1);
+  tint_symbol[1] = (*(tint_symbol_1))[2];
+  tint_symbol[3].m = (*(tint_symbol_1))[2].m;
+  tint_symbol[1].m[0] = half4((*(tint_symbol_1))[0].m[1]).ywxz;
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_private.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_private.wgsl.expected.spvasm
new file mode 100644
index 0000000..dbb4838
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_private.wgsl.expected.spvasm

@@ -0,0 +1,177 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 106
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "m_2"
+               OpMemberName %S_std140 4 "after"
+               OpName %u "u"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %p "p"
+               OpName %conv_S "conv_S"
+               OpName %val "val"
+               OpName %conv_arr4_S "conv_arr4_S"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_u_inner_2_m "load_u_inner_2_m"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 8
+               OpMemberDecorate %S_std140 2 Offset 16
+               OpMemberDecorate %S_std140 3 Offset 24
+               OpMemberDecorate %S_std140 4 Offset 64
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 8
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 8
+               OpMemberDecorate %S 2 Offset 64
+               OpDecorate %_arr_S_uint_4 ArrayStride 128
+        %int = OpTypeInt 32 1
+       %half = OpTypeFloat 16
+     %v4half = OpTypeVector %half 4
+   %S_std140 = OpTypeStruct %int %v4half %v4half %v4half %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat3v4half = OpTypeMatrix %v4half 3
+          %S = OpTypeStruct %int %mat3v4half %int
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+%_ptr_Private__arr_S_uint_4 = OpTypePointer Private %_arr_S_uint_4
+         %16 = OpConstantNull %_arr_S_uint_4
+          %p = OpVariable %_ptr_Private__arr_S_uint_4 Private %16
+         %17 = OpTypeFunction %S %S_std140
+         %28 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
+%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %36 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
+         %49 = OpConstantNull %_arr_S_std140_uint_4
+%_ptr_Function_S = OpTypePointer Function %S
+%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
+     %uint_1 = OpConstant %uint 1
+         %62 = OpTypeFunction %mat3v4half
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half
+     %uint_3 = OpConstant %uint 3
+       %void = OpTypeVoid
+         %82 = OpTypeFunction %void
+%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
+      %int_1 = OpConstant %int 1
+%_ptr_Private_S = OpTypePointer Private %S
+      %int_3 = OpConstant %int 3
+%_ptr_Private_mat3v4half = OpTypePointer Private %mat3v4half
+        %100 = OpConstantNull %int
+%_ptr_Private_v4half = OpTypePointer Private %v4half
+     %conv_S = OpFunction %S None %17
+        %val = OpFunctionParameter %S_std140
+         %20 = OpLabel
+         %21 = OpCompositeExtract %int %val 0
+         %22 = OpCompositeExtract %v4half %val 1
+         %23 = OpCompositeExtract %v4half %val 2
+         %24 = OpCompositeExtract %v4half %val 3
+         %25 = OpCompositeConstruct %mat3v4half %22 %23 %24
+         %26 = OpCompositeExtract %int %val 4
+         %27 = OpCompositeConstruct %S %21 %25 %26
+               OpReturnValue %27
+               OpFunctionEnd
+%conv_arr4_S = OpFunction %_arr_S_uint_4 None %28
+      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
+         %31 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %16
+          %i = OpVariable %_ptr_Function_uint Function %36
+%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %49
+               OpBranch %37
+         %37 = OpLabel
+               OpLoopMerge %38 %39 None
+               OpBranch %40
+         %40 = OpLabel
+         %42 = OpLoad %uint %i
+         %43 = OpULessThan %bool %42 %uint_4
+         %41 = OpLogicalNot %bool %43
+               OpSelectionMerge %45 None
+               OpBranchConditional %41 %46 %45
+         %46 = OpLabel
+               OpBranch %38
+         %45 = OpLabel
+               OpStore %var_for_index %val_0
+         %50 = OpLoad %uint %i
+         %52 = OpAccessChain %_ptr_Function_S %arr %50
+         %54 = OpLoad %uint %i
+         %56 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %54
+         %57 = OpLoad %S_std140 %56
+         %53 = OpFunctionCall %S %conv_S %57
+               OpStore %52 %53
+               OpBranch %39
+         %39 = OpLabel
+         %58 = OpLoad %uint %i
+         %60 = OpIAdd %uint %58 %uint_1
+               OpStore %i %60
+               OpBranch %37
+         %38 = OpLabel
+         %61 = OpLoad %_arr_S_uint_4 %arr
+               OpReturnValue %61
+               OpFunctionEnd
+%load_u_inner_2_m = OpFunction %mat3v4half None %62
+         %64 = OpLabel
+         %69 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %72 = OpAccessChain %_ptr_Uniform_v4half %69 %uint_1
+         %73 = OpLoad %v4half %72
+         %75 = OpAccessChain %_ptr_Uniform_v4half %69 %uint_2
+         %76 = OpLoad %v4half %75
+         %79 = OpAccessChain %_ptr_Uniform_v4half %69 %uint_3
+         %80 = OpLoad %v4half %79
+         %81 = OpCompositeConstruct %mat3v4half %73 %76 %80
+               OpReturnValue %81
+               OpFunctionEnd
+          %f = OpFunction %void None %82
+         %85 = OpLabel
+         %88 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
+         %89 = OpLoad %_arr_S_std140_uint_4 %88
+         %86 = OpFunctionCall %_arr_S_uint_4 %conv_arr4_S %89
+               OpStore %p %86
+         %92 = OpAccessChain %_ptr_Private_S %p %int_1
+         %94 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %95 = OpLoad %S_std140 %94
+         %93 = OpFunctionCall %S %conv_S %95
+               OpStore %92 %93
+         %98 = OpAccessChain %_ptr_Private_mat3v4half %p %int_3 %uint_1
+         %99 = OpFunctionCall %mat3v4half %load_u_inner_2_m
+               OpStore %98 %99
+        %102 = OpAccessChain %_ptr_Private_v4half %p %int_1 %uint_1 %100
+        %103 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0 %36 %uint_2
+        %104 = OpLoad %v4half %103
+        %105 = OpVectorShuffle %v4half %104 %104 1 3 0 2
+               OpStore %102 %105
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_private.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_private.wgsl.expected.wgsl
new file mode 100644
index 0000000..e8ef5f7
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_private.wgsl.expected.wgsl

@@ -0,0 +1,20 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat3x4<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+var<private> p : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  p = u;
+  p[1] = u[2];
+  p[3].m = u[2].m;
+  p[1].m[0] = u[0].m[1].ywxz;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_storage.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_storage.wgsl
new file mode 100644
index 0000000..ab3c08c
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_storage.wgsl

@@ -0,0 +1,19 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat3x4<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    s = u;
+    s[1] = u[2];
+    s[3].m = u[2].m;
+    s[1].m[0] = u[0].m[1].ywxz;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_storage.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_storage.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..c236690
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_storage.wgsl.expected.dxc.hlsl

@@ -0,0 +1,80 @@
+struct S {
+  int before;
+  matrix<float16_t, 3, 4> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol_3(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 3, 4> value) {
+  buffer.Store<vector<float16_t, 4> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 8u), value[1u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 16u), value[2u]);
+}
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, S value) {
+  buffer.Store((offset + 0u), asuint(value.before));
+  tint_symbol_3(buffer, (offset + 8u), value.m);
+  buffer.Store((offset + 64u), asuint(value.after));
+}
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, S value[4]) {
+  S array[4] = value;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      tint_symbol_1(buffer, (offset + (i * 128u)), array[i]);
+    }
+  }
+}
+
+matrix<float16_t, 3, 4> tint_symbol_8(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  return matrix<float16_t, 3, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]));
+}
+
+S tint_symbol_6(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_3 = ((offset + 0u)) / 4;
+  const uint scalar_offset_4 = ((offset + 64u)) / 4;
+  const S tint_symbol_10 = {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4]), tint_symbol_8(buffer, (offset + 8u)), asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4])};
+  return tint_symbol_10;
+}
+
+typedef S tint_symbol_5_ret[4];
+tint_symbol_5_ret tint_symbol_5(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_6(buffer, (offset + (i_1 * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_5(u, 0u));
+  tint_symbol_1(s, 128u, tint_symbol_6(u, 256u));
+  tint_symbol_3(s, 392u, tint_symbol_8(u, 264u));
+  uint2 ubo_load_6 = u[1].xy;
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  s.Store<vector<float16_t, 4> >(136u, vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]).ywxz);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_storage.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_storage.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..49f88f1
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_storage.wgsl.expected.fxc.hlsl

@@ -0,0 +1,85 @@
+SKIP: FAILED
+
+struct S {
+  int before;
+  matrix<float16_t, 3, 4> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol_3(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 3, 4> value) {
+  buffer.Store<vector<float16_t, 4> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 8u), value[1u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 16u), value[2u]);
+}
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, S value) {
+  buffer.Store((offset + 0u), asuint(value.before));
+  tint_symbol_3(buffer, (offset + 8u), value.m);
+  buffer.Store((offset + 64u), asuint(value.after));
+}
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, S value[4]) {
+  S array[4] = value;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      tint_symbol_1(buffer, (offset + (i * 128u)), array[i]);
+    }
+  }
+}
+
+matrix<float16_t, 3, 4> tint_symbol_8(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  return matrix<float16_t, 3, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]));
+}
+
+S tint_symbol_6(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_3 = ((offset + 0u)) / 4;
+  const uint scalar_offset_4 = ((offset + 64u)) / 4;
+  const S tint_symbol_10 = {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4]), tint_symbol_8(buffer, (offset + 8u)), asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4])};
+  return tint_symbol_10;
+}
+
+typedef S tint_symbol_5_ret[4];
+tint_symbol_5_ret tint_symbol_5(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_6(buffer, (offset + (i_1 * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_5(u, 0u));
+  tint_symbol_1(s, 128u, tint_symbol_6(u, 256u));
+  tint_symbol_3(s, 392u, tint_symbol_8(u, 264u));
+  uint2 ubo_load_6 = u[1].xy;
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  s.Store<vector<float16_t, 4> >(136u, vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]).ywxz);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001FE54B10170(3,10-18): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_storage.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_storage.wgsl.expected.glsl
new file mode 100644
index 0000000..bd567f0
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_storage.wgsl.expected.glsl

@@ -0,0 +1,103 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct S {
+  int before;
+  uint pad;
+  f16mat3x4 m;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  int after;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+};
+
+struct S_std140 {
+  int before;
+  uint pad;
+  f16vec4 m_0;
+  f16vec4 m_1;
+  f16vec4 m_2;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  int after;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  S_std140 inner[4];
+} u;
+
+layout(binding = 1, std430) buffer u_block_ssbo {
+  S inner[4];
+} s;
+
+S conv_S(S_std140 val) {
+  return S(val.before, val.pad, f16mat3x4(val.m_0, val.m_1, val.m_2), val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.pad_7, val.pad_8, val.after, val.pad_9, val.pad_10, val.pad_11, val.pad_12, val.pad_13, val.pad_14, val.pad_15, val.pad_16, val.pad_17, val.pad_18, val.pad_19, val.pad_20, val.pad_21, val.pad_22, val.pad_23);
+}
+
+S[4] conv_arr4_S(S_std140 val[4]) {
+  S arr[4] = S[4](S(0, 0u, f16mat3x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat3x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat3x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat3x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_S(val[i]);
+    }
+  }
+  return arr;
+}
+
+f16mat3x4 load_u_inner_2_m() {
+  return f16mat3x4(u.inner[2u].m_0, u.inner[2u].m_1, u.inner[2u].m_2);
+}
+
+void f() {
+  s.inner = conv_arr4_S(u.inner);
+  s.inner[1] = conv_S(u.inner[2u]);
+  s.inner[3].m = load_u_inner_2_m();
+  s.inner[1].m[0] = u.inner[0u].m_1.ywxz;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_storage.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_storage.wgsl.expected.msl
new file mode 100644
index 0000000..519bfae
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_storage.wgsl.expected.msl

@@ -0,0 +1,33 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
+  /* 0x0008 */ half3x4 m;
+  /* 0x0020 */ tint_array<int8_t, 32> tint_pad_1;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_2;
+};
+
+kernel void f(device tint_array<S, 4>* tint_symbol [[buffer(1)]], const constant tint_array<S, 4>* tint_symbol_1 [[buffer(0)]]) {
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[2];
+  (*(tint_symbol))[3].m = (*(tint_symbol_1))[2].m;
+  (*(tint_symbol))[1].m[0] = half4((*(tint_symbol_1))[0].m[1]).ywxz;
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_storage.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_storage.wgsl.expected.spvasm
new file mode 100644
index 0000000..7b96c19
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_storage.wgsl.expected.spvasm

@@ -0,0 +1,186 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 109
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "m_2"
+               OpMemberName %S_std140 4 "after"
+               OpName %u "u"
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %s "s"
+               OpName %conv_S "conv_S"
+               OpName %val "val"
+               OpName %conv_arr4_S "conv_arr4_S"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_u_inner_2_m "load_u_inner_2_m"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 8
+               OpMemberDecorate %S_std140 2 Offset 16
+               OpMemberDecorate %S_std140 3 Offset 24
+               OpMemberDecorate %S_std140 4 Offset 64
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 8
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 8
+               OpMemberDecorate %S 2 Offset 64
+               OpDecorate %_arr_S_uint_4 ArrayStride 128
+               OpDecorate %s DescriptorSet 0
+               OpDecorate %s Binding 1
+        %int = OpTypeInt 32 1
+       %half = OpTypeFloat 16
+     %v4half = OpTypeVector %half 4
+   %S_std140 = OpTypeStruct %int %v4half %v4half %v4half %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat3v4half = OpTypeMatrix %v4half 3
+          %S = OpTypeStruct %int %mat3v4half %int
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+    %u_block = OpTypeStruct %_arr_S_uint_4
+%_ptr_StorageBuffer_u_block = OpTypePointer StorageBuffer %u_block
+          %s = OpVariable %_ptr_StorageBuffer_u_block StorageBuffer
+         %17 = OpTypeFunction %S %S_std140
+         %28 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
+%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
+         %34 = OpConstantNull %_arr_S_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %37 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
+         %50 = OpConstantNull %_arr_S_std140_uint_4
+%_ptr_Function_S = OpTypePointer Function %S
+%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
+     %uint_1 = OpConstant %uint 1
+         %63 = OpTypeFunction %mat3v4half
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half
+     %uint_3 = OpConstant %uint 3
+       %void = OpTypeVoid
+         %83 = OpTypeFunction %void
+%_ptr_StorageBuffer__arr_S_uint_4 = OpTypePointer StorageBuffer %_arr_S_uint_4
+%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
+      %int_1 = OpConstant %int 1
+%_ptr_StorageBuffer_S = OpTypePointer StorageBuffer %S
+      %int_3 = OpConstant %int 3
+%_ptr_StorageBuffer_mat3v4half = OpTypePointer StorageBuffer %mat3v4half
+        %103 = OpConstantNull %int
+%_ptr_StorageBuffer_v4half = OpTypePointer StorageBuffer %v4half
+     %conv_S = OpFunction %S None %17
+        %val = OpFunctionParameter %S_std140
+         %20 = OpLabel
+         %21 = OpCompositeExtract %int %val 0
+         %22 = OpCompositeExtract %v4half %val 1
+         %23 = OpCompositeExtract %v4half %val 2
+         %24 = OpCompositeExtract %v4half %val 3
+         %25 = OpCompositeConstruct %mat3v4half %22 %23 %24
+         %26 = OpCompositeExtract %int %val 4
+         %27 = OpCompositeConstruct %S %21 %25 %26
+               OpReturnValue %27
+               OpFunctionEnd
+%conv_arr4_S = OpFunction %_arr_S_uint_4 None %28
+      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
+         %31 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %34
+          %i = OpVariable %_ptr_Function_uint Function %37
+%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %50
+               OpBranch %38
+         %38 = OpLabel
+               OpLoopMerge %39 %40 None
+               OpBranch %41
+         %41 = OpLabel
+         %43 = OpLoad %uint %i
+         %44 = OpULessThan %bool %43 %uint_4
+         %42 = OpLogicalNot %bool %44
+               OpSelectionMerge %46 None
+               OpBranchConditional %42 %47 %46
+         %47 = OpLabel
+               OpBranch %39
+         %46 = OpLabel
+               OpStore %var_for_index %val_0
+         %51 = OpLoad %uint %i
+         %53 = OpAccessChain %_ptr_Function_S %arr %51
+         %55 = OpLoad %uint %i
+         %57 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %55
+         %58 = OpLoad %S_std140 %57
+         %54 = OpFunctionCall %S %conv_S %58
+               OpStore %53 %54
+               OpBranch %40
+         %40 = OpLabel
+         %59 = OpLoad %uint %i
+         %61 = OpIAdd %uint %59 %uint_1
+               OpStore %i %61
+               OpBranch %38
+         %39 = OpLabel
+         %62 = OpLoad %_arr_S_uint_4 %arr
+               OpReturnValue %62
+               OpFunctionEnd
+%load_u_inner_2_m = OpFunction %mat3v4half None %63
+         %65 = OpLabel
+         %70 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %73 = OpAccessChain %_ptr_Uniform_v4half %70 %uint_1
+         %74 = OpLoad %v4half %73
+         %76 = OpAccessChain %_ptr_Uniform_v4half %70 %uint_2
+         %77 = OpLoad %v4half %76
+         %80 = OpAccessChain %_ptr_Uniform_v4half %70 %uint_3
+         %81 = OpLoad %v4half %80
+         %82 = OpCompositeConstruct %mat3v4half %74 %77 %81
+               OpReturnValue %82
+               OpFunctionEnd
+          %f = OpFunction %void None %83
+         %86 = OpLabel
+         %88 = OpAccessChain %_ptr_StorageBuffer__arr_S_uint_4 %s %uint_0
+         %91 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
+         %92 = OpLoad %_arr_S_std140_uint_4 %91
+         %89 = OpFunctionCall %_arr_S_uint_4 %conv_arr4_S %92
+               OpStore %88 %89
+         %95 = OpAccessChain %_ptr_StorageBuffer_S %s %uint_0 %int_1
+         %97 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %98 = OpLoad %S_std140 %97
+         %96 = OpFunctionCall %S %conv_S %98
+               OpStore %95 %96
+        %101 = OpAccessChain %_ptr_StorageBuffer_mat3v4half %s %uint_0 %int_3 %uint_1
+        %102 = OpFunctionCall %mat3v4half %load_u_inner_2_m
+               OpStore %101 %102
+        %105 = OpAccessChain %_ptr_StorageBuffer_v4half %s %uint_0 %int_1 %uint_1 %103
+        %106 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0 %37 %uint_2
+        %107 = OpLoad %v4half %106
+        %108 = OpVectorShuffle %v4half %107 %107 1 3 0 2
+               OpStore %105 %108
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_storage.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_storage.wgsl.expected.wgsl
new file mode 100644
index 0000000..9e7bab0
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_storage.wgsl.expected.wgsl

@@ -0,0 +1,20 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat3x4<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  s = u;
+  s[1] = u[2];
+  s[3].m = u[2].m;
+  s[1].m[0] = u[0].m[1].ywxz;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_workgroup.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_workgroup.wgsl
new file mode 100644
index 0000000..94af8f4
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_workgroup.wgsl

@@ -0,0 +1,19 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat3x4<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+var<workgroup> w : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    w = u;
+    w[1] = u[2];
+    w[3].m = u[2].m;
+    w[1].m[0] = u[0].m[1].ywxz;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_workgroup.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_workgroup.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..e0f83f8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_workgroup.wgsl.expected.dxc.hlsl

@@ -0,0 +1,75 @@
+struct S {
+  int before;
+  matrix<float16_t, 3, 4> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+groupshared S w[4];
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+matrix<float16_t, 3, 4> tint_symbol_5(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  return matrix<float16_t, 3, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]));
+}
+
+S tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_3 = ((offset + 0u)) / 4;
+  const uint scalar_offset_4 = ((offset + 64u)) / 4;
+  const S tint_symbol_8 = {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4]), tint_symbol_5(buffer, (offset + 8u)), asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4])};
+  return tint_symbol_8;
+}
+
+typedef S tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 128u)));
+    }
+  }
+  return arr;
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      const uint i = idx;
+      const S tint_symbol_7 = (S)0;
+      w[i] = tint_symbol_7;
+    }
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = tint_symbol_3(u, 256u);
+  w[3].m = tint_symbol_5(u, 264u);
+  uint2 ubo_load_6 = u[1].xy;
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  w[1].m[0] = vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]).ywxz;
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_workgroup.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_workgroup.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..fc24a1c
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_workgroup.wgsl.expected.fxc.hlsl

@@ -0,0 +1,80 @@
+SKIP: FAILED
+
+struct S {
+  int before;
+  matrix<float16_t, 3, 4> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+groupshared S w[4];
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+matrix<float16_t, 3, 4> tint_symbol_5(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  return matrix<float16_t, 3, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]));
+}
+
+S tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_3 = ((offset + 0u)) / 4;
+  const uint scalar_offset_4 = ((offset + 64u)) / 4;
+  const S tint_symbol_8 = {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4]), tint_symbol_5(buffer, (offset + 8u)), asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4])};
+  return tint_symbol_8;
+}
+
+typedef S tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 128u)));
+    }
+  }
+  return arr;
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      const uint i = idx;
+      const S tint_symbol_7 = (S)0;
+      w[i] = tint_symbol_7;
+    }
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = tint_symbol_3(u, 256u);
+  w[3].m = tint_symbol_5(u, 264u);
+  uint2 ubo_load_6 = u[1].xy;
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  w[1].m[0] = vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]).ywxz;
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001FEBFD6AE90(3,10-18): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_workgroup.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_workgroup.wgsl.expected.glsl
new file mode 100644
index 0000000..2e03b1a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_workgroup.wgsl.expected.glsl

@@ -0,0 +1,108 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct S {
+  int before;
+  uint pad;
+  f16mat3x4 m;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  int after;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+};
+
+struct S_std140 {
+  int before;
+  uint pad;
+  f16vec4 m_0;
+  f16vec4 m_1;
+  f16vec4 m_2;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  int after;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  S_std140 inner[4];
+} u;
+
+shared S w[4];
+S conv_S(S_std140 val) {
+  return S(val.before, val.pad, f16mat3x4(val.m_0, val.m_1, val.m_2), val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.pad_7, val.pad_8, val.after, val.pad_9, val.pad_10, val.pad_11, val.pad_12, val.pad_13, val.pad_14, val.pad_15, val.pad_16, val.pad_17, val.pad_18, val.pad_19, val.pad_20, val.pad_21, val.pad_22, val.pad_23);
+}
+
+S[4] conv_arr4_S(S_std140 val[4]) {
+  S arr[4] = S[4](S(0, 0u, f16mat3x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat3x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat3x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat3x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_S(val[i]);
+    }
+  }
+  return arr;
+}
+
+f16mat3x4 load_u_inner_2_m() {
+  return f16mat3x4(u.inner[2u].m_0, u.inner[2u].m_1, u.inner[2u].m_2);
+}
+
+void f(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      uint i = idx;
+      S tint_symbol = S(0, 0u, f16mat3x4(f16vec4(0.0hf), f16vec4(0.0hf), f16vec4(0.0hf)), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u);
+      w[i] = tint_symbol;
+    }
+  }
+  barrier();
+  w = conv_arr4_S(u.inner);
+  w[1] = conv_S(u.inner[2u]);
+  w[3].m = load_u_inner_2_m();
+  w[1].m[0] = u.inner[0u].m_1.ywxz;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f(gl_LocalInvocationIndex);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_workgroup.wgsl.expected.msl
new file mode 100644
index 0000000..444e7ca
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_workgroup.wgsl.expected.msl

@@ -0,0 +1,48 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
+  /* 0x0008 */ half3x4 m;
+  /* 0x0020 */ tint_array<int8_t, 32> tint_pad_1;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_2;
+};
+
+struct tint_symbol_6 {
+  tint_array<S, 4> w;
+};
+
+void f_inner(uint local_invocation_index, threadgroup tint_array<S, 4>* const tint_symbol_1, const constant tint_array<S, 4>* const tint_symbol_2) {
+  for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+    uint const i = idx;
+    S const tint_symbol = S{};
+    (*(tint_symbol_1))[i] = tint_symbol;
+  }
+  threadgroup_barrier(mem_flags::mem_threadgroup);
+  *(tint_symbol_1) = *(tint_symbol_2);
+  (*(tint_symbol_1))[1] = (*(tint_symbol_2))[2];
+  (*(tint_symbol_1))[3].m = (*(tint_symbol_2))[2].m;
+  (*(tint_symbol_1))[1].m[0] = half4((*(tint_symbol_2))[0].m[1]).ywxz;
+}
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol_5 [[buffer(0)]], threadgroup tint_symbol_6* tint_symbol_4 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
+  threadgroup tint_array<S, 4>* const tint_symbol_3 = &((*(tint_symbol_4)).w);
+  f_inner(local_invocation_index, tint_symbol_3, tint_symbol_5);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_workgroup.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_workgroup.wgsl.expected.spvasm
new file mode 100644
index 0000000..63eb276
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_workgroup.wgsl.expected.spvasm

@@ -0,0 +1,220 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 131
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f" %local_invocation_index_1
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %local_invocation_index_1 "local_invocation_index_1"
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "m_2"
+               OpMemberName %S_std140 4 "after"
+               OpName %u "u"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %w "w"
+               OpName %conv_S "conv_S"
+               OpName %val "val"
+               OpName %conv_arr4_S "conv_arr4_S"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_u_inner_2_m "load_u_inner_2_m"
+               OpName %f_inner "f_inner"
+               OpName %local_invocation_index "local_invocation_index"
+               OpName %idx "idx"
+               OpName %f "f"
+               OpDecorate %local_invocation_index_1 BuiltIn LocalInvocationIndex
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 8
+               OpMemberDecorate %S_std140 2 Offset 16
+               OpMemberDecorate %S_std140 3 Offset 24
+               OpMemberDecorate %S_std140 4 Offset 64
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 8
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 8
+               OpMemberDecorate %S 2 Offset 64
+               OpDecorate %_arr_S_uint_4 ArrayStride 128
+       %uint = OpTypeInt 32 0
+%_ptr_Input_uint = OpTypePointer Input %uint
+%local_invocation_index_1 = OpVariable %_ptr_Input_uint Input
+        %int = OpTypeInt 32 1
+       %half = OpTypeFloat 16
+     %v4half = OpTypeVector %half 4
+   %S_std140 = OpTypeStruct %int %v4half %v4half %v4half %int
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat3v4half = OpTypeMatrix %v4half 3
+          %S = OpTypeStruct %int %mat3v4half %int
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+%_ptr_Workgroup__arr_S_uint_4 = OpTypePointer Workgroup %_arr_S_uint_4
+          %w = OpVariable %_ptr_Workgroup__arr_S_uint_4 Workgroup
+         %18 = OpTypeFunction %S %S_std140
+         %29 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
+%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
+         %35 = OpConstantNull %_arr_S_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %38 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
+         %51 = OpConstantNull %_arr_S_std140_uint_4
+%_ptr_Function_S = OpTypePointer Function %S
+%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
+     %uint_1 = OpConstant %uint 1
+         %64 = OpTypeFunction %mat3v4half
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half
+     %uint_3 = OpConstant %uint 3
+       %void = OpTypeVoid
+         %84 = OpTypeFunction %void %uint
+%_ptr_Workgroup_S = OpTypePointer Workgroup %S
+        %102 = OpConstantNull %S
+   %uint_264 = OpConstant %uint 264
+%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
+      %int_1 = OpConstant %int 1
+      %int_3 = OpConstant %int 3
+%_ptr_Workgroup_mat3v4half = OpTypePointer Workgroup %mat3v4half
+        %120 = OpConstantNull %int
+%_ptr_Workgroup_v4half = OpTypePointer Workgroup %v4half
+        %126 = OpTypeFunction %void
+     %conv_S = OpFunction %S None %18
+        %val = OpFunctionParameter %S_std140
+         %21 = OpLabel
+         %22 = OpCompositeExtract %int %val 0
+         %23 = OpCompositeExtract %v4half %val 1
+         %24 = OpCompositeExtract %v4half %val 2
+         %25 = OpCompositeExtract %v4half %val 3
+         %26 = OpCompositeConstruct %mat3v4half %23 %24 %25
+         %27 = OpCompositeExtract %int %val 4
+         %28 = OpCompositeConstruct %S %22 %26 %27
+               OpReturnValue %28
+               OpFunctionEnd
+%conv_arr4_S = OpFunction %_arr_S_uint_4 None %29
+      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
+         %32 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %35
+          %i = OpVariable %_ptr_Function_uint Function %38
+%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %51
+               OpBranch %39
+         %39 = OpLabel
+               OpLoopMerge %40 %41 None
+               OpBranch %42
+         %42 = OpLabel
+         %44 = OpLoad %uint %i
+         %45 = OpULessThan %bool %44 %uint_4
+         %43 = OpLogicalNot %bool %45
+               OpSelectionMerge %47 None
+               OpBranchConditional %43 %48 %47
+         %48 = OpLabel
+               OpBranch %40
+         %47 = OpLabel
+               OpStore %var_for_index %val_0
+         %52 = OpLoad %uint %i
+         %54 = OpAccessChain %_ptr_Function_S %arr %52
+         %56 = OpLoad %uint %i
+         %58 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %56
+         %59 = OpLoad %S_std140 %58
+         %55 = OpFunctionCall %S %conv_S %59
+               OpStore %54 %55
+               OpBranch %41
+         %41 = OpLabel
+         %60 = OpLoad %uint %i
+         %62 = OpIAdd %uint %60 %uint_1
+               OpStore %i %62
+               OpBranch %39
+         %40 = OpLabel
+         %63 = OpLoad %_arr_S_uint_4 %arr
+               OpReturnValue %63
+               OpFunctionEnd
+%load_u_inner_2_m = OpFunction %mat3v4half None %64
+         %66 = OpLabel
+         %71 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %74 = OpAccessChain %_ptr_Uniform_v4half %71 %uint_1
+         %75 = OpLoad %v4half %74
+         %77 = OpAccessChain %_ptr_Uniform_v4half %71 %uint_2
+         %78 = OpLoad %v4half %77
+         %81 = OpAccessChain %_ptr_Uniform_v4half %71 %uint_3
+         %82 = OpLoad %v4half %81
+         %83 = OpCompositeConstruct %mat3v4half %75 %78 %82
+               OpReturnValue %83
+               OpFunctionEnd
+    %f_inner = OpFunction %void None %84
+%local_invocation_index = OpFunctionParameter %uint
+         %88 = OpLabel
+        %idx = OpVariable %_ptr_Function_uint Function %38
+               OpStore %idx %local_invocation_index
+               OpBranch %90
+         %90 = OpLabel
+               OpLoopMerge %91 %92 None
+               OpBranch %93
+         %93 = OpLabel
+         %95 = OpLoad %uint %idx
+         %96 = OpULessThan %bool %95 %uint_4
+         %94 = OpLogicalNot %bool %96
+               OpSelectionMerge %97 None
+               OpBranchConditional %94 %98 %97
+         %98 = OpLabel
+               OpBranch %91
+         %97 = OpLabel
+         %99 = OpLoad %uint %idx
+        %101 = OpAccessChain %_ptr_Workgroup_S %w %99
+               OpStore %101 %102
+               OpBranch %92
+         %92 = OpLabel
+        %103 = OpLoad %uint %idx
+        %104 = OpIAdd %uint %103 %uint_1
+               OpStore %idx %104
+               OpBranch %90
+         %91 = OpLabel
+               OpControlBarrier %uint_2 %uint_2 %uint_264
+        %109 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
+        %110 = OpLoad %_arr_S_std140_uint_4 %109
+        %107 = OpFunctionCall %_arr_S_uint_4 %conv_arr4_S %110
+               OpStore %w %107
+        %112 = OpAccessChain %_ptr_Workgroup_S %w %int_1
+        %114 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+        %115 = OpLoad %S_std140 %114
+        %113 = OpFunctionCall %S %conv_S %115
+               OpStore %112 %113
+        %118 = OpAccessChain %_ptr_Workgroup_mat3v4half %w %int_3 %uint_1
+        %119 = OpFunctionCall %mat3v4half %load_u_inner_2_m
+               OpStore %118 %119
+        %122 = OpAccessChain %_ptr_Workgroup_v4half %w %int_1 %uint_1 %120
+        %123 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0 %38 %uint_2
+        %124 = OpLoad %v4half %123
+        %125 = OpVectorShuffle %v4half %124 %124 1 3 0 2
+               OpStore %122 %125
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %126
+        %128 = OpLabel
+        %130 = OpLoad %uint %local_invocation_index_1
+        %129 = OpFunctionCall %void %f_inner %130
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_workgroup.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_workgroup.wgsl.expected.wgsl
new file mode 100644
index 0000000..cecda57
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f16/to_workgroup.wgsl.expected.wgsl

@@ -0,0 +1,20 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat3x4<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+var<workgroup> w : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  w = u;
+  w[1] = u[2];
+  w[3].m = u[2].m;
+  w[1].m[0] = u[0].m[1].ywxz;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f32/dynamic_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/dynamic_index_via_ptr.wgsl
new file mode 100644
index 0000000..252c1e6
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/dynamic_index_via_ptr.wgsl

@@ -0,0 +1,32 @@
+struct Inner {
+  @size(64)
+  m : mat3x4<f32>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+var<private> counter = 0;
+fn i() -> i32 { counter++; return counter; }
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a           = &a;
+  let p_a_i         = &((*p_a)[i()]);
+  let p_a_i_a       = &((*p_a_i).a);
+  let p_a_i_a_i     = &((*p_a_i_a)[i()]);
+  let p_a_i_a_i_m   = &((*p_a_i_a_i).m);
+  let p_a_i_a_i_m_i = &((*p_a_i_a_i_m)[i()]);
+
+
+  let l_a             : array<Outer, 4> =  *p_a;
+  let l_a_i           : Outer           =  *p_a_i;
+  let l_a_i_a         : array<Inner, 4> =  *p_a_i_a;
+  let l_a_i_a_i       : Inner           =  *p_a_i_a_i;
+  let l_a_i_a_i_m     : mat3x4<f32>     =  *p_a_i_a_i_m;
+  let l_a_i_a_i_m_i   : vec4<f32>       =  *p_a_i_a_i_m_i;
+  let l_a_i_a_i_m_i_i : f32             = (*p_a_i_a_i_m_i)[i()];
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..2e5ba64
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,76 @@
+struct Inner {
+  float3x4 m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+float3x4 tint_symbol_8(uint4 buffer[64], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]));
+}
+
+Inner tint_symbol_7(uint4 buffer[64], uint offset) {
+  const Inner tint_symbol_11 = {tint_symbol_8(buffer, (offset + 0u))};
+  return tint_symbol_11;
+}
+
+typedef Inner tint_symbol_6_ret[4];
+tint_symbol_6_ret tint_symbol_6(uint4 buffer[64], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_7(buffer, (offset + (i_1 * 64u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_5(uint4 buffer[64], uint offset) {
+  const Outer tint_symbol_12 = {tint_symbol_6(buffer, (offset + 0u))};
+  return tint_symbol_12;
+}
+
+typedef Outer tint_symbol_4_ret[4];
+tint_symbol_4_ret tint_symbol_4(uint4 buffer[64], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    for(uint i_2 = 0u; (i_2 < 4u); i_2 = (i_2 + 1u)) {
+      arr_1[i_2] = tint_symbol_5(buffer, (offset + (i_2 * 256u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_a_i_save = i();
+  const int p_a_i_a_i_save = i();
+  const int p_a_i_a_i_m_i_save = i();
+  const Outer l_a[4] = tint_symbol_4(a, 0u);
+  const Outer l_a_i = tint_symbol_5(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a[4] = tint_symbol_6(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a_i = tint_symbol_7(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const float3x4 l_a_i_a_i_m = tint_symbol_8(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const uint scalar_offset_3 = ((((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))) + (16u * uint(p_a_i_a_i_m_i_save)))) / 4;
+  const float4 l_a_i_a_i_m_i = asfloat(a[scalar_offset_3 / 4]);
+  const int tint_symbol = p_a_i_save;
+  const int tint_symbol_1 = p_a_i_a_i_save;
+  const int tint_symbol_2 = p_a_i_a_i_m_i_save;
+  const int tint_symbol_3 = i();
+  const uint scalar_offset_4 = (((((256u * uint(tint_symbol)) + (64u * uint(tint_symbol_1))) + (16u * uint(tint_symbol_2))) + (4u * uint(tint_symbol_3)))) / 4;
+  const float l_a_i_a_i_m_i_i = asfloat(a[scalar_offset_4 / 4][scalar_offset_4 % 4]);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..2e5ba64
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,76 @@
+struct Inner {
+  float3x4 m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+float3x4 tint_symbol_8(uint4 buffer[64], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]));
+}
+
+Inner tint_symbol_7(uint4 buffer[64], uint offset) {
+  const Inner tint_symbol_11 = {tint_symbol_8(buffer, (offset + 0u))};
+  return tint_symbol_11;
+}
+
+typedef Inner tint_symbol_6_ret[4];
+tint_symbol_6_ret tint_symbol_6(uint4 buffer[64], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_7(buffer, (offset + (i_1 * 64u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_5(uint4 buffer[64], uint offset) {
+  const Outer tint_symbol_12 = {tint_symbol_6(buffer, (offset + 0u))};
+  return tint_symbol_12;
+}
+
+typedef Outer tint_symbol_4_ret[4];
+tint_symbol_4_ret tint_symbol_4(uint4 buffer[64], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    for(uint i_2 = 0u; (i_2 < 4u); i_2 = (i_2 + 1u)) {
+      arr_1[i_2] = tint_symbol_5(buffer, (offset + (i_2 * 256u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_a_i_save = i();
+  const int p_a_i_a_i_save = i();
+  const int p_a_i_a_i_m_i_save = i();
+  const Outer l_a[4] = tint_symbol_4(a, 0u);
+  const Outer l_a_i = tint_symbol_5(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a[4] = tint_symbol_6(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a_i = tint_symbol_7(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const float3x4 l_a_i_a_i_m = tint_symbol_8(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const uint scalar_offset_3 = ((((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))) + (16u * uint(p_a_i_a_i_m_i_save)))) / 4;
+  const float4 l_a_i_a_i_m_i = asfloat(a[scalar_offset_3 / 4]);
+  const int tint_symbol = p_a_i_save;
+  const int tint_symbol_1 = p_a_i_a_i_save;
+  const int tint_symbol_2 = p_a_i_a_i_m_i_save;
+  const int tint_symbol_3 = i();
+  const uint scalar_offset_4 = (((((256u * uint(tint_symbol)) + (64u * uint(tint_symbol_1))) + (16u * uint(tint_symbol_2))) + (4u * uint(tint_symbol_3)))) / 4;
+  const float l_a_i_a_i_m_i_i = asfloat(a[scalar_offset_4 / 4][scalar_offset_4 % 4]);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f32/dynamic_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/dynamic_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..1b6b32e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/dynamic_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,46 @@
+#version 310 es
+
+struct Inner {
+  mat3x4 m;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+};
+
+struct Outer {
+  Inner a[4];
+};
+
+layout(binding = 0, std140) uniform a_block_ubo {
+  Outer inner[4];
+} a;
+
+int counter = 0;
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+void f() {
+  int tint_symbol = i();
+  int p_a_i_save = tint_symbol;
+  int tint_symbol_1 = i();
+  int p_a_i_a_i_save = tint_symbol_1;
+  int tint_symbol_2 = i();
+  int p_a_i_a_i_m_i_save = tint_symbol_2;
+  Outer l_a[4] = a.inner;
+  Outer l_a_i = a.inner[p_a_i_save];
+  Inner l_a_i_a[4] = a.inner[p_a_i_save].a;
+  Inner l_a_i_a_i = a.inner[p_a_i_save].a[p_a_i_a_i_save];
+  mat3x4 l_a_i_a_i_m = a.inner[p_a_i_save].a[p_a_i_a_i_save].m;
+  vec4 l_a_i_a_i_m_i = a.inner[p_a_i_save].a[p_a_i_a_i_save].m[p_a_i_a_i_m_i_save];
+  int tint_symbol_3 = i();
+  float l_a_i_a_i_m_i_i = a.inner[p_a_i_save].a[p_a_i_a_i_save].m[p_a_i_a_i_m_i_save][tint_symbol_3];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f32/dynamic_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/dynamic_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..c277984
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/dynamic_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,49 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct Inner {
+  /* 0x0000 */ float3x4 m;
+  /* 0x0030 */ tint_array<int8_t, 16> tint_pad;
+};
+
+struct Outer {
+  /* 0x0000 */ tint_array<Inner, 4> a;
+};
+
+int i() {
+  thread int tint_symbol_4 = 0;
+  tint_symbol_4 = as_type<int>((as_type<uint>(tint_symbol_4) + as_type<uint>(1)));
+  return tint_symbol_4;
+}
+
+kernel void f(const constant tint_array<Outer, 4>* tint_symbol_5 [[buffer(0)]]) {
+  int const tint_symbol = i();
+  int const p_a_i_save = tint_symbol;
+  int const tint_symbol_1 = i();
+  int const p_a_i_a_i_save = tint_symbol_1;
+  int const tint_symbol_2 = i();
+  int const p_a_i_a_i_m_i_save = tint_symbol_2;
+  tint_array<Outer, 4> const l_a = *(tint_symbol_5);
+  Outer const l_a_i = (*(tint_symbol_5))[p_a_i_save];
+  tint_array<Inner, 4> const l_a_i_a = (*(tint_symbol_5))[p_a_i_save].a;
+  Inner const l_a_i_a_i = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save];
+  float3x4 const l_a_i_a_i_m = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save].m;
+  float4 const l_a_i_a_i_m_i = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save].m[p_a_i_a_i_m_i_save];
+  int const tint_symbol_3 = i();
+  float const l_a_i_a_i_m_i_i = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save].m[p_a_i_a_i_m_i_save][tint_symbol_3];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f32/dynamic_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/dynamic_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..6342891
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/dynamic_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,88 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 54
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %a_block "a_block"
+               OpMemberName %a_block 0 "inner"
+               OpName %Outer "Outer"
+               OpMemberName %Outer 0 "a"
+               OpName %Inner "Inner"
+               OpMemberName %Inner 0 "m"
+               OpName %a "a"
+               OpName %counter "counter"
+               OpName %i "i"
+               OpName %f "f"
+               OpDecorate %a_block Block
+               OpMemberDecorate %a_block 0 Offset 0
+               OpMemberDecorate %Outer 0 Offset 0
+               OpMemberDecorate %Inner 0 Offset 0
+               OpMemberDecorate %Inner 0 ColMajor
+               OpMemberDecorate %Inner 0 MatrixStride 16
+               OpDecorate %_arr_Inner_uint_4 ArrayStride 64
+               OpDecorate %_arr_Outer_uint_4 ArrayStride 256
+               OpDecorate %a NonWritable
+               OpDecorate %a DescriptorSet 0
+               OpDecorate %a Binding 0
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%mat3v4float = OpTypeMatrix %v4float 3
+      %Inner = OpTypeStruct %mat3v4float
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_Inner_uint_4 = OpTypeArray %Inner %uint_4
+      %Outer = OpTypeStruct %_arr_Inner_uint_4
+%_arr_Outer_uint_4 = OpTypeArray %Outer %uint_4
+    %a_block = OpTypeStruct %_arr_Outer_uint_4
+%_ptr_Uniform_a_block = OpTypePointer Uniform %a_block
+          %a = OpVariable %_ptr_Uniform_a_block Uniform
+        %int = OpTypeInt 32 1
+         %14 = OpConstantNull %int
+%_ptr_Private_int = OpTypePointer Private %int
+    %counter = OpVariable %_ptr_Private_int Private %14
+         %17 = OpTypeFunction %int
+      %int_1 = OpConstant %int 1
+       %void = OpTypeVoid
+         %24 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_Outer_uint_4 = OpTypePointer Uniform %_arr_Outer_uint_4
+%_ptr_Uniform_Outer = OpTypePointer Uniform %Outer
+%_ptr_Uniform__arr_Inner_uint_4 = OpTypePointer Uniform %_arr_Inner_uint_4
+%_ptr_Uniform_Inner = OpTypePointer Uniform %Inner
+%_ptr_Uniform_mat3v4float = OpTypePointer Uniform %mat3v4float
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+          %i = OpFunction %int None %17
+         %19 = OpLabel
+         %20 = OpLoad %int %counter
+         %22 = OpIAdd %int %20 %int_1
+               OpStore %counter %22
+         %23 = OpLoad %int %counter
+               OpReturnValue %23
+               OpFunctionEnd
+          %f = OpFunction %void None %24
+         %27 = OpLabel
+         %28 = OpFunctionCall %int %i
+         %29 = OpFunctionCall %int %i
+         %30 = OpFunctionCall %int %i
+         %33 = OpAccessChain %_ptr_Uniform__arr_Outer_uint_4 %a %uint_0
+         %34 = OpLoad %_arr_Outer_uint_4 %33
+         %36 = OpAccessChain %_ptr_Uniform_Outer %a %uint_0 %28
+         %37 = OpLoad %Outer %36
+         %39 = OpAccessChain %_ptr_Uniform__arr_Inner_uint_4 %a %uint_0 %28 %uint_0
+         %40 = OpLoad %_arr_Inner_uint_4 %39
+         %42 = OpAccessChain %_ptr_Uniform_Inner %a %uint_0 %28 %uint_0 %29
+         %43 = OpLoad %Inner %42
+         %45 = OpAccessChain %_ptr_Uniform_mat3v4float %a %uint_0 %28 %uint_0 %29 %uint_0
+         %46 = OpLoad %mat3v4float %45
+         %48 = OpAccessChain %_ptr_Uniform_v4float %a %uint_0 %28 %uint_0 %29 %uint_0 %30
+         %49 = OpLoad %v4float %48
+         %50 = OpFunctionCall %int %i
+         %52 = OpAccessChain %_ptr_Uniform_float %a %uint_0 %28 %uint_0 %29 %uint_0 %30 %50
+         %53 = OpLoad %float %52
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f32/dynamic_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/dynamic_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..73f5a5d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/dynamic_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,34 @@
+struct Inner {
+  @size(64)
+  m : mat3x4<f32>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+var<private> counter = 0;
+
+fn i() -> i32 {
+  counter++;
+  return counter;
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &(a);
+  let p_a_i = &((*(p_a))[i()]);
+  let p_a_i_a = &((*(p_a_i)).a);
+  let p_a_i_a_i = &((*(p_a_i_a))[i()]);
+  let p_a_i_a_i_m = &((*(p_a_i_a_i)).m);
+  let p_a_i_a_i_m_i = &((*(p_a_i_a_i_m))[i()]);
+  let l_a : array<Outer, 4> = *(p_a);
+  let l_a_i : Outer = *(p_a_i);
+  let l_a_i_a : array<Inner, 4> = *(p_a_i_a);
+  let l_a_i_a_i : Inner = *(p_a_i_a_i);
+  let l_a_i_a_i_m : mat3x4<f32> = *(p_a_i_a_i_m);
+  let l_a_i_a_i_m_i : vec4<f32> = *(p_a_i_a_i_m_i);
+  let l_a_i_a_i_m_i_i : f32 = (*(p_a_i_a_i_m_i))[i()];
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f32/static_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/static_index_via_ptr.wgsl
new file mode 100644
index 0000000..c378c04
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/static_index_via_ptr.wgsl

@@ -0,0 +1,29 @@
+struct Inner {
+  @size(64)
+  m : mat3x4<f32>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &a;
+  let p_a_3 = &((*p_a)[3]);
+  let p_a_3_a = &((*p_a_3).a);
+  let p_a_3_a_2 = &((*p_a_3_a)[2]);
+  let p_a_3_a_2_m = &((*p_a_3_a_2).m);
+  let p_a_3_a_2_m_1 = &((*p_a_3_a_2_m)[1]);
+
+
+  let l_a             : array<Outer, 4> = *p_a;
+  let l_a_3           : Outer           = *p_a_3;
+  let l_a_3_a         : array<Inner, 4> = *p_a_3_a;
+  let l_a_3_a_2       : Inner           = *p_a_3_a_2;
+  let l_a_3_a_2_m     : mat3x4<f32>     = *p_a_3_a_2_m;
+  let l_a_3_a_2_m_1   : vec4<f32>       = *p_a_3_a_2_m_1;
+  let l_a_3_a_2_m_1_0 : f32             = (*p_a_3_a_2_m_1)[0];
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..26a9369
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,61 @@
+struct Inner {
+  float3x4 m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];
+};
+
+float3x4 tint_symbol_4(uint4 buffer[64], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]));
+}
+
+Inner tint_symbol_3(uint4 buffer[64], uint offset) {
+  const Inner tint_symbol_7 = {tint_symbol_4(buffer, (offset + 0u))};
+  return tint_symbol_7;
+}
+
+typedef Inner tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[64], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_3(buffer, (offset + (i * 64u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_1(uint4 buffer[64], uint offset) {
+  const Outer tint_symbol_8 = {tint_symbol_2(buffer, (offset + 0u))};
+  return tint_symbol_8;
+}
+
+typedef Outer tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[64], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr_1[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 256u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const Outer l_a[4] = tint_symbol(a, 0u);
+  const Outer l_a_3 = tint_symbol_1(a, 768u);
+  const Inner l_a_3_a[4] = tint_symbol_2(a, 768u);
+  const Inner l_a_3_a_2 = tint_symbol_3(a, 896u);
+  const float3x4 l_a_3_a_2_m = tint_symbol_4(a, 896u);
+  const float4 l_a_3_a_2_m_1 = asfloat(a[57]);
+  const float l_a_3_a_2_m_1_0 = asfloat(a[57].x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..26a9369
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,61 @@
+struct Inner {
+  float3x4 m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];
+};
+
+float3x4 tint_symbol_4(uint4 buffer[64], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]));
+}
+
+Inner tint_symbol_3(uint4 buffer[64], uint offset) {
+  const Inner tint_symbol_7 = {tint_symbol_4(buffer, (offset + 0u))};
+  return tint_symbol_7;
+}
+
+typedef Inner tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[64], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_3(buffer, (offset + (i * 64u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_1(uint4 buffer[64], uint offset) {
+  const Outer tint_symbol_8 = {tint_symbol_2(buffer, (offset + 0u))};
+  return tint_symbol_8;
+}
+
+typedef Outer tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[64], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr_1[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 256u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const Outer l_a[4] = tint_symbol(a, 0u);
+  const Outer l_a_3 = tint_symbol_1(a, 768u);
+  const Inner l_a_3_a[4] = tint_symbol_2(a, 768u);
+  const Inner l_a_3_a_2 = tint_symbol_3(a, 896u);
+  const float3x4 l_a_3_a_2_m = tint_symbol_4(a, 896u);
+  const float4 l_a_3_a_2_m_1 = asfloat(a[57]);
+  const float l_a_3_a_2_m_1_0 = asfloat(a[57].x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f32/static_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/static_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..e23c570
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/static_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,33 @@
+#version 310 es
+
+struct Inner {
+  mat3x4 m;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+};
+
+struct Outer {
+  Inner a[4];
+};
+
+layout(binding = 0, std140) uniform a_block_ubo {
+  Outer inner[4];
+} a;
+
+void f() {
+  Outer l_a[4] = a.inner;
+  Outer l_a_3 = a.inner[3];
+  Inner l_a_3_a[4] = a.inner[3].a;
+  Inner l_a_3_a_2 = a.inner[3].a[2];
+  mat3x4 l_a_3_a_2_m = a.inner[3].a[2].m;
+  vec4 l_a_3_a_2_m_1 = a.inner[3].a[2].m[1];
+  float l_a_3_a_2_m_1_0 = a.inner[3].a[2].m[1][0];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f32/static_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/static_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..f8ef4dc
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/static_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,36 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct Inner {
+  /* 0x0000 */ float3x4 m;
+  /* 0x0030 */ tint_array<int8_t, 16> tint_pad;
+};
+
+struct Outer {
+  /* 0x0000 */ tint_array<Inner, 4> a;
+};
+
+kernel void f(const constant tint_array<Outer, 4>* tint_symbol [[buffer(0)]]) {
+  tint_array<Outer, 4> const l_a = *(tint_symbol);
+  Outer const l_a_3 = (*(tint_symbol))[3];
+  tint_array<Inner, 4> const l_a_3_a = (*(tint_symbol))[3].a;
+  Inner const l_a_3_a_2 = (*(tint_symbol))[3].a[2];
+  float3x4 const l_a_3_a_2_m = (*(tint_symbol))[3].a[2].m;
+  float4 const l_a_3_a_2_m_1 = (*(tint_symbol))[3].a[2].m[1];
+  float const l_a_3_a_2_m_1_0 = (*(tint_symbol))[3].a[2].m[1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f32/static_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/static_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..39b3fdc
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/static_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,73 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 44
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %a_block "a_block"
+               OpMemberName %a_block 0 "inner"
+               OpName %Outer "Outer"
+               OpMemberName %Outer 0 "a"
+               OpName %Inner "Inner"
+               OpMemberName %Inner 0 "m"
+               OpName %a "a"
+               OpName %f "f"
+               OpDecorate %a_block Block
+               OpMemberDecorate %a_block 0 Offset 0
+               OpMemberDecorate %Outer 0 Offset 0
+               OpMemberDecorate %Inner 0 Offset 0
+               OpMemberDecorate %Inner 0 ColMajor
+               OpMemberDecorate %Inner 0 MatrixStride 16
+               OpDecorate %_arr_Inner_uint_4 ArrayStride 64
+               OpDecorate %_arr_Outer_uint_4 ArrayStride 256
+               OpDecorate %a NonWritable
+               OpDecorate %a DescriptorSet 0
+               OpDecorate %a Binding 0
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%mat3v4float = OpTypeMatrix %v4float 3
+      %Inner = OpTypeStruct %mat3v4float
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_Inner_uint_4 = OpTypeArray %Inner %uint_4
+      %Outer = OpTypeStruct %_arr_Inner_uint_4
+%_arr_Outer_uint_4 = OpTypeArray %Outer %uint_4
+    %a_block = OpTypeStruct %_arr_Outer_uint_4
+%_ptr_Uniform_a_block = OpTypePointer Uniform %a_block
+          %a = OpVariable %_ptr_Uniform_a_block Uniform
+       %void = OpTypeVoid
+         %13 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_Outer_uint_4 = OpTypePointer Uniform %_arr_Outer_uint_4
+        %int = OpTypeInt 32 1
+      %int_3 = OpConstant %int 3
+%_ptr_Uniform_Outer = OpTypePointer Uniform %Outer
+%_ptr_Uniform__arr_Inner_uint_4 = OpTypePointer Uniform %_arr_Inner_uint_4
+      %int_2 = OpConstant %int 2
+%_ptr_Uniform_Inner = OpTypePointer Uniform %Inner
+%_ptr_Uniform_mat3v4float = OpTypePointer Uniform %mat3v4float
+      %int_1 = OpConstant %int 1
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+         %40 = OpConstantNull %int
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+          %f = OpFunction %void None %13
+         %16 = OpLabel
+         %19 = OpAccessChain %_ptr_Uniform__arr_Outer_uint_4 %a %uint_0
+         %20 = OpLoad %_arr_Outer_uint_4 %19
+         %24 = OpAccessChain %_ptr_Uniform_Outer %a %uint_0 %int_3
+         %25 = OpLoad %Outer %24
+         %27 = OpAccessChain %_ptr_Uniform__arr_Inner_uint_4 %a %uint_0 %int_3 %uint_0
+         %28 = OpLoad %_arr_Inner_uint_4 %27
+         %31 = OpAccessChain %_ptr_Uniform_Inner %a %uint_0 %int_3 %uint_0 %int_2
+         %32 = OpLoad %Inner %31
+         %34 = OpAccessChain %_ptr_Uniform_mat3v4float %a %uint_0 %int_3 %uint_0 %int_2 %uint_0
+         %35 = OpLoad %mat3v4float %34
+         %38 = OpAccessChain %_ptr_Uniform_v4float %a %uint_0 %int_3 %uint_0 %int_2 %uint_0 %int_1
+         %39 = OpLoad %v4float %38
+         %42 = OpAccessChain %_ptr_Uniform_float %a %uint_0 %int_3 %uint_0 %int_2 %uint_0 %int_1 %40
+         %43 = OpLoad %float %42
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f32/static_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/static_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..cb991e4
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/static_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,27 @@
+struct Inner {
+  @size(64)
+  m : mat3x4<f32>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &(a);
+  let p_a_3 = &((*(p_a))[3]);
+  let p_a_3_a = &((*(p_a_3)).a);
+  let p_a_3_a_2 = &((*(p_a_3_a))[2]);
+  let p_a_3_a_2_m = &((*(p_a_3_a_2)).m);
+  let p_a_3_a_2_m_1 = &((*(p_a_3_a_2_m))[1]);
+  let l_a : array<Outer, 4> = *(p_a);
+  let l_a_3 : Outer = *(p_a_3);
+  let l_a_3_a : array<Inner, 4> = *(p_a_3_a);
+  let l_a_3_a_2 : Inner = *(p_a_3_a_2);
+  let l_a_3_a_2_m : mat3x4<f32> = *(p_a_3_a_2_m);
+  let l_a_3_a_2_m_1 : vec4<f32> = *(p_a_3_a_2_m_1);
+  let l_a_3_a_2_m_1_0 : f32 = (*(p_a_3_a_2_m_1))[0];
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_builtin.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_builtin.wgsl
new file mode 100644
index 0000000..0a98f13
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_builtin.wgsl

@@ -0,0 +1,15 @@
+struct S {
+  before : i32,
+  m : mat3x4<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    let t = transpose(u[2].m);
+    let l = length(u[0].m[1].ywxz);
+    let a = abs(u[0].m[1].ywxz.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_builtin.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_builtin.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..0826949
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_builtin.wgsl.expected.dxc.hlsl

@@ -0,0 +1,18 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+
+float3x4 tint_symbol(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float4x3 t = transpose(tint_symbol(u, 272u));
+  const float l = length(asfloat(u[2]).ywxz);
+  const float a = abs(asfloat(u[2]).ywxz.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_builtin.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_builtin.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..0826949
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_builtin.wgsl.expected.fxc.hlsl

@@ -0,0 +1,18 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+
+float3x4 tint_symbol(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float4x3 t = transpose(tint_symbol(u, 272u));
+  const float l = length(asfloat(u[2]).ywxz);
+  const float a = abs(asfloat(u[2]).ywxz.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_builtin.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_builtin.wgsl.expected.glsl
new file mode 100644
index 0000000..0e0b28f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_builtin.wgsl.expected.glsl

@@ -0,0 +1,41 @@
+#version 310 es
+
+struct S {
+  int before;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  mat3x4 m;
+  int after;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+};
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  S inner[4];
+} u;
+
+void f() {
+  mat4x3 t = transpose(u.inner[2].m);
+  float l = length(u.inner[0].m[1].ywxz);
+  float a = abs(u.inner[0].m[1].ywxz.x);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_builtin.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_builtin.wgsl.expected.msl
new file mode 100644
index 0000000..c93494d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_builtin.wgsl.expected.msl

@@ -0,0 +1,31 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 12> tint_pad;
+  /* 0x0010 */ float3x4 m;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_1;
+};
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol [[buffer(0)]]) {
+  float4x3 const t = transpose((*(tint_symbol))[2].m);
+  float const l = length(float4((*(tint_symbol))[0].m[1]).ywxz);
+  float const a = fabs(float4((*(tint_symbol))[0].m[1]).ywxz[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_builtin.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_builtin.wgsl.expected.spvasm
new file mode 100644
index 0000000..022664f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_builtin.wgsl.expected.spvasm

@@ -0,0 +1,67 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 38
+; Schema: 0
+               OpCapability Shader
+         %26 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %u "u"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 16
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 16
+               OpMemberDecorate %S 2 Offset 64
+               OpDecorate %_arr_S_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+        %int = OpTypeInt 32 1
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%mat3v4float = OpTypeMatrix %v4float 3
+          %S = OpTypeStruct %int %mat3v4float %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+    %u_block = OpTypeStruct %_arr_S_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+       %void = OpTypeVoid
+         %12 = OpTypeFunction %void
+    %v3float = OpTypeVector %float 3
+%mat4v3float = OpTypeMatrix %v3float 4
+     %uint_0 = OpConstant %uint 0
+      %int_2 = OpConstant %int 2
+     %uint_1 = OpConstant %uint 1
+%_ptr_Uniform_mat3v4float = OpTypePointer Uniform %mat3v4float
+         %27 = OpConstantNull %int
+      %int_1 = OpConstant %int 1
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+          %f = OpFunction %void None %12
+         %15 = OpLabel
+         %23 = OpAccessChain %_ptr_Uniform_mat3v4float %u %uint_0 %int_2 %uint_1
+         %24 = OpLoad %mat3v4float %23
+         %16 = OpTranspose %mat4v3float %24
+         %30 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %27 %uint_1 %int_1
+         %31 = OpLoad %v4float %30
+         %32 = OpVectorShuffle %v4float %31 %31 1 3 0 2
+         %25 = OpExtInst %float %26 Length %32
+         %34 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %27 %uint_1 %int_1
+         %35 = OpLoad %v4float %34
+         %36 = OpVectorShuffle %v4float %35 %35 1 3 0 2
+         %37 = OpCompositeExtract %float %36 0
+         %33 = OpExtInst %float %26 FAbs %37
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_builtin.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_builtin.wgsl.expected.wgsl
new file mode 100644
index 0000000..8e60f77
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_builtin.wgsl.expected.wgsl

@@ -0,0 +1,15 @@
+struct S {
+  before : i32,
+  m : mat3x4<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let t = transpose(u[2].m);
+  let l = length(u[0].m[1].ywxz);
+  let a = abs(u[0].m[1].ywxz.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_fn.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_fn.wgsl
new file mode 100644
index 0000000..17458df
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_fn.wgsl

@@ -0,0 +1,23 @@
+struct S {
+  before : i32,
+  m : mat3x4<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+fn a(a : array<S, 4>) {}
+fn b(s : S) {}
+fn c(m : mat3x4<f32>) {}
+fn d(v : vec4<f32>) {}
+fn e(f : f32) {}
+
+@compute @workgroup_size(1)
+fn f() {
+    a(u);
+    b(u[2]);
+    c(u[2].m);
+    d(u[0].m[1].ywxz);
+    e(u[0].m[1].ywxz.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_fn.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_fn.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..0bc1c2f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_fn.wgsl.expected.dxc.hlsl

@@ -0,0 +1,59 @@
+struct S {
+  int before;
+  float3x4 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+
+void a(S a_1[4]) {
+}
+
+void b(S s) {
+}
+
+void c(float3x4 m) {
+}
+
+void d(float4 v) {
+}
+
+void e(float f_1) {
+}
+
+float3x4 tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]));
+}
+
+S tint_symbol_1(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_3 = ((offset + 0u)) / 4;
+  const uint scalar_offset_4 = ((offset + 64u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4]), tint_symbol_3(buffer, (offset + 16u)), asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(tint_symbol_1(u, 256u));
+  c(tint_symbol_3(u, 272u));
+  d(asfloat(u[2]).ywxz);
+  e(asfloat(u[2]).ywxz.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_fn.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_fn.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..0bc1c2f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_fn.wgsl.expected.fxc.hlsl

@@ -0,0 +1,59 @@
+struct S {
+  int before;
+  float3x4 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+
+void a(S a_1[4]) {
+}
+
+void b(S s) {
+}
+
+void c(float3x4 m) {
+}
+
+void d(float4 v) {
+}
+
+void e(float f_1) {
+}
+
+float3x4 tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]));
+}
+
+S tint_symbol_1(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_3 = ((offset + 0u)) / 4;
+  const uint scalar_offset_4 = ((offset + 64u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4]), tint_symbol_3(buffer, (offset + 16u)), asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(tint_symbol_1(u, 256u));
+  c(tint_symbol_3(u, 272u));
+  d(asfloat(u[2]).ywxz);
+  e(asfloat(u[2]).ywxz.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_fn.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_fn.wgsl.expected.glsl
new file mode 100644
index 0000000..894d25f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_fn.wgsl.expected.glsl

@@ -0,0 +1,58 @@
+#version 310 es
+
+struct S {
+  int before;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  mat3x4 m;
+  int after;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+};
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  S inner[4];
+} u;
+
+void a(S a_1[4]) {
+}
+
+void b(S s) {
+}
+
+void c(mat3x4 m) {
+}
+
+void d(vec4 v) {
+}
+
+void e(float f_1) {
+}
+
+void f() {
+  a(u.inner);
+  b(u.inner[2]);
+  c(u.inner[2].m);
+  d(u.inner[0].m[1].ywxz);
+  e(u.inner[0].m[1].ywxz.x);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_fn.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_fn.wgsl.expected.msl
new file mode 100644
index 0000000..b2034ef
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_fn.wgsl.expected.msl

@@ -0,0 +1,48 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 12> tint_pad;
+  /* 0x0010 */ float3x4 m;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_1;
+};
+
+void a(tint_array<S, 4> a_1) {
+}
+
+void b(S s) {
+}
+
+void c(float3x4 m) {
+}
+
+void d(float4 v) {
+}
+
+void e(float f_1) {
+}
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol [[buffer(0)]]) {
+  a(*(tint_symbol));
+  b((*(tint_symbol))[2]);
+  c((*(tint_symbol))[2].m);
+  d(float4((*(tint_symbol))[0].m[1]).ywxz);
+  e(float4((*(tint_symbol))[0].m[1]).ywxz[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_fn.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_fn.wgsl.expected.spvasm
new file mode 100644
index 0000000..2d03cb6
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_fn.wgsl.expected.spvasm

@@ -0,0 +1,112 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 63
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %u "u"
+               OpName %a "a"
+               OpName %a_1 "a_1"
+               OpName %b "b"
+               OpName %s "s"
+               OpName %c "c"
+               OpName %m "m"
+               OpName %d "d"
+               OpName %v "v"
+               OpName %e "e"
+               OpName %f_1 "f_1"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 16
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 16
+               OpMemberDecorate %S 2 Offset 64
+               OpDecorate %_arr_S_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+        %int = OpTypeInt 32 1
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%mat3v4float = OpTypeMatrix %v4float 3
+          %S = OpTypeStruct %int %mat3v4float %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+    %u_block = OpTypeStruct %_arr_S_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+       %void = OpTypeVoid
+         %12 = OpTypeFunction %void %_arr_S_uint_4
+         %17 = OpTypeFunction %void %S
+         %21 = OpTypeFunction %void %mat3v4float
+         %25 = OpTypeFunction %void %v4float
+         %29 = OpTypeFunction %void %float
+         %33 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_S_uint_4 = OpTypePointer Uniform %_arr_S_uint_4
+      %int_2 = OpConstant %int 2
+%_ptr_Uniform_S = OpTypePointer Uniform %S
+     %uint_1 = OpConstant %uint 1
+%_ptr_Uniform_mat3v4float = OpTypePointer Uniform %mat3v4float
+         %52 = OpConstantNull %int
+      %int_1 = OpConstant %int 1
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+          %a = OpFunction %void None %12
+        %a_1 = OpFunctionParameter %_arr_S_uint_4
+         %16 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %b = OpFunction %void None %17
+          %s = OpFunctionParameter %S
+         %20 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %c = OpFunction %void None %21
+          %m = OpFunctionParameter %mat3v4float
+         %24 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %d = OpFunction %void None %25
+          %v = OpFunctionParameter %v4float
+         %28 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %e = OpFunction %void None %29
+        %f_1 = OpFunctionParameter %float
+         %32 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %33
+         %35 = OpLabel
+         %39 = OpAccessChain %_ptr_Uniform__arr_S_uint_4 %u %uint_0
+         %40 = OpLoad %_arr_S_uint_4 %39
+         %36 = OpFunctionCall %void %a %40
+         %44 = OpAccessChain %_ptr_Uniform_S %u %uint_0 %int_2
+         %45 = OpLoad %S %44
+         %41 = OpFunctionCall %void %b %45
+         %49 = OpAccessChain %_ptr_Uniform_mat3v4float %u %uint_0 %int_2 %uint_1
+         %50 = OpLoad %mat3v4float %49
+         %46 = OpFunctionCall %void %c %50
+         %55 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %52 %uint_1 %int_1
+         %56 = OpLoad %v4float %55
+         %57 = OpVectorShuffle %v4float %56 %56 1 3 0 2
+         %51 = OpFunctionCall %void %d %57
+         %59 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %52 %uint_1 %int_1
+         %60 = OpLoad %v4float %59
+         %61 = OpVectorShuffle %v4float %60 %60 1 3 0 2
+         %62 = OpCompositeExtract %float %61 0
+         %58 = OpFunctionCall %void %e %62
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_fn.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_fn.wgsl.expected.wgsl
new file mode 100644
index 0000000..9fa6069
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_fn.wgsl.expected.wgsl

@@ -0,0 +1,32 @@
+struct S {
+  before : i32,
+  m : mat3x4<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+fn a(a : array<S, 4>) {
+}
+
+fn b(s : S) {
+}
+
+fn c(m : mat3x4<f32>) {
+}
+
+fn d(v : vec4<f32>) {
+}
+
+fn e(f : f32) {
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  a(u);
+  b(u[2]);
+  c(u[2].m);
+  d(u[0].m[1].ywxz);
+  e(u[0].m[1].ywxz.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_private.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_private.wgsl
new file mode 100644
index 0000000..b3978f6
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_private.wgsl

@@ -0,0 +1,17 @@
+struct S {
+  before : i32,
+  m : mat3x4<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+var<private> p : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    p = u;
+    p[1] = u[2];
+    p[3].m = u[2].m;
+    p[1].m[0] = u[0].m[1].ywxz;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_private.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_private.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..d7e58d4
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_private.wgsl.expected.dxc.hlsl

@@ -0,0 +1,44 @@
+struct S {
+  int before;
+  float3x4 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+static S p[4] = (S[4])0;
+
+float3x4 tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]));
+}
+
+S tint_symbol_1(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_3 = ((offset + 0u)) / 4;
+  const uint scalar_offset_4 = ((offset + 64u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4]), tint_symbol_3(buffer, (offset + 16u)), asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = tint_symbol_1(u, 256u);
+  p[3].m = tint_symbol_3(u, 272u);
+  p[1].m[0] = asfloat(u[2]).ywxz;
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_private.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_private.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..d7e58d4
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_private.wgsl.expected.fxc.hlsl

@@ -0,0 +1,44 @@
+struct S {
+  int before;
+  float3x4 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+static S p[4] = (S[4])0;
+
+float3x4 tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]));
+}
+
+S tint_symbol_1(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_3 = ((offset + 0u)) / 4;
+  const uint scalar_offset_4 = ((offset + 64u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4]), tint_symbol_3(buffer, (offset + 16u)), asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = tint_symbol_1(u, 256u);
+  p[3].m = tint_symbol_3(u, 272u);
+  p[1].m[0] = asfloat(u[2]).ywxz;
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_private.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_private.wgsl.expected.glsl
new file mode 100644
index 0000000..690aaca
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_private.wgsl.expected.glsl

@@ -0,0 +1,43 @@
+#version 310 es
+
+struct S {
+  int before;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  mat3x4 m;
+  int after;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+};
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  S inner[4];
+} u;
+
+S p[4] = S[4](S(0, 0u, 0u, 0u, mat3x4(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, 0u, 0u, mat3x4(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, 0u, 0u, mat3x4(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, 0u, 0u, mat3x4(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+void f() {
+  p = u.inner;
+  p[1] = u.inner[2];
+  p[3].m = u.inner[2].m;
+  p[1].m[0] = u.inner[0].m[1].ywxz;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_private.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_private.wgsl.expected.msl
new file mode 100644
index 0000000..53a32ac
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_private.wgsl.expected.msl

@@ -0,0 +1,33 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 12> tint_pad;
+  /* 0x0010 */ float3x4 m;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_1;
+};
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol_1 [[buffer(0)]]) {
+  thread tint_array<S, 4> tint_symbol = {};
+  tint_symbol = *(tint_symbol_1);
+  tint_symbol[1] = (*(tint_symbol_1))[2];
+  tint_symbol[3].m = (*(tint_symbol_1))[2].m;
+  tint_symbol[1].m[0] = float4((*(tint_symbol_1))[0].m[1]).ywxz;
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_private.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_private.wgsl.expected.spvasm
new file mode 100644
index 0000000..d7ca594
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_private.wgsl.expected.spvasm

@@ -0,0 +1,78 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 44
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %u "u"
+               OpName %p "p"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 16
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 16
+               OpMemberDecorate %S 2 Offset 64
+               OpDecorate %_arr_S_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+        %int = OpTypeInt 32 1
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%mat3v4float = OpTypeMatrix %v4float 3
+          %S = OpTypeStruct %int %mat3v4float %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+    %u_block = OpTypeStruct %_arr_S_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%_ptr_Private__arr_S_uint_4 = OpTypePointer Private %_arr_S_uint_4
+         %14 = OpConstantNull %_arr_S_uint_4
+          %p = OpVariable %_ptr_Private__arr_S_uint_4 Private %14
+       %void = OpTypeVoid
+         %15 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_S_uint_4 = OpTypePointer Uniform %_arr_S_uint_4
+      %int_1 = OpConstant %int 1
+%_ptr_Private_S = OpTypePointer Private %S
+      %int_2 = OpConstant %int 2
+%_ptr_Uniform_S = OpTypePointer Uniform %S
+      %int_3 = OpConstant %int 3
+     %uint_1 = OpConstant %uint 1
+%_ptr_Private_mat3v4float = OpTypePointer Private %mat3v4float
+%_ptr_Uniform_mat3v4float = OpTypePointer Uniform %mat3v4float
+         %37 = OpConstantNull %int
+%_ptr_Private_v4float = OpTypePointer Private %v4float
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+          %f = OpFunction %void None %15
+         %18 = OpLabel
+         %21 = OpAccessChain %_ptr_Uniform__arr_S_uint_4 %u %uint_0
+         %22 = OpLoad %_arr_S_uint_4 %21
+               OpStore %p %22
+         %25 = OpAccessChain %_ptr_Private_S %p %int_1
+         %28 = OpAccessChain %_ptr_Uniform_S %u %uint_0 %int_2
+         %29 = OpLoad %S %28
+               OpStore %25 %29
+         %33 = OpAccessChain %_ptr_Private_mat3v4float %p %int_3 %uint_1
+         %35 = OpAccessChain %_ptr_Uniform_mat3v4float %u %uint_0 %int_2 %uint_1
+         %36 = OpLoad %mat3v4float %35
+               OpStore %33 %36
+         %39 = OpAccessChain %_ptr_Private_v4float %p %int_1 %uint_1 %37
+         %41 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %37 %uint_1 %int_1
+         %42 = OpLoad %v4float %41
+         %43 = OpVectorShuffle %v4float %42 %42 1 3 0 2
+               OpStore %39 %43
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_private.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_private.wgsl.expected.wgsl
new file mode 100644
index 0000000..25650c9
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_private.wgsl.expected.wgsl

@@ -0,0 +1,18 @@
+struct S {
+  before : i32,
+  m : mat3x4<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+var<private> p : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  p = u;
+  p[1] = u[2];
+  p[3].m = u[2].m;
+  p[1].m[0] = u[0].m[1].ywxz;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_storage.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_storage.wgsl
new file mode 100644
index 0000000..e45b0e0
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_storage.wgsl

@@ -0,0 +1,17 @@
+struct S {
+  before : i32,
+  m : mat3x4<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    s = u;
+    s[1] = u[2];
+    s[3].m = u[2].m;
+    s[1].m[0] = u[0].m[1].ywxz;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_storage.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_storage.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..629f8ff
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_storage.wgsl.expected.dxc.hlsl

@@ -0,0 +1,65 @@
+struct S {
+  int before;
+  float3x4 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol_3(RWByteAddressBuffer buffer, uint offset, float3x4 value) {
+  buffer.Store4((offset + 0u), asuint(value[0u]));
+  buffer.Store4((offset + 16u), asuint(value[1u]));
+  buffer.Store4((offset + 32u), asuint(value[2u]));
+}
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, S value) {
+  buffer.Store((offset + 0u), asuint(value.before));
+  tint_symbol_3(buffer, (offset + 16u), value.m);
+  buffer.Store((offset + 64u), asuint(value.after));
+}
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, S value[4]) {
+  S array[4] = value;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      tint_symbol_1(buffer, (offset + (i * 128u)), array[i]);
+    }
+  }
+}
+
+float3x4 tint_symbol_8(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]));
+}
+
+S tint_symbol_6(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_3 = ((offset + 0u)) / 4;
+  const uint scalar_offset_4 = ((offset + 64u)) / 4;
+  const S tint_symbol_10 = {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4]), tint_symbol_8(buffer, (offset + 16u)), asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4])};
+  return tint_symbol_10;
+}
+
+typedef S tint_symbol_5_ret[4];
+tint_symbol_5_ret tint_symbol_5(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_6(buffer, (offset + (i_1 * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_5(u, 0u));
+  tint_symbol_1(s, 128u, tint_symbol_6(u, 256u));
+  tint_symbol_3(s, 400u, tint_symbol_8(u, 272u));
+  s.Store4(144u, asuint(asfloat(u[2]).ywxz));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_storage.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_storage.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..629f8ff
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_storage.wgsl.expected.fxc.hlsl

@@ -0,0 +1,65 @@
+struct S {
+  int before;
+  float3x4 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol_3(RWByteAddressBuffer buffer, uint offset, float3x4 value) {
+  buffer.Store4((offset + 0u), asuint(value[0u]));
+  buffer.Store4((offset + 16u), asuint(value[1u]));
+  buffer.Store4((offset + 32u), asuint(value[2u]));
+}
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, S value) {
+  buffer.Store((offset + 0u), asuint(value.before));
+  tint_symbol_3(buffer, (offset + 16u), value.m);
+  buffer.Store((offset + 64u), asuint(value.after));
+}
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, S value[4]) {
+  S array[4] = value;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      tint_symbol_1(buffer, (offset + (i * 128u)), array[i]);
+    }
+  }
+}
+
+float3x4 tint_symbol_8(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]));
+}
+
+S tint_symbol_6(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_3 = ((offset + 0u)) / 4;
+  const uint scalar_offset_4 = ((offset + 64u)) / 4;
+  const S tint_symbol_10 = {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4]), tint_symbol_8(buffer, (offset + 16u)), asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4])};
+  return tint_symbol_10;
+}
+
+typedef S tint_symbol_5_ret[4];
+tint_symbol_5_ret tint_symbol_5(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_6(buffer, (offset + (i_1 * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_5(u, 0u));
+  tint_symbol_1(s, 128u, tint_symbol_6(u, 256u));
+  tint_symbol_3(s, 400u, tint_symbol_8(u, 272u));
+  s.Store4(144u, asuint(asfloat(u[2]).ywxz));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_storage.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_storage.wgsl.expected.glsl
new file mode 100644
index 0000000..1460b32
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_storage.wgsl.expected.glsl

@@ -0,0 +1,46 @@
+#version 310 es
+
+struct S {
+  int before;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  mat3x4 m;
+  int after;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+};
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  S inner[4];
+} u;
+
+layout(binding = 1, std430) buffer u_block_ssbo {
+  S inner[4];
+} s;
+
+void f() {
+  s.inner = u.inner;
+  s.inner[1] = u.inner[2];
+  s.inner[3].m = u.inner[2].m;
+  s.inner[1].m[0] = u.inner[0].m[1].ywxz;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_storage.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_storage.wgsl.expected.msl
new file mode 100644
index 0000000..8eb259e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_storage.wgsl.expected.msl

@@ -0,0 +1,32 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 12> tint_pad;
+  /* 0x0010 */ float3x4 m;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_1;
+};
+
+kernel void f(device tint_array<S, 4>* tint_symbol [[buffer(1)]], const constant tint_array<S, 4>* tint_symbol_1 [[buffer(0)]]) {
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[2];
+  (*(tint_symbol))[3].m = (*(tint_symbol_1))[2].m;
+  (*(tint_symbol))[1].m[0] = float4((*(tint_symbol_1))[0].m[1]).ywxz;
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_storage.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_storage.wgsl.expected.spvasm
new file mode 100644
index 0000000..8073e21
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_storage.wgsl.expected.spvasm

@@ -0,0 +1,81 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 45
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %u "u"
+               OpName %s "s"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 16
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 16
+               OpMemberDecorate %S 2 Offset 64
+               OpDecorate %_arr_S_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpDecorate %s DescriptorSet 0
+               OpDecorate %s Binding 1
+        %int = OpTypeInt 32 1
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%mat3v4float = OpTypeMatrix %v4float 3
+          %S = OpTypeStruct %int %mat3v4float %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+    %u_block = OpTypeStruct %_arr_S_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%_ptr_StorageBuffer_u_block = OpTypePointer StorageBuffer %u_block
+          %s = OpVariable %_ptr_StorageBuffer_u_block StorageBuffer
+       %void = OpTypeVoid
+         %14 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_StorageBuffer__arr_S_uint_4 = OpTypePointer StorageBuffer %_arr_S_uint_4
+%_ptr_Uniform__arr_S_uint_4 = OpTypePointer Uniform %_arr_S_uint_4
+      %int_1 = OpConstant %int 1
+%_ptr_StorageBuffer_S = OpTypePointer StorageBuffer %S
+      %int_2 = OpConstant %int 2
+%_ptr_Uniform_S = OpTypePointer Uniform %S
+      %int_3 = OpConstant %int 3
+     %uint_1 = OpConstant %uint 1
+%_ptr_StorageBuffer_mat3v4float = OpTypePointer StorageBuffer %mat3v4float
+%_ptr_Uniform_mat3v4float = OpTypePointer Uniform %mat3v4float
+         %38 = OpConstantNull %int
+%_ptr_StorageBuffer_v4float = OpTypePointer StorageBuffer %v4float
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+          %f = OpFunction %void None %14
+         %17 = OpLabel
+         %20 = OpAccessChain %_ptr_StorageBuffer__arr_S_uint_4 %s %uint_0
+         %22 = OpAccessChain %_ptr_Uniform__arr_S_uint_4 %u %uint_0
+         %23 = OpLoad %_arr_S_uint_4 %22
+               OpStore %20 %23
+         %26 = OpAccessChain %_ptr_StorageBuffer_S %s %uint_0 %int_1
+         %29 = OpAccessChain %_ptr_Uniform_S %u %uint_0 %int_2
+         %30 = OpLoad %S %29
+               OpStore %26 %30
+         %34 = OpAccessChain %_ptr_StorageBuffer_mat3v4float %s %uint_0 %int_3 %uint_1
+         %36 = OpAccessChain %_ptr_Uniform_mat3v4float %u %uint_0 %int_2 %uint_1
+         %37 = OpLoad %mat3v4float %36
+               OpStore %34 %37
+         %40 = OpAccessChain %_ptr_StorageBuffer_v4float %s %uint_0 %int_1 %uint_1 %38
+         %42 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %38 %uint_1 %int_1
+         %43 = OpLoad %v4float %42
+         %44 = OpVectorShuffle %v4float %43 %43 1 3 0 2
+               OpStore %40 %44
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_storage.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_storage.wgsl.expected.wgsl
new file mode 100644
index 0000000..607b7e0
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_storage.wgsl.expected.wgsl

@@ -0,0 +1,18 @@
+struct S {
+  before : i32,
+  m : mat3x4<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  s = u;
+  s[1] = u[2];
+  s[3].m = u[2].m;
+  s[1].m[0] = u[0].m[1].ywxz;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_workgroup.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_workgroup.wgsl
new file mode 100644
index 0000000..32d7130
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_workgroup.wgsl

@@ -0,0 +1,17 @@
+struct S {
+  before : i32,
+  m : mat3x4<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+var<workgroup> w : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    w = u;
+    w[1] = u[2];
+    w[3].m = u[2].m;
+    w[1].m[0] = u[0].m[1].ywxz;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_workgroup.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_workgroup.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..2188035
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_workgroup.wgsl.expected.dxc.hlsl

@@ -0,0 +1,60 @@
+struct S {
+  int before;
+  float3x4 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+groupshared S w[4];
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+float3x4 tint_symbol_5(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]));
+}
+
+S tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_3 = ((offset + 0u)) / 4;
+  const uint scalar_offset_4 = ((offset + 64u)) / 4;
+  const S tint_symbol_8 = {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4]), tint_symbol_5(buffer, (offset + 16u)), asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4])};
+  return tint_symbol_8;
+}
+
+typedef S tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 128u)));
+    }
+  }
+  return arr;
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      const uint i = idx;
+      const S tint_symbol_7 = (S)0;
+      w[i] = tint_symbol_7;
+    }
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = tint_symbol_3(u, 256u);
+  w[3].m = tint_symbol_5(u, 272u);
+  w[1].m[0] = asfloat(u[2]).ywxz;
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_workgroup.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_workgroup.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..2188035
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_workgroup.wgsl.expected.fxc.hlsl

@@ -0,0 +1,60 @@
+struct S {
+  int before;
+  float3x4 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+groupshared S w[4];
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+float3x4 tint_symbol_5(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]));
+}
+
+S tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_3 = ((offset + 0u)) / 4;
+  const uint scalar_offset_4 = ((offset + 64u)) / 4;
+  const S tint_symbol_8 = {asint(buffer[scalar_offset_3 / 4][scalar_offset_3 % 4]), tint_symbol_5(buffer, (offset + 16u)), asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4])};
+  return tint_symbol_8;
+}
+
+typedef S tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 128u)));
+    }
+  }
+  return arr;
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      const uint i = idx;
+      const S tint_symbol_7 = (S)0;
+      w[i] = tint_symbol_7;
+    }
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = tint_symbol_3(u, 256u);
+  w[3].m = tint_symbol_5(u, 272u);
+  w[1].m[0] = asfloat(u[2]).ywxz;
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_workgroup.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_workgroup.wgsl.expected.glsl
new file mode 100644
index 0000000..c3d1ec4
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_workgroup.wgsl.expected.glsl

@@ -0,0 +1,51 @@
+#version 310 es
+
+struct S {
+  int before;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  mat3x4 m;
+  int after;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+};
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  S inner[4];
+} u;
+
+shared S w[4];
+void f(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      uint i = idx;
+      S tint_symbol = S(0, 0u, 0u, 0u, mat3x4(vec4(0.0f), vec4(0.0f), vec4(0.0f)), 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u);
+      w[i] = tint_symbol;
+    }
+  }
+  barrier();
+  w = u.inner;
+  w[1] = u.inner[2];
+  w[3].m = u.inner[2].m;
+  w[1].m[0] = u.inner[0].m[1].ywxz;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f(gl_LocalInvocationIndex);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_workgroup.wgsl.expected.msl
new file mode 100644
index 0000000..8fb8e97
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_workgroup.wgsl.expected.msl

@@ -0,0 +1,47 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 12> tint_pad;
+  /* 0x0010 */ float3x4 m;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_1;
+};
+
+struct tint_symbol_6 {
+  tint_array<S, 4> w;
+};
+
+void f_inner(uint local_invocation_index, threadgroup tint_array<S, 4>* const tint_symbol_1, const constant tint_array<S, 4>* const tint_symbol_2) {
+  for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+    uint const i = idx;
+    S const tint_symbol = S{};
+    (*(tint_symbol_1))[i] = tint_symbol;
+  }
+  threadgroup_barrier(mem_flags::mem_threadgroup);
+  *(tint_symbol_1) = *(tint_symbol_2);
+  (*(tint_symbol_1))[1] = (*(tint_symbol_2))[2];
+  (*(tint_symbol_1))[3].m = (*(tint_symbol_2))[2].m;
+  (*(tint_symbol_1))[1].m[0] = float4((*(tint_symbol_2))[0].m[1]).ywxz;
+}
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol_5 [[buffer(0)]], threadgroup tint_symbol_6* tint_symbol_4 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
+  threadgroup tint_array<S, 4>* const tint_symbol_3 = &((*(tint_symbol_4)).w);
+  f_inner(local_invocation_index, tint_symbol_3, tint_symbol_5);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_workgroup.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_workgroup.wgsl.expected.spvasm
new file mode 100644
index 0000000..91d76ce
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_workgroup.wgsl.expected.spvasm

@@ -0,0 +1,124 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 72
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f" %local_invocation_index_1
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %local_invocation_index_1 "local_invocation_index_1"
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %u "u"
+               OpName %w "w"
+               OpName %f_inner "f_inner"
+               OpName %local_invocation_index "local_invocation_index"
+               OpName %idx "idx"
+               OpName %f "f"
+               OpDecorate %local_invocation_index_1 BuiltIn LocalInvocationIndex
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 16
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 16
+               OpMemberDecorate %S 2 Offset 64
+               OpDecorate %_arr_S_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %uint = OpTypeInt 32 0
+%_ptr_Input_uint = OpTypePointer Input %uint
+%local_invocation_index_1 = OpVariable %_ptr_Input_uint Input
+        %int = OpTypeInt 32 1
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%mat3v4float = OpTypeMatrix %v4float 3
+          %S = OpTypeStruct %int %mat3v4float %int
+     %uint_4 = OpConstant %uint 4
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+    %u_block = OpTypeStruct %_arr_S_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%_ptr_Workgroup__arr_S_uint_4 = OpTypePointer Workgroup %_arr_S_uint_4
+          %w = OpVariable %_ptr_Workgroup__arr_S_uint_4 Workgroup
+       %void = OpTypeVoid
+         %16 = OpTypeFunction %void %uint
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %23 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Workgroup_S = OpTypePointer Workgroup %S
+         %37 = OpConstantNull %S
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+   %uint_264 = OpConstant %uint 264
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_S_uint_4 = OpTypePointer Uniform %_arr_S_uint_4
+      %int_1 = OpConstant %int 1
+      %int_2 = OpConstant %int 2
+%_ptr_Uniform_S = OpTypePointer Uniform %S
+      %int_3 = OpConstant %int 3
+%_ptr_Workgroup_mat3v4float = OpTypePointer Workgroup %mat3v4float
+%_ptr_Uniform_mat3v4float = OpTypePointer Uniform %mat3v4float
+         %60 = OpConstantNull %int
+%_ptr_Workgroup_v4float = OpTypePointer Workgroup %v4float
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+         %67 = OpTypeFunction %void
+    %f_inner = OpFunction %void None %16
+%local_invocation_index = OpFunctionParameter %uint
+         %20 = OpLabel
+        %idx = OpVariable %_ptr_Function_uint Function %23
+               OpStore %idx %local_invocation_index
+               OpBranch %24
+         %24 = OpLabel
+               OpLoopMerge %25 %26 None
+               OpBranch %27
+         %27 = OpLabel
+         %29 = OpLoad %uint %idx
+         %30 = OpULessThan %bool %29 %uint_4
+         %28 = OpLogicalNot %bool %30
+               OpSelectionMerge %32 None
+               OpBranchConditional %28 %33 %32
+         %33 = OpLabel
+               OpBranch %25
+         %32 = OpLabel
+         %34 = OpLoad %uint %idx
+         %36 = OpAccessChain %_ptr_Workgroup_S %w %34
+               OpStore %36 %37
+               OpBranch %26
+         %26 = OpLabel
+         %38 = OpLoad %uint %idx
+         %40 = OpIAdd %uint %38 %uint_1
+               OpStore %idx %40
+               OpBranch %24
+         %25 = OpLabel
+               OpControlBarrier %uint_2 %uint_2 %uint_264
+         %46 = OpAccessChain %_ptr_Uniform__arr_S_uint_4 %u %uint_0
+         %47 = OpLoad %_arr_S_uint_4 %46
+               OpStore %w %47
+         %49 = OpAccessChain %_ptr_Workgroup_S %w %int_1
+         %52 = OpAccessChain %_ptr_Uniform_S %u %uint_0 %int_2
+         %53 = OpLoad %S %52
+               OpStore %49 %53
+         %56 = OpAccessChain %_ptr_Workgroup_mat3v4float %w %int_3 %uint_1
+         %58 = OpAccessChain %_ptr_Uniform_mat3v4float %u %uint_0 %int_2 %uint_1
+         %59 = OpLoad %mat3v4float %58
+               OpStore %56 %59
+         %62 = OpAccessChain %_ptr_Workgroup_v4float %w %int_1 %uint_1 %60
+         %64 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %60 %uint_1 %int_1
+         %65 = OpLoad %v4float %64
+         %66 = OpVectorShuffle %v4float %65 %65 1 3 0 2
+               OpStore %62 %66
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %67
+         %69 = OpLabel
+         %71 = OpLoad %uint %local_invocation_index_1
+         %70 = OpFunctionCall %void %f_inner %71
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_workgroup.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_workgroup.wgsl.expected.wgsl
new file mode 100644
index 0000000..e6817a3
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat3x4_f32/to_workgroup.wgsl.expected.wgsl

@@ -0,0 +1,18 @@
+struct S {
+  before : i32,
+  m : mat3x4<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+var<workgroup> w : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  w = u;
+  w[1] = u[2];
+  w[3].m = u[2].m;
+  w[1].m[0] = u[0].m[1].ywxz;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2/dynamic_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x2/dynamic_index_via_ptr.wgsl
deleted file mode 100644
index 512ab8c..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat4x2/dynamic_index_via_ptr.wgsl
+++ /dev/null

@@ -1,31 +0,0 @@
-struct Inner {
-  m : mat4x2<f32>,
-}
-
-struct Outer {
-  a : array<Inner, 4>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
-
-var<private> counter = 0;
-fn i() -> i32 { counter++; return counter; }
-
-@compute @workgroup_size(1)
-fn f() {
-  let p_a           = &a;
-  let p_a_i         = &((*p_a)[i()]);
-  let p_a_i_a       = &((*p_a_i).a);
-  let p_a_i_a_i     = &((*p_a_i_a)[i()]);
-  let p_a_i_a_i_m   = &((*p_a_i_a_i).m);
-  let p_a_i_a_i_m_i = &((*p_a_i_a_i_m)[i()]);
-
-
-  let l_a             : array<Outer, 4> =  *p_a;
-  let l_a_i           : Outer           =  *p_a_i;
-  let l_a_i_a         : array<Inner, 4> =  *p_a_i_a;
-  let l_a_i_a_i       : Inner           =  *p_a_i_a_i;
-  let l_a_i_a_i_m     : mat4x2<f32>     =  *p_a_i_a_i_m;
-  let l_a_i_a_i_m_i   : vec2<f32>       =  *p_a_i_a_i_m_i;
-  let l_a_i_a_i_m_i_i : f32             = (*p_a_i_a_i_m_i)[i()];
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x2/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
deleted file mode 100644
index 8d635a9..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat4x2/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
+++ /dev/null

@@ -1,82 +0,0 @@
-struct Inner {
-  float4x2 m;
-};
-struct Outer {
-  Inner a[4];
-};
-
-cbuffer cbuffer_a : register(b0, space0) {
-  uint4 a[32];
-};
-static int counter = 0;
-
-int i() {
-  counter = (counter + 1);
-  return counter;
-}
-
-float4x2 tint_symbol_8(uint4 buffer[32], uint offset) {
-  const uint scalar_offset = ((offset + 0u)) / 4;
-  uint4 ubo_load = buffer[scalar_offset / 4];
-  const uint scalar_offset_1 = ((offset + 8u)) / 4;
-  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
-  const uint scalar_offset_2 = ((offset + 16u)) / 4;
-  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
-  const uint scalar_offset_3 = ((offset + 24u)) / 4;
-  uint4 ubo_load_3 = buffer[scalar_offset_3 / 4];
-  return float4x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)), asfloat(((scalar_offset_3 & 2) ? ubo_load_3.zw : ubo_load_3.xy)));
-}
-
-Inner tint_symbol_7(uint4 buffer[32], uint offset) {
-  const Inner tint_symbol_11 = {tint_symbol_8(buffer, (offset + 0u))};
-  return tint_symbol_11;
-}
-
-typedef Inner tint_symbol_6_ret[4];
-tint_symbol_6_ret tint_symbol_6(uint4 buffer[32], uint offset) {
-  Inner arr[4] = (Inner[4])0;
-  {
-    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
-      arr[i_1] = tint_symbol_7(buffer, (offset + (i_1 * 32u)));
-    }
-  }
-  return arr;
-}
-
-Outer tint_symbol_5(uint4 buffer[32], uint offset) {
-  const Outer tint_symbol_12 = {tint_symbol_6(buffer, (offset + 0u))};
-  return tint_symbol_12;
-}
-
-typedef Outer tint_symbol_4_ret[4];
-tint_symbol_4_ret tint_symbol_4(uint4 buffer[32], uint offset) {
-  Outer arr_1[4] = (Outer[4])0;
-  {
-    for(uint i_2 = 0u; (i_2 < 4u); i_2 = (i_2 + 1u)) {
-      arr_1[i_2] = tint_symbol_5(buffer, (offset + (i_2 * 128u)));
-    }
-  }
-  return arr_1;
-}
-
-[numthreads(1, 1, 1)]
-void f() {
-  const int p_a_i_save = i();
-  const int p_a_i_a_i_save = i();
-  const int p_a_i_a_i_m_i_save = i();
-  const Outer l_a[4] = tint_symbol_4(a, 0u);
-  const Outer l_a_i = tint_symbol_5(a, (128u * uint(p_a_i_save)));
-  const Inner l_a_i_a[4] = tint_symbol_6(a, (128u * uint(p_a_i_save)));
-  const Inner l_a_i_a_i = tint_symbol_7(a, ((128u * uint(p_a_i_save)) + (32u * uint(p_a_i_a_i_save))));
-  const float4x2 l_a_i_a_i_m = tint_symbol_8(a, ((128u * uint(p_a_i_save)) + (32u * uint(p_a_i_a_i_save))));
-  const uint scalar_offset_4 = ((((128u * uint(p_a_i_save)) + (32u * uint(p_a_i_a_i_save))) + (8u * uint(p_a_i_a_i_m_i_save)))) / 4;
-  uint4 ubo_load_4 = a[scalar_offset_4 / 4];
-  const float2 l_a_i_a_i_m_i = asfloat(((scalar_offset_4 & 2) ? ubo_load_4.zw : ubo_load_4.xy));
-  const int tint_symbol = p_a_i_save;
-  const int tint_symbol_1 = p_a_i_a_i_save;
-  const int tint_symbol_2 = p_a_i_a_i_m_i_save;
-  const int tint_symbol_3 = i();
-  const uint scalar_offset_5 = (((((128u * uint(tint_symbol)) + (32u * uint(tint_symbol_1))) + (8u * uint(tint_symbol_2))) + (4u * uint(tint_symbol_3)))) / 4;
-  const float l_a_i_a_i_m_i_i = asfloat(a[scalar_offset_5 / 4][scalar_offset_5 % 4]);
-  return;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x2/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
deleted file mode 100644
index 8d635a9..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat4x2/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
+++ /dev/null

@@ -1,82 +0,0 @@
-struct Inner {
-  float4x2 m;
-};
-struct Outer {
-  Inner a[4];
-};
-
-cbuffer cbuffer_a : register(b0, space0) {
-  uint4 a[32];
-};
-static int counter = 0;
-
-int i() {
-  counter = (counter + 1);
-  return counter;
-}
-
-float4x2 tint_symbol_8(uint4 buffer[32], uint offset) {
-  const uint scalar_offset = ((offset + 0u)) / 4;
-  uint4 ubo_load = buffer[scalar_offset / 4];
-  const uint scalar_offset_1 = ((offset + 8u)) / 4;
-  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
-  const uint scalar_offset_2 = ((offset + 16u)) / 4;
-  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
-  const uint scalar_offset_3 = ((offset + 24u)) / 4;
-  uint4 ubo_load_3 = buffer[scalar_offset_3 / 4];
-  return float4x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)), asfloat(((scalar_offset_3 & 2) ? ubo_load_3.zw : ubo_load_3.xy)));
-}
-
-Inner tint_symbol_7(uint4 buffer[32], uint offset) {
-  const Inner tint_symbol_11 = {tint_symbol_8(buffer, (offset + 0u))};
-  return tint_symbol_11;
-}
-
-typedef Inner tint_symbol_6_ret[4];
-tint_symbol_6_ret tint_symbol_6(uint4 buffer[32], uint offset) {
-  Inner arr[4] = (Inner[4])0;
-  {
-    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
-      arr[i_1] = tint_symbol_7(buffer, (offset + (i_1 * 32u)));
-    }
-  }
-  return arr;
-}
-
-Outer tint_symbol_5(uint4 buffer[32], uint offset) {
-  const Outer tint_symbol_12 = {tint_symbol_6(buffer, (offset + 0u))};
-  return tint_symbol_12;
-}
-
-typedef Outer tint_symbol_4_ret[4];
-tint_symbol_4_ret tint_symbol_4(uint4 buffer[32], uint offset) {
-  Outer arr_1[4] = (Outer[4])0;
-  {
-    for(uint i_2 = 0u; (i_2 < 4u); i_2 = (i_2 + 1u)) {
-      arr_1[i_2] = tint_symbol_5(buffer, (offset + (i_2 * 128u)));
-    }
-  }
-  return arr_1;
-}
-
-[numthreads(1, 1, 1)]
-void f() {
-  const int p_a_i_save = i();
-  const int p_a_i_a_i_save = i();
-  const int p_a_i_a_i_m_i_save = i();
-  const Outer l_a[4] = tint_symbol_4(a, 0u);
-  const Outer l_a_i = tint_symbol_5(a, (128u * uint(p_a_i_save)));
-  const Inner l_a_i_a[4] = tint_symbol_6(a, (128u * uint(p_a_i_save)));
-  const Inner l_a_i_a_i = tint_symbol_7(a, ((128u * uint(p_a_i_save)) + (32u * uint(p_a_i_a_i_save))));
-  const float4x2 l_a_i_a_i_m = tint_symbol_8(a, ((128u * uint(p_a_i_save)) + (32u * uint(p_a_i_a_i_save))));
-  const uint scalar_offset_4 = ((((128u * uint(p_a_i_save)) + (32u * uint(p_a_i_a_i_save))) + (8u * uint(p_a_i_a_i_m_i_save)))) / 4;
-  uint4 ubo_load_4 = a[scalar_offset_4 / 4];
-  const float2 l_a_i_a_i_m_i = asfloat(((scalar_offset_4 & 2) ? ubo_load_4.zw : ubo_load_4.xy));
-  const int tint_symbol = p_a_i_save;
-  const int tint_symbol_1 = p_a_i_a_i_save;
-  const int tint_symbol_2 = p_a_i_a_i_m_i_save;
-  const int tint_symbol_3 = i();
-  const uint scalar_offset_5 = (((((128u * uint(tint_symbol)) + (32u * uint(tint_symbol_1))) + (8u * uint(tint_symbol_2))) + (4u * uint(tint_symbol_3)))) / 4;
-  const float l_a_i_a_i_m_i_i = asfloat(a[scalar_offset_5 / 4][scalar_offset_5 % 4]);
-  return;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2/dynamic_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat4x2/dynamic_index_via_ptr.wgsl.expected.glsl
deleted file mode 100644
index 4bffcae..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat4x2/dynamic_index_via_ptr.wgsl.expected.glsl
+++ /dev/null

@@ -1,140 +0,0 @@
-#version 310 es
-
-struct Inner {
-  mat4x2 m;
-};
-
-struct Inner_std140 {
-  vec2 m_0;
-  vec2 m_1;
-  vec2 m_2;
-  vec2 m_3;
-};
-
-struct Outer {
-  Inner a[4];
-};
-
-struct Outer_std140 {
-  Inner_std140 a[4];
-};
-
-layout(binding = 0, std140) uniform a_block_std140_ubo {
-  Outer_std140 inner[4];
-} a;
-
-int counter = 0;
-int i() {
-  counter = (counter + 1);
-  return counter;
-}
-
-Inner conv_Inner(Inner_std140 val) {
-  return Inner(mat4x2(val.m_0, val.m_1, val.m_2, val.m_3));
-}
-
-Inner[4] conv_arr4_Inner(Inner_std140 val[4]) {
-  Inner arr[4] = Inner[4](Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)));
-  {
-    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
-      arr[i] = conv_Inner(val[i]);
-    }
-  }
-  return arr;
-}
-
-Outer conv_Outer(Outer_std140 val) {
-  return Outer(conv_arr4_Inner(val.a));
-}
-
-Outer[4] conv_arr4_Outer(Outer_std140 val[4]) {
-  Outer arr[4] = Outer[4](Outer(Inner[4](Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)))), Outer(Inner[4](Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)))), Outer(Inner[4](Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)))), Outer(Inner[4](Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)))));
-  {
-    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
-      arr[i] = conv_Outer(val[i]);
-    }
-  }
-  return arr;
-}
-
-mat4x2 load_a_inner_p0_a_p1_m(uint p0, uint p1) {
-  uint s_save = p0;
-  uint s_save_1 = p1;
-  return mat4x2(a.inner[s_save].a[s_save_1].m_0, a.inner[s_save].a[s_save_1].m_1, a.inner[s_save].a[s_save_1].m_2, a.inner[s_save].a[s_save_1].m_3);
-}
-
-vec2 load_a_inner_p0_a_p1_m_p2(uint p0, uint p1, uint p2) {
-  switch(p2) {
-    case 0u: {
-      return a.inner[p0].a[p1].m_0;
-      break;
-    }
-    case 1u: {
-      return a.inner[p0].a[p1].m_1;
-      break;
-    }
-    case 2u: {
-      return a.inner[p0].a[p1].m_2;
-      break;
-    }
-    case 3u: {
-      return a.inner[p0].a[p1].m_3;
-      break;
-    }
-    default: {
-      return vec2(0.0f);
-      break;
-    }
-  }
-}
-
-float load_a_inner_p0_a_p1_m_p2_p3(uint p0, uint p1, uint p2, uint p3) {
-  switch(p2) {
-    case 0u: {
-      return a.inner[p0].a[p1].m_0[p3];
-      break;
-    }
-    case 1u: {
-      return a.inner[p0].a[p1].m_1[p3];
-      break;
-    }
-    case 2u: {
-      return a.inner[p0].a[p1].m_2[p3];
-      break;
-    }
-    case 3u: {
-      return a.inner[p0].a[p1].m_3[p3];
-      break;
-    }
-    default: {
-      return 0.0f;
-      break;
-    }
-  }
-}
-
-void f() {
-  Outer p_a[4] = conv_arr4_Outer(a.inner);
-  int tint_symbol = i();
-  Outer p_a_i = conv_Outer(a.inner[tint_symbol]);
-  Inner p_a_i_a[4] = conv_arr4_Inner(a.inner[tint_symbol].a);
-  int tint_symbol_1 = i();
-  Inner p_a_i_a_i = conv_Inner(a.inner[tint_symbol].a[tint_symbol_1]);
-  mat4x2 p_a_i_a_i_m = load_a_inner_p0_a_p1_m(uint(tint_symbol), uint(tint_symbol_1));
-  int tint_symbol_2 = i();
-  vec2 p_a_i_a_i_m_i = load_a_inner_p0_a_p1_m_p2(uint(tint_symbol), uint(tint_symbol_1), uint(tint_symbol_2));
-  Outer l_a[4] = conv_arr4_Outer(a.inner);
-  Outer l_a_i = conv_Outer(a.inner[tint_symbol]);
-  Inner l_a_i_a[4] = conv_arr4_Inner(a.inner[tint_symbol].a);
-  Inner l_a_i_a_i = conv_Inner(a.inner[tint_symbol].a[tint_symbol_1]);
-  mat4x2 l_a_i_a_i_m = load_a_inner_p0_a_p1_m(uint(tint_symbol), uint(tint_symbol_1));
-  vec2 l_a_i_a_i_m_i = load_a_inner_p0_a_p1_m_p2(uint(tint_symbol), uint(tint_symbol_1), uint(tint_symbol_2));
-  int tint_symbol_3 = i();
-  float l_a_i_a_i_m_i_i = load_a_inner_p0_a_p1_m_p2_p3(uint(tint_symbol), uint(tint_symbol_1), uint(tint_symbol_2), uint(tint_symbol_3));
-}
-
-layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
-void main() {
-  f();
-  return;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2/dynamic_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat4x2/dynamic_index_via_ptr.wgsl.expected.msl
deleted file mode 100644
index 2f43afe..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat4x2/dynamic_index_via_ptr.wgsl.expected.msl
+++ /dev/null

@@ -1,48 +0,0 @@
-#include <metal_stdlib>
-
-using namespace metal;
-
-template<typename T, size_t N>
-struct tint_array {
-    const constant T& operator[](size_t i) const constant { return elements[i]; }
-    device T& operator[](size_t i) device { return elements[i]; }
-    const device T& operator[](size_t i) const device { return elements[i]; }
-    thread T& operator[](size_t i) thread { return elements[i]; }
-    const thread T& operator[](size_t i) const thread { return elements[i]; }
-    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
-    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
-    T elements[N];
-};
-
-struct Inner {
-  /* 0x0000 */ float4x2 m;
-};
-
-struct Outer {
-  /* 0x0000 */ tint_array<Inner, 4> a;
-};
-
-int i() {
-  thread int tint_symbol_4 = 0;
-  tint_symbol_4 = as_type<int>((as_type<uint>(tint_symbol_4) + as_type<uint>(1)));
-  return tint_symbol_4;
-}
-
-kernel void f(const constant tint_array<Outer, 4>* tint_symbol_5 [[buffer(0)]]) {
-  int const tint_symbol = i();
-  int const p_a_i_save = tint_symbol;
-  int const tint_symbol_1 = i();
-  int const p_a_i_a_i_save = tint_symbol_1;
-  int const tint_symbol_2 = i();
-  int const p_a_i_a_i_m_i_save = tint_symbol_2;
-  tint_array<Outer, 4> const l_a = *(tint_symbol_5);
-  Outer const l_a_i = (*(tint_symbol_5))[p_a_i_save];
-  tint_array<Inner, 4> const l_a_i_a = (*(tint_symbol_5))[p_a_i_save].a;
-  Inner const l_a_i_a_i = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save];
-  float4x2 const l_a_i_a_i_m = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save].m;
-  float2 const l_a_i_a_i_m_i = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save].m[p_a_i_a_i_m_i_save];
-  int const tint_symbol_3 = i();
-  float const l_a_i_a_i_m_i_i = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save].m[p_a_i_a_i_m_i_save][tint_symbol_3];
-  return;
-}
-

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2/dynamic_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat4x2/dynamic_index_via_ptr.wgsl.expected.spvasm
deleted file mode 100644
index 9a4f081..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat4x2/dynamic_index_via_ptr.wgsl.expected.spvasm
+++ /dev/null

@@ -1,334 +0,0 @@
-; SPIR-V
-; Version: 1.3
-; Generator: Google Tint Compiler; 0
-; Bound: 215
-; Schema: 0
-               OpCapability Shader
-               OpMemoryModel Logical GLSL450
-               OpEntryPoint GLCompute %f "f"
-               OpExecutionMode %f LocalSize 1 1 1
-               OpName %a_block_std140 "a_block_std140"
-               OpMemberName %a_block_std140 0 "inner"
-               OpName %Outer_std140 "Outer_std140"
-               OpMemberName %Outer_std140 0 "a"
-               OpName %Inner_std140 "Inner_std140"
-               OpMemberName %Inner_std140 0 "m_0"
-               OpMemberName %Inner_std140 1 "m_1"
-               OpMemberName %Inner_std140 2 "m_2"
-               OpMemberName %Inner_std140 3 "m_3"
-               OpName %a "a"
-               OpName %counter "counter"
-               OpName %i "i"
-               OpName %Inner "Inner"
-               OpMemberName %Inner 0 "m"
-               OpName %conv_Inner "conv_Inner"
-               OpName %val "val"
-               OpName %conv_arr4_Inner "conv_arr4_Inner"
-               OpName %val_0 "val"
-               OpName %arr "arr"
-               OpName %i_0 "i"
-               OpName %var_for_index_1 "var_for_index_1"
-               OpName %Outer "Outer"
-               OpMemberName %Outer 0 "a"
-               OpName %conv_Outer "conv_Outer"
-               OpName %val_1 "val"
-               OpName %conv_arr4_Outer "conv_arr4_Outer"
-               OpName %val_2 "val"
-               OpName %arr_0 "arr"
-               OpName %i_1 "i"
-               OpName %var_for_index "var_for_index"
-               OpName %load_a_inner_p0_a_p1_m "load_a_inner_p0_a_p1_m"
-               OpName %p0 "p0"
-               OpName %p1 "p1"
-               OpName %load_a_inner_p0_a_p1_m_p2 "load_a_inner_p0_a_p1_m_p2"
-               OpName %p0_0 "p0"
-               OpName %p1_0 "p1"
-               OpName %p2 "p2"
-               OpName %load_a_inner_p0_a_p1_m_p2_p3 "load_a_inner_p0_a_p1_m_p2_p3"
-               OpName %p0_1 "p0"
-               OpName %p1_1 "p1"
-               OpName %p2_0 "p2"
-               OpName %p3 "p3"
-               OpName %f "f"
-               OpDecorate %a_block_std140 Block
-               OpMemberDecorate %a_block_std140 0 Offset 0
-               OpMemberDecorate %Outer_std140 0 Offset 0
-               OpMemberDecorate %Inner_std140 0 Offset 0
-               OpMemberDecorate %Inner_std140 1 Offset 8
-               OpMemberDecorate %Inner_std140 2 Offset 16
-               OpMemberDecorate %Inner_std140 3 Offset 24
-               OpDecorate %_arr_Inner_std140_uint_4 ArrayStride 32
-               OpDecorate %_arr_Outer_std140_uint_4 ArrayStride 128
-               OpDecorate %a NonWritable
-               OpDecorate %a DescriptorSet 0
-               OpDecorate %a Binding 0
-               OpMemberDecorate %Inner 0 Offset 0
-               OpMemberDecorate %Inner 0 ColMajor
-               OpMemberDecorate %Inner 0 MatrixStride 8
-               OpDecorate %_arr_Inner_uint_4 ArrayStride 32
-               OpMemberDecorate %Outer 0 Offset 0
-               OpDecorate %_arr_Outer_uint_4 ArrayStride 128
-      %float = OpTypeFloat 32
-    %v2float = OpTypeVector %float 2
-%Inner_std140 = OpTypeStruct %v2float %v2float %v2float %v2float
-       %uint = OpTypeInt 32 0
-     %uint_4 = OpConstant %uint 4
-%_arr_Inner_std140_uint_4 = OpTypeArray %Inner_std140 %uint_4
-%Outer_std140 = OpTypeStruct %_arr_Inner_std140_uint_4
-%_arr_Outer_std140_uint_4 = OpTypeArray %Outer_std140 %uint_4
-%a_block_std140 = OpTypeStruct %_arr_Outer_std140_uint_4
-%_ptr_Uniform_a_block_std140 = OpTypePointer Uniform %a_block_std140
-          %a = OpVariable %_ptr_Uniform_a_block_std140 Uniform
-        %int = OpTypeInt 32 1
-         %13 = OpConstantNull %int
-%_ptr_Private_int = OpTypePointer Private %int
-    %counter = OpVariable %_ptr_Private_int Private %13
-         %16 = OpTypeFunction %int
-      %int_1 = OpConstant %int 1
-%mat4v2float = OpTypeMatrix %v2float 4
-      %Inner = OpTypeStruct %mat4v2float
-         %23 = OpTypeFunction %Inner %Inner_std140
-%_arr_Inner_uint_4 = OpTypeArray %Inner %uint_4
-         %35 = OpTypeFunction %_arr_Inner_uint_4 %_arr_Inner_std140_uint_4
-%_ptr_Function__arr_Inner_uint_4 = OpTypePointer Function %_arr_Inner_uint_4
-         %42 = OpConstantNull %_arr_Inner_uint_4
-%_ptr_Function_uint = OpTypePointer Function %uint
-         %45 = OpConstantNull %uint
-       %bool = OpTypeBool
-%_ptr_Function__arr_Inner_std140_uint_4 = OpTypePointer Function %_arr_Inner_std140_uint_4
-         %58 = OpConstantNull %_arr_Inner_std140_uint_4
-%_ptr_Function_Inner = OpTypePointer Function %Inner
-%_ptr_Function_Inner_std140 = OpTypePointer Function %Inner_std140
-     %uint_1 = OpConstant %uint 1
-      %Outer = OpTypeStruct %_arr_Inner_uint_4
-         %71 = OpTypeFunction %Outer %Outer_std140
-%_arr_Outer_uint_4 = OpTypeArray %Outer %uint_4
-         %79 = OpTypeFunction %_arr_Outer_uint_4 %_arr_Outer_std140_uint_4
-%_ptr_Function__arr_Outer_uint_4 = OpTypePointer Function %_arr_Outer_uint_4
-         %86 = OpConstantNull %_arr_Outer_uint_4
-%_ptr_Function__arr_Outer_std140_uint_4 = OpTypePointer Function %_arr_Outer_std140_uint_4
-         %99 = OpConstantNull %_arr_Outer_std140_uint_4
-%_ptr_Function_Outer = OpTypePointer Function %Outer
-%_ptr_Function_Outer_std140 = OpTypePointer Function %Outer_std140
-        %111 = OpTypeFunction %mat4v2float %uint %uint
-     %uint_0 = OpConstant %uint 0
-%_ptr_Uniform_Inner_std140 = OpTypePointer Uniform %Inner_std140
-%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
-     %uint_2 = OpConstant %uint 2
-     %uint_3 = OpConstant %uint 3
-        %136 = OpTypeFunction %v2float %uint %uint %uint
-        %156 = OpConstantNull %v2float
-        %157 = OpTypeFunction %float %uint %uint %uint %uint
-%_ptr_Uniform_float = OpTypePointer Uniform %float
-        %179 = OpConstantNull %float
-       %void = OpTypeVoid
-        %180 = OpTypeFunction %void
-%_ptr_Uniform__arr_Outer_std140_uint_4 = OpTypePointer Uniform %_arr_Outer_std140_uint_4
-%_ptr_Uniform_Outer_std140 = OpTypePointer Uniform %Outer_std140
-%_ptr_Uniform__arr_Inner_std140_uint_4 = OpTypePointer Uniform %_arr_Inner_std140_uint_4
-          %i = OpFunction %int None %16
-         %18 = OpLabel
-         %19 = OpLoad %int %counter
-         %21 = OpIAdd %int %19 %int_1
-               OpStore %counter %21
-         %22 = OpLoad %int %counter
-               OpReturnValue %22
-               OpFunctionEnd
- %conv_Inner = OpFunction %Inner None %23
-        %val = OpFunctionParameter %Inner_std140
-         %28 = OpLabel
-         %29 = OpCompositeExtract %v2float %val 0
-         %30 = OpCompositeExtract %v2float %val 1
-         %31 = OpCompositeExtract %v2float %val 2
-         %32 = OpCompositeExtract %v2float %val 3
-         %33 = OpCompositeConstruct %mat4v2float %29 %30 %31 %32
-         %34 = OpCompositeConstruct %Inner %33
-               OpReturnValue %34
-               OpFunctionEnd
-%conv_arr4_Inner = OpFunction %_arr_Inner_uint_4 None %35
-      %val_0 = OpFunctionParameter %_arr_Inner_std140_uint_4
-         %39 = OpLabel
-        %arr = OpVariable %_ptr_Function__arr_Inner_uint_4 Function %42
-        %i_0 = OpVariable %_ptr_Function_uint Function %45
-%var_for_index_1 = OpVariable %_ptr_Function__arr_Inner_std140_uint_4 Function %58
-               OpBranch %46
-         %46 = OpLabel
-               OpLoopMerge %47 %48 None
-               OpBranch %49
-         %49 = OpLabel
-         %51 = OpLoad %uint %i_0
-         %52 = OpULessThan %bool %51 %uint_4
-         %50 = OpLogicalNot %bool %52
-               OpSelectionMerge %54 None
-               OpBranchConditional %50 %55 %54
-         %55 = OpLabel
-               OpBranch %47
-         %54 = OpLabel
-               OpStore %var_for_index_1 %val_0
-         %59 = OpLoad %uint %i_0
-         %61 = OpAccessChain %_ptr_Function_Inner %arr %59
-         %63 = OpLoad %uint %i_0
-         %65 = OpAccessChain %_ptr_Function_Inner_std140 %var_for_index_1 %63
-         %66 = OpLoad %Inner_std140 %65
-         %62 = OpFunctionCall %Inner %conv_Inner %66
-               OpStore %61 %62
-               OpBranch %48
-         %48 = OpLabel
-         %67 = OpLoad %uint %i_0
-         %69 = OpIAdd %uint %67 %uint_1
-               OpStore %i_0 %69
-               OpBranch %46
-         %47 = OpLabel
-         %70 = OpLoad %_arr_Inner_uint_4 %arr
-               OpReturnValue %70
-               OpFunctionEnd
- %conv_Outer = OpFunction %Outer None %71
-      %val_1 = OpFunctionParameter %Outer_std140
-         %75 = OpLabel
-         %77 = OpCompositeExtract %_arr_Inner_std140_uint_4 %val_1 0
-         %76 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr4_Inner %77
-         %78 = OpCompositeConstruct %Outer %76
-               OpReturnValue %78
-               OpFunctionEnd
-%conv_arr4_Outer = OpFunction %_arr_Outer_uint_4 None %79
-      %val_2 = OpFunctionParameter %_arr_Outer_std140_uint_4
-         %83 = OpLabel
-      %arr_0 = OpVariable %_ptr_Function__arr_Outer_uint_4 Function %86
-        %i_1 = OpVariable %_ptr_Function_uint Function %45
-%var_for_index = OpVariable %_ptr_Function__arr_Outer_std140_uint_4 Function %99
-               OpBranch %88
-         %88 = OpLabel
-               OpLoopMerge %89 %90 None
-               OpBranch %91
-         %91 = OpLabel
-         %93 = OpLoad %uint %i_1
-         %94 = OpULessThan %bool %93 %uint_4
-         %92 = OpLogicalNot %bool %94
-               OpSelectionMerge %95 None
-               OpBranchConditional %92 %96 %95
-         %96 = OpLabel
-               OpBranch %89
-         %95 = OpLabel
-               OpStore %var_for_index %val_2
-        %100 = OpLoad %uint %i_1
-        %102 = OpAccessChain %_ptr_Function_Outer %arr_0 %100
-        %104 = OpLoad %uint %i_1
-        %106 = OpAccessChain %_ptr_Function_Outer_std140 %var_for_index %104
-        %107 = OpLoad %Outer_std140 %106
-        %103 = OpFunctionCall %Outer %conv_Outer %107
-               OpStore %102 %103
-               OpBranch %90
-         %90 = OpLabel
-        %108 = OpLoad %uint %i_1
-        %109 = OpIAdd %uint %108 %uint_1
-               OpStore %i_1 %109
-               OpBranch %88
-         %89 = OpLabel
-        %110 = OpLoad %_arr_Outer_uint_4 %arr_0
-               OpReturnValue %110
-               OpFunctionEnd
-%load_a_inner_p0_a_p1_m = OpFunction %mat4v2float None %111
-         %p0 = OpFunctionParameter %uint
-         %p1 = OpFunctionParameter %uint
-        %115 = OpLabel
-        %119 = OpAccessChain %_ptr_Uniform_Inner_std140 %a %uint_0 %p0 %uint_0 %p1
-        %122 = OpAccessChain %_ptr_Uniform_v2float %119 %uint_0
-        %123 = OpLoad %v2float %122
-        %125 = OpAccessChain %_ptr_Uniform_v2float %119 %uint_1
-        %126 = OpLoad %v2float %125
-        %129 = OpAccessChain %_ptr_Uniform_v2float %119 %uint_2
-        %130 = OpLoad %v2float %129
-        %133 = OpAccessChain %_ptr_Uniform_v2float %119 %uint_3
-        %134 = OpLoad %v2float %133
-        %135 = OpCompositeConstruct %mat4v2float %123 %126 %130 %134
-               OpReturnValue %135
-               OpFunctionEnd
-%load_a_inner_p0_a_p1_m_p2 = OpFunction %v2float None %136
-       %p0_0 = OpFunctionParameter %uint
-       %p1_0 = OpFunctionParameter %uint
-         %p2 = OpFunctionParameter %uint
-        %141 = OpLabel
-               OpSelectionMerge %142 None
-               OpSwitch %p2 %143 0 %144 1 %145 2 %146 3 %147
-        %144 = OpLabel
-        %148 = OpAccessChain %_ptr_Uniform_v2float %a %uint_0 %p0_0 %uint_0 %p1_0 %uint_0
-        %149 = OpLoad %v2float %148
-               OpReturnValue %149
-        %145 = OpLabel
-        %150 = OpAccessChain %_ptr_Uniform_v2float %a %uint_0 %p0_0 %uint_0 %p1_0 %uint_1
-        %151 = OpLoad %v2float %150
-               OpReturnValue %151
-        %146 = OpLabel
-        %152 = OpAccessChain %_ptr_Uniform_v2float %a %uint_0 %p0_0 %uint_0 %p1_0 %uint_2
-        %153 = OpLoad %v2float %152
-               OpReturnValue %153
-        %147 = OpLabel
-        %154 = OpAccessChain %_ptr_Uniform_v2float %a %uint_0 %p0_0 %uint_0 %p1_0 %uint_3
-        %155 = OpLoad %v2float %154
-               OpReturnValue %155
-        %143 = OpLabel
-               OpReturnValue %156
-        %142 = OpLabel
-               OpReturnValue %156
-               OpFunctionEnd
-%load_a_inner_p0_a_p1_m_p2_p3 = OpFunction %float None %157
-       %p0_1 = OpFunctionParameter %uint
-       %p1_1 = OpFunctionParameter %uint
-       %p2_0 = OpFunctionParameter %uint
-         %p3 = OpFunctionParameter %uint
-        %163 = OpLabel
-               OpSelectionMerge %164 None
-               OpSwitch %p2_0 %165 0 %166 1 %167 2 %168 3 %169
-        %166 = OpLabel
-        %171 = OpAccessChain %_ptr_Uniform_float %a %uint_0 %p0_1 %uint_0 %p1_1 %uint_0 %p3
-        %172 = OpLoad %float %171
-               OpReturnValue %172
-        %167 = OpLabel
-        %173 = OpAccessChain %_ptr_Uniform_float %a %uint_0 %p0_1 %uint_0 %p1_1 %uint_1 %p3
-        %174 = OpLoad %float %173
-               OpReturnValue %174
-        %168 = OpLabel
-        %175 = OpAccessChain %_ptr_Uniform_float %a %uint_0 %p0_1 %uint_0 %p1_1 %uint_2 %p3
-        %176 = OpLoad %float %175
-               OpReturnValue %176
-        %169 = OpLabel
-        %177 = OpAccessChain %_ptr_Uniform_float %a %uint_0 %p0_1 %uint_0 %p1_1 %uint_3 %p3
-        %178 = OpLoad %float %177
-               OpReturnValue %178
-        %165 = OpLabel
-               OpReturnValue %179
-        %164 = OpLabel
-               OpReturnValue %179
-               OpFunctionEnd
-          %f = OpFunction %void None %180
-        %183 = OpLabel
-        %184 = OpFunctionCall %int %i
-        %185 = OpFunctionCall %int %i
-        %186 = OpFunctionCall %int %i
-        %189 = OpAccessChain %_ptr_Uniform__arr_Outer_std140_uint_4 %a %uint_0
-        %190 = OpLoad %_arr_Outer_std140_uint_4 %189
-        %187 = OpFunctionCall %_arr_Outer_uint_4 %conv_arr4_Outer %190
-        %193 = OpAccessChain %_ptr_Uniform_Outer_std140 %a %uint_0 %184
-        %194 = OpLoad %Outer_std140 %193
-        %191 = OpFunctionCall %Outer %conv_Outer %194
-        %197 = OpAccessChain %_ptr_Uniform__arr_Inner_std140_uint_4 %a %uint_0 %184 %uint_0
-        %198 = OpLoad %_arr_Inner_std140_uint_4 %197
-        %195 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr4_Inner %198
-        %200 = OpAccessChain %_ptr_Uniform_Inner_std140 %a %uint_0 %184 %uint_0 %185
-        %201 = OpLoad %Inner_std140 %200
-        %199 = OpFunctionCall %Inner %conv_Inner %201
-        %203 = OpBitcast %uint %184
-        %204 = OpBitcast %uint %185
-        %202 = OpFunctionCall %mat4v2float %load_a_inner_p0_a_p1_m %203 %204
-        %206 = OpBitcast %uint %184
-        %207 = OpBitcast %uint %185
-        %208 = OpBitcast %uint %186
-        %205 = OpFunctionCall %v2float %load_a_inner_p0_a_p1_m_p2 %206 %207 %208
-        %209 = OpFunctionCall %int %i
-        %211 = OpBitcast %uint %184
-        %212 = OpBitcast %uint %185
-        %213 = OpBitcast %uint %186
-        %214 = OpBitcast %uint %209
-        %210 = OpFunctionCall %float %load_a_inner_p0_a_p1_m_p2_p3 %211 %212 %213 %214
-               OpReturn
-               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2/dynamic_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x2/dynamic_index_via_ptr.wgsl.expected.wgsl
deleted file mode 100644
index 7cc47fe..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat4x2/dynamic_index_via_ptr.wgsl.expected.wgsl
+++ /dev/null

@@ -1,33 +0,0 @@
-struct Inner {
-  m : mat4x2<f32>,
-}
-
-struct Outer {
-  a : array<Inner, 4>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
-
-var<private> counter = 0;
-
-fn i() -> i32 {
-  counter++;
-  return counter;
-}
-
-@compute @workgroup_size(1)
-fn f() {
-  let p_a = &(a);
-  let p_a_i = &((*(p_a))[i()]);
-  let p_a_i_a = &((*(p_a_i)).a);
-  let p_a_i_a_i = &((*(p_a_i_a))[i()]);
-  let p_a_i_a_i_m = &((*(p_a_i_a_i)).m);
-  let p_a_i_a_i_m_i = &((*(p_a_i_a_i_m))[i()]);
-  let l_a : array<Outer, 4> = *(p_a);
-  let l_a_i : Outer = *(p_a_i);
-  let l_a_i_a : array<Inner, 4> = *(p_a_i_a);
-  let l_a_i_a_i : Inner = *(p_a_i_a_i);
-  let l_a_i_a_i_m : mat4x2<f32> = *(p_a_i_a_i_m);
-  let l_a_i_a_i_m_i : vec2<f32> = *(p_a_i_a_i_m_i);
-  let l_a_i_a_i_m_i_i : f32 = (*(p_a_i_a_i_m_i))[i()];
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2/static_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x2/static_index_via_ptr.wgsl
deleted file mode 100644
index 582e8ac..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat4x2/static_index_via_ptr.wgsl
+++ /dev/null

@@ -1,28 +0,0 @@
-struct Inner {
-  m : mat4x2<f32>,
-}
-
-struct Outer {
-  a : array<Inner, 4>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
-
-@compute @workgroup_size(1)
-fn f() {
-  let p_a = &a;
-  let p_a_3 = &((*p_a)[3]);
-  let p_a_3_a = &((*p_a_3).a);
-  let p_a_3_a_2 = &((*p_a_3_a)[2]);
-  let p_a_3_a_2_m = &((*p_a_3_a_2).m);
-  let p_a_3_a_2_m_1 = &((*p_a_3_a_2_m)[1]);
-
-
-  let l_a             : array<Outer, 4> = *p_a;
-  let l_a_3           : Outer           = *p_a_3;
-  let l_a_3_a         : array<Inner, 4> = *p_a_3_a;
-  let l_a_3_a_2       : Inner           = *p_a_3_a_2;
-  let l_a_3_a_2_m     : mat4x2<f32>     = *p_a_3_a_2_m;
-  let l_a_3_a_2_m_1   : vec2<f32>       = *p_a_3_a_2_m_1;
-  let l_a_3_a_2_m_1_0 : f32             = (*p_a_3_a_2_m_1)[0];
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2/static_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x2/static_index_via_ptr.wgsl.expected.dxc.hlsl
deleted file mode 100644
index f499d7c..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat4x2/static_index_via_ptr.wgsl.expected.dxc.hlsl
+++ /dev/null

@@ -1,66 +0,0 @@
-struct Inner {
-  float4x2 m;
-};
-struct Outer {
-  Inner a[4];
-};
-
-cbuffer cbuffer_a : register(b0, space0) {
-  uint4 a[32];
-};
-
-float4x2 tint_symbol_4(uint4 buffer[32], uint offset) {
-  const uint scalar_offset = ((offset + 0u)) / 4;
-  uint4 ubo_load = buffer[scalar_offset / 4];
-  const uint scalar_offset_1 = ((offset + 8u)) / 4;
-  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
-  const uint scalar_offset_2 = ((offset + 16u)) / 4;
-  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
-  const uint scalar_offset_3 = ((offset + 24u)) / 4;
-  uint4 ubo_load_3 = buffer[scalar_offset_3 / 4];
-  return float4x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)), asfloat(((scalar_offset_3 & 2) ? ubo_load_3.zw : ubo_load_3.xy)));
-}
-
-Inner tint_symbol_3(uint4 buffer[32], uint offset) {
-  const Inner tint_symbol_7 = {tint_symbol_4(buffer, (offset + 0u))};
-  return tint_symbol_7;
-}
-
-typedef Inner tint_symbol_2_ret[4];
-tint_symbol_2_ret tint_symbol_2(uint4 buffer[32], uint offset) {
-  Inner arr[4] = (Inner[4])0;
-  {
-    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
-      arr[i] = tint_symbol_3(buffer, (offset + (i * 32u)));
-    }
-  }
-  return arr;
-}
-
-Outer tint_symbol_1(uint4 buffer[32], uint offset) {
-  const Outer tint_symbol_8 = {tint_symbol_2(buffer, (offset + 0u))};
-  return tint_symbol_8;
-}
-
-typedef Outer tint_symbol_ret[4];
-tint_symbol_ret tint_symbol(uint4 buffer[32], uint offset) {
-  Outer arr_1[4] = (Outer[4])0;
-  {
-    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
-      arr_1[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 128u)));
-    }
-  }
-  return arr_1;
-}
-
-[numthreads(1, 1, 1)]
-void f() {
-  const Outer l_a[4] = tint_symbol(a, 0u);
-  const Outer l_a_3 = tint_symbol_1(a, 384u);
-  const Inner l_a_3_a[4] = tint_symbol_2(a, 384u);
-  const Inner l_a_3_a_2 = tint_symbol_3(a, 448u);
-  const float4x2 l_a_3_a_2_m = tint_symbol_4(a, 448u);
-  const float2 l_a_3_a_2_m_1 = asfloat(a[28].zw);
-  const float l_a_3_a_2_m_1_0 = asfloat(a[28].z);
-  return;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2/static_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x2/static_index_via_ptr.wgsl.expected.fxc.hlsl
deleted file mode 100644
index f499d7c..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat4x2/static_index_via_ptr.wgsl.expected.fxc.hlsl
+++ /dev/null

@@ -1,66 +0,0 @@
-struct Inner {
-  float4x2 m;
-};
-struct Outer {
-  Inner a[4];
-};
-
-cbuffer cbuffer_a : register(b0, space0) {
-  uint4 a[32];
-};
-
-float4x2 tint_symbol_4(uint4 buffer[32], uint offset) {
-  const uint scalar_offset = ((offset + 0u)) / 4;
-  uint4 ubo_load = buffer[scalar_offset / 4];
-  const uint scalar_offset_1 = ((offset + 8u)) / 4;
-  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
-  const uint scalar_offset_2 = ((offset + 16u)) / 4;
-  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
-  const uint scalar_offset_3 = ((offset + 24u)) / 4;
-  uint4 ubo_load_3 = buffer[scalar_offset_3 / 4];
-  return float4x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)), asfloat(((scalar_offset_3 & 2) ? ubo_load_3.zw : ubo_load_3.xy)));
-}
-
-Inner tint_symbol_3(uint4 buffer[32], uint offset) {
-  const Inner tint_symbol_7 = {tint_symbol_4(buffer, (offset + 0u))};
-  return tint_symbol_7;
-}
-
-typedef Inner tint_symbol_2_ret[4];
-tint_symbol_2_ret tint_symbol_2(uint4 buffer[32], uint offset) {
-  Inner arr[4] = (Inner[4])0;
-  {
-    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
-      arr[i] = tint_symbol_3(buffer, (offset + (i * 32u)));
-    }
-  }
-  return arr;
-}
-
-Outer tint_symbol_1(uint4 buffer[32], uint offset) {
-  const Outer tint_symbol_8 = {tint_symbol_2(buffer, (offset + 0u))};
-  return tint_symbol_8;
-}
-
-typedef Outer tint_symbol_ret[4];
-tint_symbol_ret tint_symbol(uint4 buffer[32], uint offset) {
-  Outer arr_1[4] = (Outer[4])0;
-  {
-    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
-      arr_1[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 128u)));
-    }
-  }
-  return arr_1;
-}
-
-[numthreads(1, 1, 1)]
-void f() {
-  const Outer l_a[4] = tint_symbol(a, 0u);
-  const Outer l_a_3 = tint_symbol_1(a, 384u);
-  const Inner l_a_3_a[4] = tint_symbol_2(a, 384u);
-  const Inner l_a_3_a_2 = tint_symbol_3(a, 448u);
-  const float4x2 l_a_3_a_2_m = tint_symbol_4(a, 448u);
-  const float2 l_a_3_a_2_m_1 = asfloat(a[28].zw);
-  const float l_a_3_a_2_m_1_0 = asfloat(a[28].z);
-  return;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2/static_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat4x2/static_index_via_ptr.wgsl.expected.glsl
deleted file mode 100644
index 658fe9e..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat4x2/static_index_via_ptr.wgsl.expected.glsl
+++ /dev/null

@@ -1,78 +0,0 @@
-#version 310 es
-
-struct Inner {
-  mat4x2 m;
-};
-
-struct Inner_std140 {
-  vec2 m_0;
-  vec2 m_1;
-  vec2 m_2;
-  vec2 m_3;
-};
-
-struct Outer {
-  Inner a[4];
-};
-
-struct Outer_std140 {
-  Inner_std140 a[4];
-};
-
-layout(binding = 0, std140) uniform a_block_std140_ubo {
-  Outer_std140 inner[4];
-} a;
-
-Inner conv_Inner(Inner_std140 val) {
-  return Inner(mat4x2(val.m_0, val.m_1, val.m_2, val.m_3));
-}
-
-Inner[4] conv_arr4_Inner(Inner_std140 val[4]) {
-  Inner arr[4] = Inner[4](Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)));
-  {
-    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
-      arr[i] = conv_Inner(val[i]);
-    }
-  }
-  return arr;
-}
-
-Outer conv_Outer(Outer_std140 val) {
-  return Outer(conv_arr4_Inner(val.a));
-}
-
-Outer[4] conv_arr4_Outer(Outer_std140 val[4]) {
-  Outer arr[4] = Outer[4](Outer(Inner[4](Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)))), Outer(Inner[4](Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)))), Outer(Inner[4](Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)))), Outer(Inner[4](Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)))));
-  {
-    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
-      arr[i] = conv_Outer(val[i]);
-    }
-  }
-  return arr;
-}
-
-mat4x2 load_a_inner_3_a_2_m() {
-  return mat4x2(a.inner[3u].a[2u].m_0, a.inner[3u].a[2u].m_1, a.inner[3u].a[2u].m_2, a.inner[3u].a[2u].m_3);
-}
-
-void f() {
-  Outer p_a[4] = conv_arr4_Outer(a.inner);
-  Outer p_a_3 = conv_Outer(a.inner[3u]);
-  Inner p_a_3_a[4] = conv_arr4_Inner(a.inner[3u].a);
-  Inner p_a_3_a_2 = conv_Inner(a.inner[3u].a[2u]);
-  mat4x2 p_a_3_a_2_m = load_a_inner_3_a_2_m();
-  vec2 p_a_3_a_2_m_1 = a.inner[3u].a[2u].m_1;
-  Outer l_a[4] = conv_arr4_Outer(a.inner);
-  Outer l_a_3 = conv_Outer(a.inner[3u]);
-  Inner l_a_3_a[4] = conv_arr4_Inner(a.inner[3u].a);
-  Inner l_a_3_a_2 = conv_Inner(a.inner[3u].a[2u]);
-  mat4x2 l_a_3_a_2_m = load_a_inner_3_a_2_m();
-  vec2 l_a_3_a_2_m_1 = a.inner[3u].a[2u].m_1;
-  float l_a_3_a_2_m_1_0 = a.inner[3u].a[2u].m_1[0u];
-}
-
-layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
-void main() {
-  f();
-  return;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2/static_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat4x2/static_index_via_ptr.wgsl.expected.msl
deleted file mode 100644
index 457e620..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat4x2/static_index_via_ptr.wgsl.expected.msl
+++ /dev/null

@@ -1,35 +0,0 @@
-#include <metal_stdlib>
-
-using namespace metal;
-
-template<typename T, size_t N>
-struct tint_array {
-    const constant T& operator[](size_t i) const constant { return elements[i]; }
-    device T& operator[](size_t i) device { return elements[i]; }
-    const device T& operator[](size_t i) const device { return elements[i]; }
-    thread T& operator[](size_t i) thread { return elements[i]; }
-    const thread T& operator[](size_t i) const thread { return elements[i]; }
-    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
-    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
-    T elements[N];
-};
-
-struct Inner {
-  /* 0x0000 */ float4x2 m;
-};
-
-struct Outer {
-  /* 0x0000 */ tint_array<Inner, 4> a;
-};
-
-kernel void f(const constant tint_array<Outer, 4>* tint_symbol [[buffer(0)]]) {
-  tint_array<Outer, 4> const l_a = *(tint_symbol);
-  Outer const l_a_3 = (*(tint_symbol))[3];
-  tint_array<Inner, 4> const l_a_3_a = (*(tint_symbol))[3].a;
-  Inner const l_a_3_a_2 = (*(tint_symbol))[3].a[2];
-  float4x2 const l_a_3_a_2_m = (*(tint_symbol))[3].a[2].m;
-  float2 const l_a_3_a_2_m_1 = (*(tint_symbol))[3].a[2].m[1];
-  float const l_a_3_a_2_m_1_0 = (*(tint_symbol))[3].a[2].m[1][0];
-  return;
-}
-

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2/static_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat4x2/static_index_via_ptr.wgsl.expected.spvasm
deleted file mode 100644
index 730d12b..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat4x2/static_index_via_ptr.wgsl.expected.spvasm
+++ /dev/null

@@ -1,233 +0,0 @@
-; SPIR-V
-; Version: 1.3
-; Generator: Google Tint Compiler; 0
-; Bound: 148
-; Schema: 0
-               OpCapability Shader
-               OpMemoryModel Logical GLSL450
-               OpEntryPoint GLCompute %f "f"
-               OpExecutionMode %f LocalSize 1 1 1
-               OpName %a_block_std140 "a_block_std140"
-               OpMemberName %a_block_std140 0 "inner"
-               OpName %Outer_std140 "Outer_std140"
-               OpMemberName %Outer_std140 0 "a"
-               OpName %Inner_std140 "Inner_std140"
-               OpMemberName %Inner_std140 0 "m_0"
-               OpMemberName %Inner_std140 1 "m_1"
-               OpMemberName %Inner_std140 2 "m_2"
-               OpMemberName %Inner_std140 3 "m_3"
-               OpName %a "a"
-               OpName %Inner "Inner"
-               OpMemberName %Inner 0 "m"
-               OpName %conv_Inner "conv_Inner"
-               OpName %val "val"
-               OpName %conv_arr4_Inner "conv_arr4_Inner"
-               OpName %val_0 "val"
-               OpName %arr "arr"
-               OpName %i "i"
-               OpName %var_for_index_1 "var_for_index_1"
-               OpName %Outer "Outer"
-               OpMemberName %Outer 0 "a"
-               OpName %conv_Outer "conv_Outer"
-               OpName %val_1 "val"
-               OpName %conv_arr4_Outer "conv_arr4_Outer"
-               OpName %val_2 "val"
-               OpName %arr_0 "arr"
-               OpName %i_0 "i"
-               OpName %var_for_index "var_for_index"
-               OpName %load_a_inner_3_a_2_m "load_a_inner_3_a_2_m"
-               OpName %f "f"
-               OpDecorate %a_block_std140 Block
-               OpMemberDecorate %a_block_std140 0 Offset 0
-               OpMemberDecorate %Outer_std140 0 Offset 0
-               OpMemberDecorate %Inner_std140 0 Offset 0
-               OpMemberDecorate %Inner_std140 1 Offset 8
-               OpMemberDecorate %Inner_std140 2 Offset 16
-               OpMemberDecorate %Inner_std140 3 Offset 24
-               OpDecorate %_arr_Inner_std140_uint_4 ArrayStride 32
-               OpDecorate %_arr_Outer_std140_uint_4 ArrayStride 128
-               OpDecorate %a NonWritable
-               OpDecorate %a DescriptorSet 0
-               OpDecorate %a Binding 0
-               OpMemberDecorate %Inner 0 Offset 0
-               OpMemberDecorate %Inner 0 ColMajor
-               OpMemberDecorate %Inner 0 MatrixStride 8
-               OpDecorate %_arr_Inner_uint_4 ArrayStride 32
-               OpMemberDecorate %Outer 0 Offset 0
-               OpDecorate %_arr_Outer_uint_4 ArrayStride 128
-      %float = OpTypeFloat 32
-    %v2float = OpTypeVector %float 2
-%Inner_std140 = OpTypeStruct %v2float %v2float %v2float %v2float
-       %uint = OpTypeInt 32 0
-     %uint_4 = OpConstant %uint 4
-%_arr_Inner_std140_uint_4 = OpTypeArray %Inner_std140 %uint_4
-%Outer_std140 = OpTypeStruct %_arr_Inner_std140_uint_4
-%_arr_Outer_std140_uint_4 = OpTypeArray %Outer_std140 %uint_4
-%a_block_std140 = OpTypeStruct %_arr_Outer_std140_uint_4
-%_ptr_Uniform_a_block_std140 = OpTypePointer Uniform %a_block_std140
-          %a = OpVariable %_ptr_Uniform_a_block_std140 Uniform
-%mat4v2float = OpTypeMatrix %v2float 4
-      %Inner = OpTypeStruct %mat4v2float
-         %12 = OpTypeFunction %Inner %Inner_std140
-%_arr_Inner_uint_4 = OpTypeArray %Inner %uint_4
-         %24 = OpTypeFunction %_arr_Inner_uint_4 %_arr_Inner_std140_uint_4
-%_ptr_Function__arr_Inner_uint_4 = OpTypePointer Function %_arr_Inner_uint_4
-         %31 = OpConstantNull %_arr_Inner_uint_4
-%_ptr_Function_uint = OpTypePointer Function %uint
-         %34 = OpConstantNull %uint
-       %bool = OpTypeBool
-%_ptr_Function__arr_Inner_std140_uint_4 = OpTypePointer Function %_arr_Inner_std140_uint_4
-         %47 = OpConstantNull %_arr_Inner_std140_uint_4
-%_ptr_Function_Inner = OpTypePointer Function %Inner
-%_ptr_Function_Inner_std140 = OpTypePointer Function %Inner_std140
-     %uint_1 = OpConstant %uint 1
-      %Outer = OpTypeStruct %_arr_Inner_uint_4
-         %60 = OpTypeFunction %Outer %Outer_std140
-%_arr_Outer_uint_4 = OpTypeArray %Outer %uint_4
-         %68 = OpTypeFunction %_arr_Outer_uint_4 %_arr_Outer_std140_uint_4
-%_ptr_Function__arr_Outer_uint_4 = OpTypePointer Function %_arr_Outer_uint_4
-         %75 = OpConstantNull %_arr_Outer_uint_4
-%_ptr_Function__arr_Outer_std140_uint_4 = OpTypePointer Function %_arr_Outer_std140_uint_4
-         %88 = OpConstantNull %_arr_Outer_std140_uint_4
-%_ptr_Function_Outer = OpTypePointer Function %Outer
-%_ptr_Function_Outer_std140 = OpTypePointer Function %Outer_std140
-        %100 = OpTypeFunction %mat4v2float
-     %uint_0 = OpConstant %uint 0
-     %uint_3 = OpConstant %uint 3
-     %uint_2 = OpConstant %uint 2
-%_ptr_Uniform_Inner_std140 = OpTypePointer Uniform %Inner_std140
-%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
-       %void = OpTypeVoid
-        %123 = OpTypeFunction %void
-%_ptr_Uniform__arr_Outer_std140_uint_4 = OpTypePointer Uniform %_arr_Outer_std140_uint_4
-%_ptr_Uniform_Outer_std140 = OpTypePointer Uniform %Outer_std140
-%_ptr_Uniform__arr_Inner_std140_uint_4 = OpTypePointer Uniform %_arr_Inner_std140_uint_4
-%_ptr_Uniform_float = OpTypePointer Uniform %float
- %conv_Inner = OpFunction %Inner None %12
-        %val = OpFunctionParameter %Inner_std140
-         %17 = OpLabel
-         %18 = OpCompositeExtract %v2float %val 0
-         %19 = OpCompositeExtract %v2float %val 1
-         %20 = OpCompositeExtract %v2float %val 2
-         %21 = OpCompositeExtract %v2float %val 3
-         %22 = OpCompositeConstruct %mat4v2float %18 %19 %20 %21
-         %23 = OpCompositeConstruct %Inner %22
-               OpReturnValue %23
-               OpFunctionEnd
-%conv_arr4_Inner = OpFunction %_arr_Inner_uint_4 None %24
-      %val_0 = OpFunctionParameter %_arr_Inner_std140_uint_4
-         %28 = OpLabel
-        %arr = OpVariable %_ptr_Function__arr_Inner_uint_4 Function %31
-          %i = OpVariable %_ptr_Function_uint Function %34
-%var_for_index_1 = OpVariable %_ptr_Function__arr_Inner_std140_uint_4 Function %47
-               OpBranch %35
-         %35 = OpLabel
-               OpLoopMerge %36 %37 None
-               OpBranch %38
-         %38 = OpLabel
-         %40 = OpLoad %uint %i
-         %41 = OpULessThan %bool %40 %uint_4
-         %39 = OpLogicalNot %bool %41
-               OpSelectionMerge %43 None
-               OpBranchConditional %39 %44 %43
-         %44 = OpLabel
-               OpBranch %36
-         %43 = OpLabel
-               OpStore %var_for_index_1 %val_0
-         %48 = OpLoad %uint %i
-         %50 = OpAccessChain %_ptr_Function_Inner %arr %48
-         %52 = OpLoad %uint %i
-         %54 = OpAccessChain %_ptr_Function_Inner_std140 %var_for_index_1 %52
-         %55 = OpLoad %Inner_std140 %54
-         %51 = OpFunctionCall %Inner %conv_Inner %55
-               OpStore %50 %51
-               OpBranch %37
-         %37 = OpLabel
-         %56 = OpLoad %uint %i
-         %58 = OpIAdd %uint %56 %uint_1
-               OpStore %i %58
-               OpBranch %35
-         %36 = OpLabel
-         %59 = OpLoad %_arr_Inner_uint_4 %arr
-               OpReturnValue %59
-               OpFunctionEnd
- %conv_Outer = OpFunction %Outer None %60
-      %val_1 = OpFunctionParameter %Outer_std140
-         %64 = OpLabel
-         %66 = OpCompositeExtract %_arr_Inner_std140_uint_4 %val_1 0
-         %65 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr4_Inner %66
-         %67 = OpCompositeConstruct %Outer %65
-               OpReturnValue %67
-               OpFunctionEnd
-%conv_arr4_Outer = OpFunction %_arr_Outer_uint_4 None %68
-      %val_2 = OpFunctionParameter %_arr_Outer_std140_uint_4
-         %72 = OpLabel
-      %arr_0 = OpVariable %_ptr_Function__arr_Outer_uint_4 Function %75
-        %i_0 = OpVariable %_ptr_Function_uint Function %34
-%var_for_index = OpVariable %_ptr_Function__arr_Outer_std140_uint_4 Function %88
-               OpBranch %77
-         %77 = OpLabel
-               OpLoopMerge %78 %79 None
-               OpBranch %80
-         %80 = OpLabel
-         %82 = OpLoad %uint %i_0
-         %83 = OpULessThan %bool %82 %uint_4
-         %81 = OpLogicalNot %bool %83
-               OpSelectionMerge %84 None
-               OpBranchConditional %81 %85 %84
-         %85 = OpLabel
-               OpBranch %78
-         %84 = OpLabel
-               OpStore %var_for_index %val_2
-         %89 = OpLoad %uint %i_0
-         %91 = OpAccessChain %_ptr_Function_Outer %arr_0 %89
-         %93 = OpLoad %uint %i_0
-         %95 = OpAccessChain %_ptr_Function_Outer_std140 %var_for_index %93
-         %96 = OpLoad %Outer_std140 %95
-         %92 = OpFunctionCall %Outer %conv_Outer %96
-               OpStore %91 %92
-               OpBranch %79
-         %79 = OpLabel
-         %97 = OpLoad %uint %i_0
-         %98 = OpIAdd %uint %97 %uint_1
-               OpStore %i_0 %98
-               OpBranch %77
-         %78 = OpLabel
-         %99 = OpLoad %_arr_Outer_uint_4 %arr_0
-               OpReturnValue %99
-               OpFunctionEnd
-%load_a_inner_3_a_2_m = OpFunction %mat4v2float None %100
-        %102 = OpLabel
-        %108 = OpAccessChain %_ptr_Uniform_Inner_std140 %a %uint_0 %uint_3 %uint_0 %uint_2
-        %111 = OpAccessChain %_ptr_Uniform_v2float %108 %uint_0
-        %112 = OpLoad %v2float %111
-        %114 = OpAccessChain %_ptr_Uniform_v2float %108 %uint_1
-        %115 = OpLoad %v2float %114
-        %117 = OpAccessChain %_ptr_Uniform_v2float %108 %uint_2
-        %118 = OpLoad %v2float %117
-        %120 = OpAccessChain %_ptr_Uniform_v2float %108 %uint_3
-        %121 = OpLoad %v2float %120
-        %122 = OpCompositeConstruct %mat4v2float %112 %115 %118 %121
-               OpReturnValue %122
-               OpFunctionEnd
-          %f = OpFunction %void None %123
-        %126 = OpLabel
-        %129 = OpAccessChain %_ptr_Uniform__arr_Outer_std140_uint_4 %a %uint_0
-        %130 = OpLoad %_arr_Outer_std140_uint_4 %129
-        %127 = OpFunctionCall %_arr_Outer_uint_4 %conv_arr4_Outer %130
-        %133 = OpAccessChain %_ptr_Uniform_Outer_std140 %a %uint_0 %uint_3
-        %134 = OpLoad %Outer_std140 %133
-        %131 = OpFunctionCall %Outer %conv_Outer %134
-        %137 = OpAccessChain %_ptr_Uniform__arr_Inner_std140_uint_4 %a %uint_0 %uint_3 %uint_0
-        %138 = OpLoad %_arr_Inner_std140_uint_4 %137
-        %135 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr4_Inner %138
-        %140 = OpAccessChain %_ptr_Uniform_Inner_std140 %a %uint_0 %uint_3 %uint_0 %uint_2
-        %141 = OpLoad %Inner_std140 %140
-        %139 = OpFunctionCall %Inner %conv_Inner %141
-        %142 = OpFunctionCall %mat4v2float %load_a_inner_3_a_2_m
-        %143 = OpAccessChain %_ptr_Uniform_v2float %a %uint_0 %uint_3 %uint_0 %uint_2 %uint_1
-        %144 = OpLoad %v2float %143
-        %146 = OpAccessChain %_ptr_Uniform_float %a %uint_0 %uint_3 %uint_0 %uint_2 %uint_1 %34
-        %147 = OpLoad %float %146
-               OpReturn
-               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2/static_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x2/static_index_via_ptr.wgsl.expected.wgsl
deleted file mode 100644
index 6347a0e..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat4x2/static_index_via_ptr.wgsl.expected.wgsl
+++ /dev/null

@@ -1,26 +0,0 @@
-struct Inner {
-  m : mat4x2<f32>,
-}
-
-struct Outer {
-  a : array<Inner, 4>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
-
-@compute @workgroup_size(1)
-fn f() {
-  let p_a = &(a);
-  let p_a_3 = &((*(p_a))[3]);
-  let p_a_3_a = &((*(p_a_3)).a);
-  let p_a_3_a_2 = &((*(p_a_3_a))[2]);
-  let p_a_3_a_2_m = &((*(p_a_3_a_2)).m);
-  let p_a_3_a_2_m_1 = &((*(p_a_3_a_2_m))[1]);
-  let l_a : array<Outer, 4> = *(p_a);
-  let l_a_3 : Outer = *(p_a_3);
-  let l_a_3_a : array<Inner, 4> = *(p_a_3_a);
-  let l_a_3_a_2 : Inner = *(p_a_3_a_2);
-  let l_a_3_a_2_m : mat4x2<f32> = *(p_a_3_a_2_m);
-  let l_a_3_a_2_m_1 : vec2<f32> = *(p_a_3_a_2_m_1);
-  let l_a_3_a_2_m_1_0 : f32 = (*(p_a_3_a_2_m_1))[0];
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2/to_builtin.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x2/to_builtin.wgsl
deleted file mode 100644
index 7aa8fa0..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat4x2/to_builtin.wgsl
+++ /dev/null

@@ -1,14 +0,0 @@
-struct S {
-  before : i32,
-  m : mat4x2<f32>,
-  after : i32,
-}
-
-@group(0) @binding(0) var<uniform> u : array<S, 4>;
-
-@compute @workgroup_size(1)
-fn f() {
-    let t = transpose(u[2].m);
-    let l = length(u[0].m[1].yx);
-    let a = abs(u[0].m[1].yx.x);
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2/to_builtin.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x2/to_builtin.wgsl.expected.dxc.hlsl
deleted file mode 100644
index f1fc60e..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat4x2/to_builtin.wgsl.expected.dxc.hlsl
+++ /dev/null

@@ -1,23 +0,0 @@
-cbuffer cbuffer_u : register(b0, space0) {
-  uint4 u[12];
-};
-
-float4x2 tint_symbol(uint4 buffer[12], uint offset) {
-  const uint scalar_offset = ((offset + 0u)) / 4;
-  uint4 ubo_load = buffer[scalar_offset / 4];
-  const uint scalar_offset_1 = ((offset + 8u)) / 4;
-  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
-  const uint scalar_offset_2 = ((offset + 16u)) / 4;
-  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
-  const uint scalar_offset_3 = ((offset + 24u)) / 4;
-  uint4 ubo_load_3 = buffer[scalar_offset_3 / 4];
-  return float4x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)), asfloat(((scalar_offset_3 & 2) ? ubo_load_3.zw : ubo_load_3.xy)));
-}
-
-[numthreads(1, 1, 1)]
-void f() {
-  const float2x4 t = transpose(tint_symbol(u, 104u));
-  const float l = length(asfloat(u[1].xy).yx);
-  const float a = abs(asfloat(u[1].xy).yx.x);
-  return;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2/to_builtin.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x2/to_builtin.wgsl.expected.fxc.hlsl
deleted file mode 100644
index f1fc60e..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat4x2/to_builtin.wgsl.expected.fxc.hlsl
+++ /dev/null

@@ -1,23 +0,0 @@
-cbuffer cbuffer_u : register(b0, space0) {
-  uint4 u[12];
-};
-
-float4x2 tint_symbol(uint4 buffer[12], uint offset) {
-  const uint scalar_offset = ((offset + 0u)) / 4;
-  uint4 ubo_load = buffer[scalar_offset / 4];
-  const uint scalar_offset_1 = ((offset + 8u)) / 4;
-  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
-  const uint scalar_offset_2 = ((offset + 16u)) / 4;
-  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
-  const uint scalar_offset_3 = ((offset + 24u)) / 4;
-  uint4 ubo_load_3 = buffer[scalar_offset_3 / 4];
-  return float4x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)), asfloat(((scalar_offset_3 & 2) ? ubo_load_3.zw : ubo_load_3.xy)));
-}
-
-[numthreads(1, 1, 1)]
-void f() {
-  const float2x4 t = transpose(tint_symbol(u, 104u));
-  const float l = length(asfloat(u[1].xy).yx);
-  const float a = abs(asfloat(u[1].xy).yx.x);
-  return;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2/to_builtin.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat4x2/to_builtin.wgsl.expected.glsl
deleted file mode 100644
index b5bbb97..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat4x2/to_builtin.wgsl.expected.glsl
+++ /dev/null

@@ -1,40 +0,0 @@
-#version 310 es
-
-struct S {
-  int before;
-  uint pad;
-  mat4x2 m;
-  int after;
-  uint pad_1;
-};
-
-struct S_std140 {
-  int before;
-  uint pad;
-  vec2 m_0;
-  vec2 m_1;
-  vec2 m_2;
-  vec2 m_3;
-  int after;
-  uint pad_1;
-};
-
-layout(binding = 0, std140) uniform u_block_std140_ubo {
-  S_std140 inner[4];
-} u;
-
-mat4x2 load_u_inner_2_m() {
-  return mat4x2(u.inner[2u].m_0, u.inner[2u].m_1, u.inner[2u].m_2, u.inner[2u].m_3);
-}
-
-void f() {
-  mat2x4 t = transpose(load_u_inner_2_m());
-  float l = length(u.inner[0u].m_1.yx);
-  float a = abs(u.inner[0u].m_1.yx[0u]);
-}
-
-layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
-void main() {
-  f();
-  return;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2/to_builtin.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat4x2/to_builtin.wgsl.expected.msl
deleted file mode 100644
index d66b53e..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat4x2/to_builtin.wgsl.expected.msl
+++ /dev/null

@@ -1,31 +0,0 @@
-#include <metal_stdlib>
-
-using namespace metal;
-
-template<typename T, size_t N>
-struct tint_array {
-    const constant T& operator[](size_t i) const constant { return elements[i]; }
-    device T& operator[](size_t i) device { return elements[i]; }
-    const device T& operator[](size_t i) const device { return elements[i]; }
-    thread T& operator[](size_t i) thread { return elements[i]; }
-    const thread T& operator[](size_t i) const thread { return elements[i]; }
-    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
-    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
-    T elements[N];
-};
-
-struct S {
-  /* 0x0000 */ int before;
-  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
-  /* 0x0008 */ float4x2 m;
-  /* 0x0028 */ int after;
-  /* 0x002c */ tint_array<int8_t, 4> tint_pad_1;
-};
-
-kernel void f(const constant tint_array<S, 4>* tint_symbol [[buffer(0)]]) {
-  float2x4 const t = transpose((*(tint_symbol))[2].m);
-  float const l = length(float2((*(tint_symbol))[0].m[1]).yx);
-  float const a = fabs(float2((*(tint_symbol))[0].m[1]).yx[0]);
-  return;
-}
-

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2/to_builtin.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat4x2/to_builtin.wgsl.expected.spvasm
deleted file mode 100644
index 831410f..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat4x2/to_builtin.wgsl.expected.spvasm
+++ /dev/null

@@ -1,86 +0,0 @@
-; SPIR-V
-; Version: 1.3
-; Generator: Google Tint Compiler; 0
-; Bound: 55
-; Schema: 0
-               OpCapability Shader
-         %45 = OpExtInstImport "GLSL.std.450"
-               OpMemoryModel Logical GLSL450
-               OpEntryPoint GLCompute %f "f"
-               OpExecutionMode %f LocalSize 1 1 1
-               OpName %u_block_std140 "u_block_std140"
-               OpMemberName %u_block_std140 0 "inner"
-               OpName %S_std140 "S_std140"
-               OpMemberName %S_std140 0 "before"
-               OpMemberName %S_std140 1 "m_0"
-               OpMemberName %S_std140 2 "m_1"
-               OpMemberName %S_std140 3 "m_2"
-               OpMemberName %S_std140 4 "m_3"
-               OpMemberName %S_std140 5 "after"
-               OpName %u "u"
-               OpName %load_u_inner_2_m "load_u_inner_2_m"
-               OpName %f "f"
-               OpDecorate %u_block_std140 Block
-               OpMemberDecorate %u_block_std140 0 Offset 0
-               OpMemberDecorate %S_std140 0 Offset 0
-               OpMemberDecorate %S_std140 1 Offset 8
-               OpMemberDecorate %S_std140 2 Offset 16
-               OpMemberDecorate %S_std140 3 Offset 24
-               OpMemberDecorate %S_std140 4 Offset 32
-               OpMemberDecorate %S_std140 5 Offset 40
-               OpDecorate %_arr_S_std140_uint_4 ArrayStride 48
-               OpDecorate %u NonWritable
-               OpDecorate %u DescriptorSet 0
-               OpDecorate %u Binding 0
-        %int = OpTypeInt 32 1
-      %float = OpTypeFloat 32
-    %v2float = OpTypeVector %float 2
-   %S_std140 = OpTypeStruct %int %v2float %v2float %v2float %v2float %int
-       %uint = OpTypeInt 32 0
-     %uint_4 = OpConstant %uint 4
-%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
-%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
-%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
-          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
-%mat4v2float = OpTypeMatrix %v2float 4
-         %11 = OpTypeFunction %mat4v2float
-     %uint_0 = OpConstant %uint 0
-     %uint_2 = OpConstant %uint 2
-%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
-     %uint_1 = OpConstant %uint 1
-%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
-     %uint_3 = OpConstant %uint 3
-       %void = OpTypeVoid
-         %36 = OpTypeFunction %void
-    %v4float = OpTypeVector %float 4
-%mat2v4float = OpTypeMatrix %v4float 2
-         %46 = OpConstantNull %uint
-%load_u_inner_2_m = OpFunction %mat4v2float None %11
-         %14 = OpLabel
-         %19 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
-         %23 = OpAccessChain %_ptr_Uniform_v2float %19 %uint_1
-         %24 = OpLoad %v2float %23
-         %26 = OpAccessChain %_ptr_Uniform_v2float %19 %uint_2
-         %27 = OpLoad %v2float %26
-         %30 = OpAccessChain %_ptr_Uniform_v2float %19 %uint_3
-         %31 = OpLoad %v2float %30
-         %33 = OpAccessChain %_ptr_Uniform_v2float %19 %uint_4
-         %34 = OpLoad %v2float %33
-         %35 = OpCompositeConstruct %mat4v2float %24 %27 %31 %34
-               OpReturnValue %35
-               OpFunctionEnd
-          %f = OpFunction %void None %36
-         %39 = OpLabel
-         %43 = OpFunctionCall %mat4v2float %load_u_inner_2_m
-         %40 = OpTranspose %mat2v4float %43
-         %47 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %46 %uint_2
-         %48 = OpLoad %v2float %47
-         %49 = OpVectorShuffle %v2float %48 %48 1 0
-         %44 = OpExtInst %float %45 Length %49
-         %51 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %46 %uint_2
-         %52 = OpLoad %v2float %51
-         %53 = OpVectorShuffle %v2float %52 %52 1 0
-         %54 = OpCompositeExtract %float %53 0
-         %50 = OpExtInst %float %45 FAbs %54
-               OpReturn
-               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2/to_builtin.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x2/to_builtin.wgsl.expected.wgsl
deleted file mode 100644
index 59bf43d..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat4x2/to_builtin.wgsl.expected.wgsl
+++ /dev/null

@@ -1,14 +0,0 @@
-struct S {
-  before : i32,
-  m : mat4x2<f32>,
-  after : i32,
-}
-
-@group(0) @binding(0) var<uniform> u : array<S, 4>;
-
-@compute @workgroup_size(1)
-fn f() {
-  let t = transpose(u[2].m);
-  let l = length(u[0].m[1].yx);
-  let a = abs(u[0].m[1].yx.x);
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2/to_fn.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x2/to_fn.wgsl
deleted file mode 100644
index 41ca70c..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat4x2/to_fn.wgsl
+++ /dev/null

@@ -1,22 +0,0 @@
-struct S {
-  before : i32,
-  m : mat4x2<f32>,
-  after : i32,
-}
-
-@group(0) @binding(0) var<uniform> u : array<S, 4>;
-
-fn a(a : array<S, 4>) {}
-fn b(s : S) {}
-fn c(m : mat4x2<f32>) {}
-fn d(v : vec2<f32>) {}
-fn e(f : f32) {}
-
-@compute @workgroup_size(1)
-fn f() {
-    a(u);
-    b(u[2]);
-    c(u[2].m);
-    d(u[0].m[1].yx);
-    e(u[0].m[1].yx.x);
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2/to_fn.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x2/to_fn.wgsl.expected.dxc.hlsl
deleted file mode 100644
index 20e3b99..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat4x2/to_fn.wgsl.expected.dxc.hlsl
+++ /dev/null

@@ -1,64 +0,0 @@
-struct S {
-  int before;
-  float4x2 m;
-  int after;
-};
-
-cbuffer cbuffer_u : register(b0, space0) {
-  uint4 u[12];
-};
-
-void a(S a_1[4]) {
-}
-
-void b(S s) {
-}
-
-void c(float4x2 m) {
-}
-
-void d(float2 v) {
-}
-
-void e(float f_1) {
-}
-
-float4x2 tint_symbol_3(uint4 buffer[12], uint offset) {
-  const uint scalar_offset = ((offset + 0u)) / 4;
-  uint4 ubo_load = buffer[scalar_offset / 4];
-  const uint scalar_offset_1 = ((offset + 8u)) / 4;
-  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
-  const uint scalar_offset_2 = ((offset + 16u)) / 4;
-  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
-  const uint scalar_offset_3 = ((offset + 24u)) / 4;
-  uint4 ubo_load_3 = buffer[scalar_offset_3 / 4];
-  return float4x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)), asfloat(((scalar_offset_3 & 2) ? ubo_load_3.zw : ubo_load_3.xy)));
-}
-
-S tint_symbol_1(uint4 buffer[12], uint offset) {
-  const uint scalar_offset_4 = ((offset + 0u)) / 4;
-  const uint scalar_offset_5 = ((offset + 40u)) / 4;
-  const S tint_symbol_5 = {asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4]), tint_symbol_3(buffer, (offset + 8u)), asint(buffer[scalar_offset_5 / 4][scalar_offset_5 % 4])};
-  return tint_symbol_5;
-}
-
-typedef S tint_symbol_ret[4];
-tint_symbol_ret tint_symbol(uint4 buffer[12], uint offset) {
-  S arr[4] = (S[4])0;
-  {
-    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
-      arr[i] = tint_symbol_1(buffer, (offset + (i * 48u)));
-    }
-  }
-  return arr;
-}
-
-[numthreads(1, 1, 1)]
-void f() {
-  a(tint_symbol(u, 0u));
-  b(tint_symbol_1(u, 96u));
-  c(tint_symbol_3(u, 104u));
-  d(asfloat(u[1].xy).yx);
-  e(asfloat(u[1].xy).yx.x);
-  return;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2/to_fn.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x2/to_fn.wgsl.expected.fxc.hlsl
deleted file mode 100644
index 20e3b99..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat4x2/to_fn.wgsl.expected.fxc.hlsl
+++ /dev/null

@@ -1,64 +0,0 @@
-struct S {
-  int before;
-  float4x2 m;
-  int after;
-};
-
-cbuffer cbuffer_u : register(b0, space0) {
-  uint4 u[12];
-};
-
-void a(S a_1[4]) {
-}
-
-void b(S s) {
-}
-
-void c(float4x2 m) {
-}
-
-void d(float2 v) {
-}
-
-void e(float f_1) {
-}
-
-float4x2 tint_symbol_3(uint4 buffer[12], uint offset) {
-  const uint scalar_offset = ((offset + 0u)) / 4;
-  uint4 ubo_load = buffer[scalar_offset / 4];
-  const uint scalar_offset_1 = ((offset + 8u)) / 4;
-  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
-  const uint scalar_offset_2 = ((offset + 16u)) / 4;
-  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
-  const uint scalar_offset_3 = ((offset + 24u)) / 4;
-  uint4 ubo_load_3 = buffer[scalar_offset_3 / 4];
-  return float4x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)), asfloat(((scalar_offset_3 & 2) ? ubo_load_3.zw : ubo_load_3.xy)));
-}
-
-S tint_symbol_1(uint4 buffer[12], uint offset) {
-  const uint scalar_offset_4 = ((offset + 0u)) / 4;
-  const uint scalar_offset_5 = ((offset + 40u)) / 4;
-  const S tint_symbol_5 = {asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4]), tint_symbol_3(buffer, (offset + 8u)), asint(buffer[scalar_offset_5 / 4][scalar_offset_5 % 4])};
-  return tint_symbol_5;
-}
-
-typedef S tint_symbol_ret[4];
-tint_symbol_ret tint_symbol(uint4 buffer[12], uint offset) {
-  S arr[4] = (S[4])0;
-  {
-    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
-      arr[i] = tint_symbol_1(buffer, (offset + (i * 48u)));
-    }
-  }
-  return arr;
-}
-
-[numthreads(1, 1, 1)]
-void f() {
-  a(tint_symbol(u, 0u));
-  b(tint_symbol_1(u, 96u));
-  c(tint_symbol_3(u, 104u));
-  d(asfloat(u[1].xy).yx);
-  e(asfloat(u[1].xy).yx.x);
-  return;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2/to_fn.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat4x2/to_fn.wgsl.expected.glsl
deleted file mode 100644
index 43a4f88..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat4x2/to_fn.wgsl.expected.glsl
+++ /dev/null

@@ -1,71 +0,0 @@
-#version 310 es
-
-struct S {
-  int before;
-  uint pad;
-  mat4x2 m;
-  int after;
-  uint pad_1;
-};
-
-struct S_std140 {
-  int before;
-  uint pad;
-  vec2 m_0;
-  vec2 m_1;
-  vec2 m_2;
-  vec2 m_3;
-  int after;
-  uint pad_1;
-};
-
-layout(binding = 0, std140) uniform u_block_std140_ubo {
-  S_std140 inner[4];
-} u;
-
-void a(S a_1[4]) {
-}
-
-void b(S s) {
-}
-
-void c(mat4x2 m) {
-}
-
-void d(vec2 v) {
-}
-
-void e(float f_1) {
-}
-
-S conv_S(S_std140 val) {
-  return S(val.before, val.pad, mat4x2(val.m_0, val.m_1, val.m_2, val.m_3), val.after, val.pad_1);
-}
-
-S[4] conv_arr4_S(S_std140 val[4]) {
-  S arr[4] = S[4](S(0, 0u, mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0, 0u), S(0, 0u, mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0, 0u), S(0, 0u, mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0, 0u), S(0, 0u, mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0, 0u));
-  {
-    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
-      arr[i] = conv_S(val[i]);
-    }
-  }
-  return arr;
-}
-
-mat4x2 load_u_inner_2_m() {
-  return mat4x2(u.inner[2u].m_0, u.inner[2u].m_1, u.inner[2u].m_2, u.inner[2u].m_3);
-}
-
-void f() {
-  a(conv_arr4_S(u.inner));
-  b(conv_S(u.inner[2u]));
-  c(load_u_inner_2_m());
-  d(u.inner[0u].m_1.yx);
-  e(u.inner[0u].m_1.yx[0u]);
-}
-
-layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
-void main() {
-  f();
-  return;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2/to_fn.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat4x2/to_fn.wgsl.expected.msl
deleted file mode 100644
index 61f3150..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat4x2/to_fn.wgsl.expected.msl
+++ /dev/null

@@ -1,48 +0,0 @@
-#include <metal_stdlib>
-
-using namespace metal;
-
-template<typename T, size_t N>
-struct tint_array {
-    const constant T& operator[](size_t i) const constant { return elements[i]; }
-    device T& operator[](size_t i) device { return elements[i]; }
-    const device T& operator[](size_t i) const device { return elements[i]; }
-    thread T& operator[](size_t i) thread { return elements[i]; }
-    const thread T& operator[](size_t i) const thread { return elements[i]; }
-    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
-    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
-    T elements[N];
-};
-
-struct S {
-  /* 0x0000 */ int before;
-  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
-  /* 0x0008 */ float4x2 m;
-  /* 0x0028 */ int after;
-  /* 0x002c */ tint_array<int8_t, 4> tint_pad_1;
-};
-
-void a(tint_array<S, 4> a_1) {
-}
-
-void b(S s) {
-}
-
-void c(float4x2 m) {
-}
-
-void d(float2 v) {
-}
-
-void e(float f_1) {
-}
-
-kernel void f(const constant tint_array<S, 4>* tint_symbol [[buffer(0)]]) {
-  a(*(tint_symbol));
-  b((*(tint_symbol))[2]);
-  c((*(tint_symbol))[2].m);
-  d(float2((*(tint_symbol))[0].m[1]).yx);
-  e(float2((*(tint_symbol))[0].m[1]).yx[0]);
-  return;
-}
-

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2/to_fn.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat4x2/to_fn.wgsl.expected.spvasm
deleted file mode 100644
index 63abbf3..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat4x2/to_fn.wgsl.expected.spvasm
+++ /dev/null

@@ -1,211 +0,0 @@
-; SPIR-V
-; Version: 1.3
-; Generator: Google Tint Compiler; 0
-; Bound: 128
-; Schema: 0
-               OpCapability Shader
-               OpMemoryModel Logical GLSL450
-               OpEntryPoint GLCompute %f "f"
-               OpExecutionMode %f LocalSize 1 1 1
-               OpName %u_block_std140 "u_block_std140"
-               OpMemberName %u_block_std140 0 "inner"
-               OpName %S_std140 "S_std140"
-               OpMemberName %S_std140 0 "before"
-               OpMemberName %S_std140 1 "m_0"
-               OpMemberName %S_std140 2 "m_1"
-               OpMemberName %S_std140 3 "m_2"
-               OpMemberName %S_std140 4 "m_3"
-               OpMemberName %S_std140 5 "after"
-               OpName %u "u"
-               OpName %S "S"
-               OpMemberName %S 0 "before"
-               OpMemberName %S 1 "m"
-               OpMemberName %S 2 "after"
-               OpName %a "a"
-               OpName %a_1 "a_1"
-               OpName %b "b"
-               OpName %s "s"
-               OpName %c "c"
-               OpName %m "m"
-               OpName %d "d"
-               OpName %v "v"
-               OpName %e "e"
-               OpName %f_1 "f_1"
-               OpName %conv_S "conv_S"
-               OpName %val "val"
-               OpName %conv_arr4_S "conv_arr4_S"
-               OpName %val_0 "val"
-               OpName %arr "arr"
-               OpName %i "i"
-               OpName %var_for_index "var_for_index"
-               OpName %load_u_inner_2_m "load_u_inner_2_m"
-               OpName %f "f"
-               OpDecorate %u_block_std140 Block
-               OpMemberDecorate %u_block_std140 0 Offset 0
-               OpMemberDecorate %S_std140 0 Offset 0
-               OpMemberDecorate %S_std140 1 Offset 8
-               OpMemberDecorate %S_std140 2 Offset 16
-               OpMemberDecorate %S_std140 3 Offset 24
-               OpMemberDecorate %S_std140 4 Offset 32
-               OpMemberDecorate %S_std140 5 Offset 40
-               OpDecorate %_arr_S_std140_uint_4 ArrayStride 48
-               OpDecorate %u NonWritable
-               OpDecorate %u DescriptorSet 0
-               OpDecorate %u Binding 0
-               OpMemberDecorate %S 0 Offset 0
-               OpMemberDecorate %S 1 Offset 8
-               OpMemberDecorate %S 1 ColMajor
-               OpMemberDecorate %S 1 MatrixStride 8
-               OpMemberDecorate %S 2 Offset 40
-               OpDecorate %_arr_S_uint_4 ArrayStride 48
-        %int = OpTypeInt 32 1
-      %float = OpTypeFloat 32
-    %v2float = OpTypeVector %float 2
-   %S_std140 = OpTypeStruct %int %v2float %v2float %v2float %v2float %int
-       %uint = OpTypeInt 32 0
-     %uint_4 = OpConstant %uint 4
-%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
-%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
-%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
-          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
-       %void = OpTypeVoid
-%mat4v2float = OpTypeMatrix %v2float 4
-          %S = OpTypeStruct %int %mat4v2float %int
-%_arr_S_uint_4 = OpTypeArray %S %uint_4
-         %11 = OpTypeFunction %void %_arr_S_uint_4
-         %19 = OpTypeFunction %void %S
-         %23 = OpTypeFunction %void %mat4v2float
-         %27 = OpTypeFunction %void %v2float
-         %31 = OpTypeFunction %void %float
-         %35 = OpTypeFunction %S %S_std140
-         %47 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
-%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
-         %53 = OpConstantNull %_arr_S_uint_4
-%_ptr_Function_uint = OpTypePointer Function %uint
-         %56 = OpConstantNull %uint
-       %bool = OpTypeBool
-%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
-         %69 = OpConstantNull %_arr_S_std140_uint_4
-%_ptr_Function_S = OpTypePointer Function %S
-%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
-     %uint_1 = OpConstant %uint 1
-         %82 = OpTypeFunction %mat4v2float
-     %uint_0 = OpConstant %uint 0
-     %uint_2 = OpConstant %uint 2
-%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
-%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
-     %uint_3 = OpConstant %uint 3
-        %105 = OpTypeFunction %void
-%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
-          %a = OpFunction %void None %11
-        %a_1 = OpFunctionParameter %_arr_S_uint_4
-         %18 = OpLabel
-               OpReturn
-               OpFunctionEnd
-          %b = OpFunction %void None %19
-          %s = OpFunctionParameter %S
-         %22 = OpLabel
-               OpReturn
-               OpFunctionEnd
-          %c = OpFunction %void None %23
-          %m = OpFunctionParameter %mat4v2float
-         %26 = OpLabel
-               OpReturn
-               OpFunctionEnd
-          %d = OpFunction %void None %27
-          %v = OpFunctionParameter %v2float
-         %30 = OpLabel
-               OpReturn
-               OpFunctionEnd
-          %e = OpFunction %void None %31
-        %f_1 = OpFunctionParameter %float
-         %34 = OpLabel
-               OpReturn
-               OpFunctionEnd
-     %conv_S = OpFunction %S None %35
-        %val = OpFunctionParameter %S_std140
-         %38 = OpLabel
-         %39 = OpCompositeExtract %int %val 0
-         %40 = OpCompositeExtract %v2float %val 1
-         %41 = OpCompositeExtract %v2float %val 2
-         %42 = OpCompositeExtract %v2float %val 3
-         %43 = OpCompositeExtract %v2float %val 4
-         %44 = OpCompositeConstruct %mat4v2float %40 %41 %42 %43
-         %45 = OpCompositeExtract %int %val 5
-         %46 = OpCompositeConstruct %S %39 %44 %45
-               OpReturnValue %46
-               OpFunctionEnd
-%conv_arr4_S = OpFunction %_arr_S_uint_4 None %47
-      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
-         %50 = OpLabel
-        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %53
-          %i = OpVariable %_ptr_Function_uint Function %56
-%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %69
-               OpBranch %57
-         %57 = OpLabel
-               OpLoopMerge %58 %59 None
-               OpBranch %60
-         %60 = OpLabel
-         %62 = OpLoad %uint %i
-         %63 = OpULessThan %bool %62 %uint_4
-         %61 = OpLogicalNot %bool %63
-               OpSelectionMerge %65 None
-               OpBranchConditional %61 %66 %65
-         %66 = OpLabel
-               OpBranch %58
-         %65 = OpLabel
-               OpStore %var_for_index %val_0
-         %70 = OpLoad %uint %i
-         %72 = OpAccessChain %_ptr_Function_S %arr %70
-         %74 = OpLoad %uint %i
-         %76 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %74
-         %77 = OpLoad %S_std140 %76
-         %73 = OpFunctionCall %S %conv_S %77
-               OpStore %72 %73
-               OpBranch %59
-         %59 = OpLabel
-         %78 = OpLoad %uint %i
-         %80 = OpIAdd %uint %78 %uint_1
-               OpStore %i %80
-               OpBranch %57
-         %58 = OpLabel
-         %81 = OpLoad %_arr_S_uint_4 %arr
-               OpReturnValue %81
-               OpFunctionEnd
-%load_u_inner_2_m = OpFunction %mat4v2float None %82
-         %84 = OpLabel
-         %89 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
-         %92 = OpAccessChain %_ptr_Uniform_v2float %89 %uint_1
-         %93 = OpLoad %v2float %92
-         %95 = OpAccessChain %_ptr_Uniform_v2float %89 %uint_2
-         %96 = OpLoad %v2float %95
-         %99 = OpAccessChain %_ptr_Uniform_v2float %89 %uint_3
-        %100 = OpLoad %v2float %99
-        %102 = OpAccessChain %_ptr_Uniform_v2float %89 %uint_4
-        %103 = OpLoad %v2float %102
-        %104 = OpCompositeConstruct %mat4v2float %93 %96 %100 %103
-               OpReturnValue %104
-               OpFunctionEnd
-          %f = OpFunction %void None %105
-        %107 = OpLabel
-        %111 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
-        %112 = OpLoad %_arr_S_std140_uint_4 %111
-        %109 = OpFunctionCall %_arr_S_uint_4 %conv_arr4_S %112
-        %108 = OpFunctionCall %void %a %109
-        %115 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
-        %116 = OpLoad %S_std140 %115
-        %114 = OpFunctionCall %S %conv_S %116
-        %113 = OpFunctionCall %void %b %114
-        %118 = OpFunctionCall %mat4v2float %load_u_inner_2_m
-        %117 = OpFunctionCall %void %c %118
-        %120 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %56 %uint_2
-        %121 = OpLoad %v2float %120
-        %122 = OpVectorShuffle %v2float %121 %121 1 0
-        %119 = OpFunctionCall %void %d %122
-        %124 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %56 %uint_2
-        %125 = OpLoad %v2float %124
-        %126 = OpVectorShuffle %v2float %125 %125 1 0
-        %127 = OpCompositeExtract %float %126 0
-        %123 = OpFunctionCall %void %e %127
-               OpReturn
-               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2/to_fn.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x2/to_fn.wgsl.expected.wgsl
deleted file mode 100644
index 29258cc..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat4x2/to_fn.wgsl.expected.wgsl
+++ /dev/null

@@ -1,31 +0,0 @@
-struct S {
-  before : i32,
-  m : mat4x2<f32>,
-  after : i32,
-}
-
-@group(0) @binding(0) var<uniform> u : array<S, 4>;
-
-fn a(a : array<S, 4>) {
-}
-
-fn b(s : S) {
-}
-
-fn c(m : mat4x2<f32>) {
-}
-
-fn d(v : vec2<f32>) {
-}
-
-fn e(f : f32) {
-}
-
-@compute @workgroup_size(1)
-fn f() {
-  a(u);
-  b(u[2]);
-  c(u[2].m);
-  d(u[0].m[1].yx);
-  e(u[0].m[1].yx.x);
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2/to_private.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x2/to_private.wgsl
deleted file mode 100644
index 4d8b759..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat4x2/to_private.wgsl
+++ /dev/null

@@ -1,16 +0,0 @@
-struct S {
-  before : i32,
-  m : mat4x2<f32>,
-  after : i32,
-}
-
-@group(0) @binding(0) var<uniform> u : array<S, 4>;
-var<private> p : array<S, 4>;
-
-@compute @workgroup_size(1)
-fn f() {
-    p = u;
-    p[1] = u[2];
-    p[3].m = u[2].m;
-    p[1].m[0] = u[0].m[1].yx;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2/to_private.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x2/to_private.wgsl.expected.dxc.hlsl
deleted file mode 100644
index 1483eb4..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat4x2/to_private.wgsl.expected.dxc.hlsl
+++ /dev/null

@@ -1,49 +0,0 @@
-struct S {
-  int before;
-  float4x2 m;
-  int after;
-};
-
-cbuffer cbuffer_u : register(b0, space0) {
-  uint4 u[12];
-};
-static S p[4] = (S[4])0;
-
-float4x2 tint_symbol_3(uint4 buffer[12], uint offset) {
-  const uint scalar_offset = ((offset + 0u)) / 4;
-  uint4 ubo_load = buffer[scalar_offset / 4];
-  const uint scalar_offset_1 = ((offset + 8u)) / 4;
-  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
-  const uint scalar_offset_2 = ((offset + 16u)) / 4;
-  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
-  const uint scalar_offset_3 = ((offset + 24u)) / 4;
-  uint4 ubo_load_3 = buffer[scalar_offset_3 / 4];
-  return float4x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)), asfloat(((scalar_offset_3 & 2) ? ubo_load_3.zw : ubo_load_3.xy)));
-}
-
-S tint_symbol_1(uint4 buffer[12], uint offset) {
-  const uint scalar_offset_4 = ((offset + 0u)) / 4;
-  const uint scalar_offset_5 = ((offset + 40u)) / 4;
-  const S tint_symbol_5 = {asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4]), tint_symbol_3(buffer, (offset + 8u)), asint(buffer[scalar_offset_5 / 4][scalar_offset_5 % 4])};
-  return tint_symbol_5;
-}
-
-typedef S tint_symbol_ret[4];
-tint_symbol_ret tint_symbol(uint4 buffer[12], uint offset) {
-  S arr[4] = (S[4])0;
-  {
-    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
-      arr[i] = tint_symbol_1(buffer, (offset + (i * 48u)));
-    }
-  }
-  return arr;
-}
-
-[numthreads(1, 1, 1)]
-void f() {
-  p = tint_symbol(u, 0u);
-  p[1] = tint_symbol_1(u, 96u);
-  p[3].m = tint_symbol_3(u, 104u);
-  p[1].m[0] = asfloat(u[1].xy).yx;
-  return;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2/to_private.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x2/to_private.wgsl.expected.fxc.hlsl
deleted file mode 100644
index 1483eb4..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat4x2/to_private.wgsl.expected.fxc.hlsl
+++ /dev/null

@@ -1,49 +0,0 @@
-struct S {
-  int before;
-  float4x2 m;
-  int after;
-};
-
-cbuffer cbuffer_u : register(b0, space0) {
-  uint4 u[12];
-};
-static S p[4] = (S[4])0;
-
-float4x2 tint_symbol_3(uint4 buffer[12], uint offset) {
-  const uint scalar_offset = ((offset + 0u)) / 4;
-  uint4 ubo_load = buffer[scalar_offset / 4];
-  const uint scalar_offset_1 = ((offset + 8u)) / 4;
-  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
-  const uint scalar_offset_2 = ((offset + 16u)) / 4;
-  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
-  const uint scalar_offset_3 = ((offset + 24u)) / 4;
-  uint4 ubo_load_3 = buffer[scalar_offset_3 / 4];
-  return float4x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)), asfloat(((scalar_offset_3 & 2) ? ubo_load_3.zw : ubo_load_3.xy)));
-}
-
-S tint_symbol_1(uint4 buffer[12], uint offset) {
-  const uint scalar_offset_4 = ((offset + 0u)) / 4;
-  const uint scalar_offset_5 = ((offset + 40u)) / 4;
-  const S tint_symbol_5 = {asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4]), tint_symbol_3(buffer, (offset + 8u)), asint(buffer[scalar_offset_5 / 4][scalar_offset_5 % 4])};
-  return tint_symbol_5;
-}
-
-typedef S tint_symbol_ret[4];
-tint_symbol_ret tint_symbol(uint4 buffer[12], uint offset) {
-  S arr[4] = (S[4])0;
-  {
-    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
-      arr[i] = tint_symbol_1(buffer, (offset + (i * 48u)));
-    }
-  }
-  return arr;
-}
-
-[numthreads(1, 1, 1)]
-void f() {
-  p = tint_symbol(u, 0u);
-  p[1] = tint_symbol_1(u, 96u);
-  p[3].m = tint_symbol_3(u, 104u);
-  p[1].m[0] = asfloat(u[1].xy).yx;
-  return;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2/to_private.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat4x2/to_private.wgsl.expected.glsl
deleted file mode 100644
index 1ff8aa4..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat4x2/to_private.wgsl.expected.glsl
+++ /dev/null

@@ -1,56 +0,0 @@
-#version 310 es
-
-struct S {
-  int before;
-  uint pad;
-  mat4x2 m;
-  int after;
-  uint pad_1;
-};
-
-struct S_std140 {
-  int before;
-  uint pad;
-  vec2 m_0;
-  vec2 m_1;
-  vec2 m_2;
-  vec2 m_3;
-  int after;
-  uint pad_1;
-};
-
-layout(binding = 0, std140) uniform u_block_std140_ubo {
-  S_std140 inner[4];
-} u;
-
-S p[4] = S[4](S(0, 0u, mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0, 0u), S(0, 0u, mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0, 0u), S(0, 0u, mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0, 0u), S(0, 0u, mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0, 0u));
-S conv_S(S_std140 val) {
-  return S(val.before, val.pad, mat4x2(val.m_0, val.m_1, val.m_2, val.m_3), val.after, val.pad_1);
-}
-
-S[4] conv_arr4_S(S_std140 val[4]) {
-  S arr[4] = S[4](S(0, 0u, mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0, 0u), S(0, 0u, mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0, 0u), S(0, 0u, mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0, 0u), S(0, 0u, mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0, 0u));
-  {
-    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
-      arr[i] = conv_S(val[i]);
-    }
-  }
-  return arr;
-}
-
-mat4x2 load_u_inner_2_m() {
-  return mat4x2(u.inner[2u].m_0, u.inner[2u].m_1, u.inner[2u].m_2, u.inner[2u].m_3);
-}
-
-void f() {
-  p = conv_arr4_S(u.inner);
-  p[1] = conv_S(u.inner[2u]);
-  p[3].m = load_u_inner_2_m();
-  p[1].m[0] = u.inner[0u].m_1.yx;
-}
-
-layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
-void main() {
-  f();
-  return;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2/to_private.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat4x2/to_private.wgsl.expected.msl
deleted file mode 100644
index c5f914c..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat4x2/to_private.wgsl.expected.msl
+++ /dev/null

@@ -1,33 +0,0 @@
-#include <metal_stdlib>
-
-using namespace metal;
-
-template<typename T, size_t N>
-struct tint_array {
-    const constant T& operator[](size_t i) const constant { return elements[i]; }
-    device T& operator[](size_t i) device { return elements[i]; }
-    const device T& operator[](size_t i) const device { return elements[i]; }
-    thread T& operator[](size_t i) thread { return elements[i]; }
-    const thread T& operator[](size_t i) const thread { return elements[i]; }
-    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
-    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
-    T elements[N];
-};
-
-struct S {
-  /* 0x0000 */ int before;
-  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
-  /* 0x0008 */ float4x2 m;
-  /* 0x0028 */ int after;
-  /* 0x002c */ tint_array<int8_t, 4> tint_pad_1;
-};
-
-kernel void f(const constant tint_array<S, 4>* tint_symbol_1 [[buffer(0)]]) {
-  thread tint_array<S, 4> tint_symbol = {};
-  tint_symbol = *(tint_symbol_1);
-  tint_symbol[1] = (*(tint_symbol_1))[2];
-  tint_symbol[3].m = (*(tint_symbol_1))[2].m;
-  tint_symbol[1].m[0] = float2((*(tint_symbol_1))[0].m[1]).yx;
-  return;
-}
-

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2/to_private.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat4x2/to_private.wgsl.expected.spvasm
deleted file mode 100644
index e0ac972..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat4x2/to_private.wgsl.expected.spvasm
+++ /dev/null

@@ -1,178 +0,0 @@
-; SPIR-V
-; Version: 1.3
-; Generator: Google Tint Compiler; 0
-; Bound: 110
-; Schema: 0
-               OpCapability Shader
-               OpMemoryModel Logical GLSL450
-               OpEntryPoint GLCompute %f "f"
-               OpExecutionMode %f LocalSize 1 1 1
-               OpName %u_block_std140 "u_block_std140"
-               OpMemberName %u_block_std140 0 "inner"
-               OpName %S_std140 "S_std140"
-               OpMemberName %S_std140 0 "before"
-               OpMemberName %S_std140 1 "m_0"
-               OpMemberName %S_std140 2 "m_1"
-               OpMemberName %S_std140 3 "m_2"
-               OpMemberName %S_std140 4 "m_3"
-               OpMemberName %S_std140 5 "after"
-               OpName %u "u"
-               OpName %S "S"
-               OpMemberName %S 0 "before"
-               OpMemberName %S 1 "m"
-               OpMemberName %S 2 "after"
-               OpName %p "p"
-               OpName %conv_S "conv_S"
-               OpName %val "val"
-               OpName %conv_arr4_S "conv_arr4_S"
-               OpName %val_0 "val"
-               OpName %arr "arr"
-               OpName %i "i"
-               OpName %var_for_index "var_for_index"
-               OpName %load_u_inner_2_m "load_u_inner_2_m"
-               OpName %f "f"
-               OpDecorate %u_block_std140 Block
-               OpMemberDecorate %u_block_std140 0 Offset 0
-               OpMemberDecorate %S_std140 0 Offset 0
-               OpMemberDecorate %S_std140 1 Offset 8
-               OpMemberDecorate %S_std140 2 Offset 16
-               OpMemberDecorate %S_std140 3 Offset 24
-               OpMemberDecorate %S_std140 4 Offset 32
-               OpMemberDecorate %S_std140 5 Offset 40
-               OpDecorate %_arr_S_std140_uint_4 ArrayStride 48
-               OpDecorate %u NonWritable
-               OpDecorate %u DescriptorSet 0
-               OpDecorate %u Binding 0
-               OpMemberDecorate %S 0 Offset 0
-               OpMemberDecorate %S 1 Offset 8
-               OpMemberDecorate %S 1 ColMajor
-               OpMemberDecorate %S 1 MatrixStride 8
-               OpMemberDecorate %S 2 Offset 40
-               OpDecorate %_arr_S_uint_4 ArrayStride 48
-        %int = OpTypeInt 32 1
-      %float = OpTypeFloat 32
-    %v2float = OpTypeVector %float 2
-   %S_std140 = OpTypeStruct %int %v2float %v2float %v2float %v2float %int
-       %uint = OpTypeInt 32 0
-     %uint_4 = OpConstant %uint 4
-%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
-%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
-%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
-          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
-%mat4v2float = OpTypeMatrix %v2float 4
-          %S = OpTypeStruct %int %mat4v2float %int
-%_arr_S_uint_4 = OpTypeArray %S %uint_4
-%_ptr_Private__arr_S_uint_4 = OpTypePointer Private %_arr_S_uint_4
-         %16 = OpConstantNull %_arr_S_uint_4
-          %p = OpVariable %_ptr_Private__arr_S_uint_4 Private %16
-         %17 = OpTypeFunction %S %S_std140
-         %29 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
-%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
-%_ptr_Function_uint = OpTypePointer Function %uint
-         %37 = OpConstantNull %uint
-       %bool = OpTypeBool
-%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
-         %50 = OpConstantNull %_arr_S_std140_uint_4
-%_ptr_Function_S = OpTypePointer Function %S
-%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
-     %uint_1 = OpConstant %uint 1
-         %63 = OpTypeFunction %mat4v2float
-     %uint_0 = OpConstant %uint 0
-     %uint_2 = OpConstant %uint 2
-%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
-%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
-     %uint_3 = OpConstant %uint 3
-       %void = OpTypeVoid
-         %86 = OpTypeFunction %void
-%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
-      %int_1 = OpConstant %int 1
-%_ptr_Private_S = OpTypePointer Private %S
-      %int_3 = OpConstant %int 3
-%_ptr_Private_mat4v2float = OpTypePointer Private %mat4v2float
-        %104 = OpConstantNull %int
-%_ptr_Private_v2float = OpTypePointer Private %v2float
-     %conv_S = OpFunction %S None %17
-        %val = OpFunctionParameter %S_std140
-         %20 = OpLabel
-         %21 = OpCompositeExtract %int %val 0
-         %22 = OpCompositeExtract %v2float %val 1
-         %23 = OpCompositeExtract %v2float %val 2
-         %24 = OpCompositeExtract %v2float %val 3
-         %25 = OpCompositeExtract %v2float %val 4
-         %26 = OpCompositeConstruct %mat4v2float %22 %23 %24 %25
-         %27 = OpCompositeExtract %int %val 5
-         %28 = OpCompositeConstruct %S %21 %26 %27
-               OpReturnValue %28
-               OpFunctionEnd
-%conv_arr4_S = OpFunction %_arr_S_uint_4 None %29
-      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
-         %32 = OpLabel
-        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %16
-          %i = OpVariable %_ptr_Function_uint Function %37
-%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %50
-               OpBranch %38
-         %38 = OpLabel
-               OpLoopMerge %39 %40 None
-               OpBranch %41
-         %41 = OpLabel
-         %43 = OpLoad %uint %i
-         %44 = OpULessThan %bool %43 %uint_4
-         %42 = OpLogicalNot %bool %44
-               OpSelectionMerge %46 None
-               OpBranchConditional %42 %47 %46
-         %47 = OpLabel
-               OpBranch %39
-         %46 = OpLabel
-               OpStore %var_for_index %val_0
-         %51 = OpLoad %uint %i
-         %53 = OpAccessChain %_ptr_Function_S %arr %51
-         %55 = OpLoad %uint %i
-         %57 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %55
-         %58 = OpLoad %S_std140 %57
-         %54 = OpFunctionCall %S %conv_S %58
-               OpStore %53 %54
-               OpBranch %40
-         %40 = OpLabel
-         %59 = OpLoad %uint %i
-         %61 = OpIAdd %uint %59 %uint_1
-               OpStore %i %61
-               OpBranch %38
-         %39 = OpLabel
-         %62 = OpLoad %_arr_S_uint_4 %arr
-               OpReturnValue %62
-               OpFunctionEnd
-%load_u_inner_2_m = OpFunction %mat4v2float None %63
-         %65 = OpLabel
-         %70 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
-         %73 = OpAccessChain %_ptr_Uniform_v2float %70 %uint_1
-         %74 = OpLoad %v2float %73
-         %76 = OpAccessChain %_ptr_Uniform_v2float %70 %uint_2
-         %77 = OpLoad %v2float %76
-         %80 = OpAccessChain %_ptr_Uniform_v2float %70 %uint_3
-         %81 = OpLoad %v2float %80
-         %83 = OpAccessChain %_ptr_Uniform_v2float %70 %uint_4
-         %84 = OpLoad %v2float %83
-         %85 = OpCompositeConstruct %mat4v2float %74 %77 %81 %84
-               OpReturnValue %85
-               OpFunctionEnd
-          %f = OpFunction %void None %86
-         %89 = OpLabel
-         %92 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
-         %93 = OpLoad %_arr_S_std140_uint_4 %92
-         %90 = OpFunctionCall %_arr_S_uint_4 %conv_arr4_S %93
-               OpStore %p %90
-         %96 = OpAccessChain %_ptr_Private_S %p %int_1
-         %98 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
-         %99 = OpLoad %S_std140 %98
-         %97 = OpFunctionCall %S %conv_S %99
-               OpStore %96 %97
-        %102 = OpAccessChain %_ptr_Private_mat4v2float %p %int_3 %uint_1
-        %103 = OpFunctionCall %mat4v2float %load_u_inner_2_m
-               OpStore %102 %103
-        %106 = OpAccessChain %_ptr_Private_v2float %p %int_1 %uint_1 %104
-        %107 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %37 %uint_2
-        %108 = OpLoad %v2float %107
-        %109 = OpVectorShuffle %v2float %108 %108 1 0
-               OpStore %106 %109
-               OpReturn
-               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2/to_private.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x2/to_private.wgsl.expected.wgsl
deleted file mode 100644
index 32b45e1..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat4x2/to_private.wgsl.expected.wgsl
+++ /dev/null

@@ -1,17 +0,0 @@
-struct S {
-  before : i32,
-  m : mat4x2<f32>,
-  after : i32,
-}
-
-@group(0) @binding(0) var<uniform> u : array<S, 4>;
-
-var<private> p : array<S, 4>;
-
-@compute @workgroup_size(1)
-fn f() {
-  p = u;
-  p[1] = u[2];
-  p[3].m = u[2].m;
-  p[1].m[0] = u[0].m[1].yx;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2/to_storage.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x2/to_storage.wgsl
deleted file mode 100644
index 692ff38..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat4x2/to_storage.wgsl
+++ /dev/null

@@ -1,16 +0,0 @@
-struct S {
-  before : i32,
-  m : mat4x2<f32>,
-  after : i32,
-}
-
-@group(0) @binding(0) var<uniform> u : array<S, 4>;
-@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
-
-@compute @workgroup_size(1)
-fn f() {
-    s = u;
-    s[1] = u[2];
-    s[3].m = u[2].m;
-    s[1].m[0] = u[0].m[1].yx;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2/to_storage.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x2/to_storage.wgsl.expected.dxc.hlsl
deleted file mode 100644
index 8fd3fb0..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat4x2/to_storage.wgsl.expected.dxc.hlsl
+++ /dev/null

@@ -1,71 +0,0 @@
-struct S {
-  int before;
-  float4x2 m;
-  int after;
-};
-
-cbuffer cbuffer_u : register(b0, space0) {
-  uint4 u[12];
-};
-RWByteAddressBuffer s : register(u1, space0);
-
-void tint_symbol_3(RWByteAddressBuffer buffer, uint offset, float4x2 value) {
-  buffer.Store2((offset + 0u), asuint(value[0u]));
-  buffer.Store2((offset + 8u), asuint(value[1u]));
-  buffer.Store2((offset + 16u), asuint(value[2u]));
-  buffer.Store2((offset + 24u), asuint(value[3u]));
-}
-
-void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, S value) {
-  buffer.Store((offset + 0u), asuint(value.before));
-  tint_symbol_3(buffer, (offset + 8u), value.m);
-  buffer.Store((offset + 40u), asuint(value.after));
-}
-
-void tint_symbol(RWByteAddressBuffer buffer, uint offset, S value[4]) {
-  S array[4] = value;
-  {
-    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
-      tint_symbol_1(buffer, (offset + (i * 48u)), array[i]);
-    }
-  }
-}
-
-float4x2 tint_symbol_8(uint4 buffer[12], uint offset) {
-  const uint scalar_offset = ((offset + 0u)) / 4;
-  uint4 ubo_load = buffer[scalar_offset / 4];
-  const uint scalar_offset_1 = ((offset + 8u)) / 4;
-  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
-  const uint scalar_offset_2 = ((offset + 16u)) / 4;
-  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
-  const uint scalar_offset_3 = ((offset + 24u)) / 4;
-  uint4 ubo_load_3 = buffer[scalar_offset_3 / 4];
-  return float4x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)), asfloat(((scalar_offset_3 & 2) ? ubo_load_3.zw : ubo_load_3.xy)));
-}
-
-S tint_symbol_6(uint4 buffer[12], uint offset) {
-  const uint scalar_offset_4 = ((offset + 0u)) / 4;
-  const uint scalar_offset_5 = ((offset + 40u)) / 4;
-  const S tint_symbol_10 = {asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4]), tint_symbol_8(buffer, (offset + 8u)), asint(buffer[scalar_offset_5 / 4][scalar_offset_5 % 4])};
-  return tint_symbol_10;
-}
-
-typedef S tint_symbol_5_ret[4];
-tint_symbol_5_ret tint_symbol_5(uint4 buffer[12], uint offset) {
-  S arr[4] = (S[4])0;
-  {
-    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
-      arr[i_1] = tint_symbol_6(buffer, (offset + (i_1 * 48u)));
-    }
-  }
-  return arr;
-}
-
-[numthreads(1, 1, 1)]
-void f() {
-  tint_symbol(s, 0u, tint_symbol_5(u, 0u));
-  tint_symbol_1(s, 48u, tint_symbol_6(u, 96u));
-  tint_symbol_3(s, 152u, tint_symbol_8(u, 104u));
-  s.Store2(56u, asuint(asfloat(u[1].xy).yx));
-  return;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2/to_storage.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x2/to_storage.wgsl.expected.fxc.hlsl
deleted file mode 100644
index 8fd3fb0..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat4x2/to_storage.wgsl.expected.fxc.hlsl
+++ /dev/null

@@ -1,71 +0,0 @@
-struct S {
-  int before;
-  float4x2 m;
-  int after;
-};
-
-cbuffer cbuffer_u : register(b0, space0) {
-  uint4 u[12];
-};
-RWByteAddressBuffer s : register(u1, space0);
-
-void tint_symbol_3(RWByteAddressBuffer buffer, uint offset, float4x2 value) {
-  buffer.Store2((offset + 0u), asuint(value[0u]));
-  buffer.Store2((offset + 8u), asuint(value[1u]));
-  buffer.Store2((offset + 16u), asuint(value[2u]));
-  buffer.Store2((offset + 24u), asuint(value[3u]));
-}
-
-void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, S value) {
-  buffer.Store((offset + 0u), asuint(value.before));
-  tint_symbol_3(buffer, (offset + 8u), value.m);
-  buffer.Store((offset + 40u), asuint(value.after));
-}
-
-void tint_symbol(RWByteAddressBuffer buffer, uint offset, S value[4]) {
-  S array[4] = value;
-  {
-    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
-      tint_symbol_1(buffer, (offset + (i * 48u)), array[i]);
-    }
-  }
-}
-
-float4x2 tint_symbol_8(uint4 buffer[12], uint offset) {
-  const uint scalar_offset = ((offset + 0u)) / 4;
-  uint4 ubo_load = buffer[scalar_offset / 4];
-  const uint scalar_offset_1 = ((offset + 8u)) / 4;
-  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
-  const uint scalar_offset_2 = ((offset + 16u)) / 4;
-  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
-  const uint scalar_offset_3 = ((offset + 24u)) / 4;
-  uint4 ubo_load_3 = buffer[scalar_offset_3 / 4];
-  return float4x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)), asfloat(((scalar_offset_3 & 2) ? ubo_load_3.zw : ubo_load_3.xy)));
-}
-
-S tint_symbol_6(uint4 buffer[12], uint offset) {
-  const uint scalar_offset_4 = ((offset + 0u)) / 4;
-  const uint scalar_offset_5 = ((offset + 40u)) / 4;
-  const S tint_symbol_10 = {asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4]), tint_symbol_8(buffer, (offset + 8u)), asint(buffer[scalar_offset_5 / 4][scalar_offset_5 % 4])};
-  return tint_symbol_10;
-}
-
-typedef S tint_symbol_5_ret[4];
-tint_symbol_5_ret tint_symbol_5(uint4 buffer[12], uint offset) {
-  S arr[4] = (S[4])0;
-  {
-    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
-      arr[i_1] = tint_symbol_6(buffer, (offset + (i_1 * 48u)));
-    }
-  }
-  return arr;
-}
-
-[numthreads(1, 1, 1)]
-void f() {
-  tint_symbol(s, 0u, tint_symbol_5(u, 0u));
-  tint_symbol_1(s, 48u, tint_symbol_6(u, 96u));
-  tint_symbol_3(s, 152u, tint_symbol_8(u, 104u));
-  s.Store2(56u, asuint(asfloat(u[1].xy).yx));
-  return;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2/to_storage.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat4x2/to_storage.wgsl.expected.glsl
deleted file mode 100644
index d9b6a9f..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat4x2/to_storage.wgsl.expected.glsl
+++ /dev/null

@@ -1,59 +0,0 @@
-#version 310 es
-
-struct S {
-  int before;
-  uint pad;
-  mat4x2 m;
-  int after;
-  uint pad_1;
-};
-
-struct S_std140 {
-  int before;
-  uint pad;
-  vec2 m_0;
-  vec2 m_1;
-  vec2 m_2;
-  vec2 m_3;
-  int after;
-  uint pad_1;
-};
-
-layout(binding = 0, std140) uniform u_block_std140_ubo {
-  S_std140 inner[4];
-} u;
-
-layout(binding = 1, std430) buffer u_block_ssbo {
-  S inner[4];
-} s;
-
-S conv_S(S_std140 val) {
-  return S(val.before, val.pad, mat4x2(val.m_0, val.m_1, val.m_2, val.m_3), val.after, val.pad_1);
-}
-
-S[4] conv_arr4_S(S_std140 val[4]) {
-  S arr[4] = S[4](S(0, 0u, mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0, 0u), S(0, 0u, mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0, 0u), S(0, 0u, mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0, 0u), S(0, 0u, mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0, 0u));
-  {
-    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
-      arr[i] = conv_S(val[i]);
-    }
-  }
-  return arr;
-}
-
-mat4x2 load_u_inner_2_m() {
-  return mat4x2(u.inner[2u].m_0, u.inner[2u].m_1, u.inner[2u].m_2, u.inner[2u].m_3);
-}
-
-void f() {
-  s.inner = conv_arr4_S(u.inner);
-  s.inner[1] = conv_S(u.inner[2u]);
-  s.inner[3].m = load_u_inner_2_m();
-  s.inner[1].m[0] = u.inner[0u].m_1.yx;
-}
-
-layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
-void main() {
-  f();
-  return;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2/to_storage.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat4x2/to_storage.wgsl.expected.msl
deleted file mode 100644
index 9ef8ea3..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat4x2/to_storage.wgsl.expected.msl
+++ /dev/null

@@ -1,32 +0,0 @@
-#include <metal_stdlib>
-
-using namespace metal;
-
-template<typename T, size_t N>
-struct tint_array {
-    const constant T& operator[](size_t i) const constant { return elements[i]; }
-    device T& operator[](size_t i) device { return elements[i]; }
-    const device T& operator[](size_t i) const device { return elements[i]; }
-    thread T& operator[](size_t i) thread { return elements[i]; }
-    const thread T& operator[](size_t i) const thread { return elements[i]; }
-    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
-    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
-    T elements[N];
-};
-
-struct S {
-  /* 0x0000 */ int before;
-  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
-  /* 0x0008 */ float4x2 m;
-  /* 0x0028 */ int after;
-  /* 0x002c */ tint_array<int8_t, 4> tint_pad_1;
-};
-
-kernel void f(device tint_array<S, 4>* tint_symbol [[buffer(1)]], const constant tint_array<S, 4>* tint_symbol_1 [[buffer(0)]]) {
-  *(tint_symbol) = *(tint_symbol_1);
-  (*(tint_symbol))[1] = (*(tint_symbol_1))[2];
-  (*(tint_symbol))[3].m = (*(tint_symbol_1))[2].m;
-  (*(tint_symbol))[1].m[0] = float2((*(tint_symbol_1))[0].m[1]).yx;
-  return;
-}
-

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2/to_storage.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat4x2/to_storage.wgsl.expected.spvasm
deleted file mode 100644
index bcbfadc5..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat4x2/to_storage.wgsl.expected.spvasm
+++ /dev/null

@@ -1,187 +0,0 @@
-; SPIR-V
-; Version: 1.3
-; Generator: Google Tint Compiler; 0
-; Bound: 113
-; Schema: 0
-               OpCapability Shader
-               OpMemoryModel Logical GLSL450
-               OpEntryPoint GLCompute %f "f"
-               OpExecutionMode %f LocalSize 1 1 1
-               OpName %u_block_std140 "u_block_std140"
-               OpMemberName %u_block_std140 0 "inner"
-               OpName %S_std140 "S_std140"
-               OpMemberName %S_std140 0 "before"
-               OpMemberName %S_std140 1 "m_0"
-               OpMemberName %S_std140 2 "m_1"
-               OpMemberName %S_std140 3 "m_2"
-               OpMemberName %S_std140 4 "m_3"
-               OpMemberName %S_std140 5 "after"
-               OpName %u "u"
-               OpName %u_block "u_block"
-               OpMemberName %u_block 0 "inner"
-               OpName %S "S"
-               OpMemberName %S 0 "before"
-               OpMemberName %S 1 "m"
-               OpMemberName %S 2 "after"
-               OpName %s "s"
-               OpName %conv_S "conv_S"
-               OpName %val "val"
-               OpName %conv_arr4_S "conv_arr4_S"
-               OpName %val_0 "val"
-               OpName %arr "arr"
-               OpName %i "i"
-               OpName %var_for_index "var_for_index"
-               OpName %load_u_inner_2_m "load_u_inner_2_m"
-               OpName %f "f"
-               OpDecorate %u_block_std140 Block
-               OpMemberDecorate %u_block_std140 0 Offset 0
-               OpMemberDecorate %S_std140 0 Offset 0
-               OpMemberDecorate %S_std140 1 Offset 8
-               OpMemberDecorate %S_std140 2 Offset 16
-               OpMemberDecorate %S_std140 3 Offset 24
-               OpMemberDecorate %S_std140 4 Offset 32
-               OpMemberDecorate %S_std140 5 Offset 40
-               OpDecorate %_arr_S_std140_uint_4 ArrayStride 48
-               OpDecorate %u NonWritable
-               OpDecorate %u DescriptorSet 0
-               OpDecorate %u Binding 0
-               OpDecorate %u_block Block
-               OpMemberDecorate %u_block 0 Offset 0
-               OpMemberDecorate %S 0 Offset 0
-               OpMemberDecorate %S 1 Offset 8
-               OpMemberDecorate %S 1 ColMajor
-               OpMemberDecorate %S 1 MatrixStride 8
-               OpMemberDecorate %S 2 Offset 40
-               OpDecorate %_arr_S_uint_4 ArrayStride 48
-               OpDecorate %s DescriptorSet 0
-               OpDecorate %s Binding 1
-        %int = OpTypeInt 32 1
-      %float = OpTypeFloat 32
-    %v2float = OpTypeVector %float 2
-   %S_std140 = OpTypeStruct %int %v2float %v2float %v2float %v2float %int
-       %uint = OpTypeInt 32 0
-     %uint_4 = OpConstant %uint 4
-%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
-%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
-%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
-          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
-%mat4v2float = OpTypeMatrix %v2float 4
-          %S = OpTypeStruct %int %mat4v2float %int
-%_arr_S_uint_4 = OpTypeArray %S %uint_4
-    %u_block = OpTypeStruct %_arr_S_uint_4
-%_ptr_StorageBuffer_u_block = OpTypePointer StorageBuffer %u_block
-          %s = OpVariable %_ptr_StorageBuffer_u_block StorageBuffer
-         %17 = OpTypeFunction %S %S_std140
-         %29 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
-%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
-         %35 = OpConstantNull %_arr_S_uint_4
-%_ptr_Function_uint = OpTypePointer Function %uint
-         %38 = OpConstantNull %uint
-       %bool = OpTypeBool
-%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
-         %51 = OpConstantNull %_arr_S_std140_uint_4
-%_ptr_Function_S = OpTypePointer Function %S
-%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
-     %uint_1 = OpConstant %uint 1
-         %64 = OpTypeFunction %mat4v2float
-     %uint_0 = OpConstant %uint 0
-     %uint_2 = OpConstant %uint 2
-%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
-%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
-     %uint_3 = OpConstant %uint 3
-       %void = OpTypeVoid
-         %87 = OpTypeFunction %void
-%_ptr_StorageBuffer__arr_S_uint_4 = OpTypePointer StorageBuffer %_arr_S_uint_4
-%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
-      %int_1 = OpConstant %int 1
-%_ptr_StorageBuffer_S = OpTypePointer StorageBuffer %S
-      %int_3 = OpConstant %int 3
-%_ptr_StorageBuffer_mat4v2float = OpTypePointer StorageBuffer %mat4v2float
-        %107 = OpConstantNull %int
-%_ptr_StorageBuffer_v2float = OpTypePointer StorageBuffer %v2float
-     %conv_S = OpFunction %S None %17
-        %val = OpFunctionParameter %S_std140
-         %20 = OpLabel
-         %21 = OpCompositeExtract %int %val 0
-         %22 = OpCompositeExtract %v2float %val 1
-         %23 = OpCompositeExtract %v2float %val 2
-         %24 = OpCompositeExtract %v2float %val 3
-         %25 = OpCompositeExtract %v2float %val 4
-         %26 = OpCompositeConstruct %mat4v2float %22 %23 %24 %25
-         %27 = OpCompositeExtract %int %val 5
-         %28 = OpCompositeConstruct %S %21 %26 %27
-               OpReturnValue %28
-               OpFunctionEnd
-%conv_arr4_S = OpFunction %_arr_S_uint_4 None %29
-      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
-         %32 = OpLabel
-        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %35
-          %i = OpVariable %_ptr_Function_uint Function %38
-%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %51
-               OpBranch %39
-         %39 = OpLabel
-               OpLoopMerge %40 %41 None
-               OpBranch %42
-         %42 = OpLabel
-         %44 = OpLoad %uint %i
-         %45 = OpULessThan %bool %44 %uint_4
-         %43 = OpLogicalNot %bool %45
-               OpSelectionMerge %47 None
-               OpBranchConditional %43 %48 %47
-         %48 = OpLabel
-               OpBranch %40
-         %47 = OpLabel
-               OpStore %var_for_index %val_0
-         %52 = OpLoad %uint %i
-         %54 = OpAccessChain %_ptr_Function_S %arr %52
-         %56 = OpLoad %uint %i
-         %58 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %56
-         %59 = OpLoad %S_std140 %58
-         %55 = OpFunctionCall %S %conv_S %59
-               OpStore %54 %55
-               OpBranch %41
-         %41 = OpLabel
-         %60 = OpLoad %uint %i
-         %62 = OpIAdd %uint %60 %uint_1
-               OpStore %i %62
-               OpBranch %39
-         %40 = OpLabel
-         %63 = OpLoad %_arr_S_uint_4 %arr
-               OpReturnValue %63
-               OpFunctionEnd
-%load_u_inner_2_m = OpFunction %mat4v2float None %64
-         %66 = OpLabel
-         %71 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
-         %74 = OpAccessChain %_ptr_Uniform_v2float %71 %uint_1
-         %75 = OpLoad %v2float %74
-         %77 = OpAccessChain %_ptr_Uniform_v2float %71 %uint_2
-         %78 = OpLoad %v2float %77
-         %81 = OpAccessChain %_ptr_Uniform_v2float %71 %uint_3
-         %82 = OpLoad %v2float %81
-         %84 = OpAccessChain %_ptr_Uniform_v2float %71 %uint_4
-         %85 = OpLoad %v2float %84
-         %86 = OpCompositeConstruct %mat4v2float %75 %78 %82 %85
-               OpReturnValue %86
-               OpFunctionEnd
-          %f = OpFunction %void None %87
-         %90 = OpLabel
-         %92 = OpAccessChain %_ptr_StorageBuffer__arr_S_uint_4 %s %uint_0
-         %95 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
-         %96 = OpLoad %_arr_S_std140_uint_4 %95
-         %93 = OpFunctionCall %_arr_S_uint_4 %conv_arr4_S %96
-               OpStore %92 %93
-         %99 = OpAccessChain %_ptr_StorageBuffer_S %s %uint_0 %int_1
-        %101 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
-        %102 = OpLoad %S_std140 %101
-        %100 = OpFunctionCall %S %conv_S %102
-               OpStore %99 %100
-        %105 = OpAccessChain %_ptr_StorageBuffer_mat4v2float %s %uint_0 %int_3 %uint_1
-        %106 = OpFunctionCall %mat4v2float %load_u_inner_2_m
-               OpStore %105 %106
-        %109 = OpAccessChain %_ptr_StorageBuffer_v2float %s %uint_0 %int_1 %uint_1 %107
-        %110 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %38 %uint_2
-        %111 = OpLoad %v2float %110
-        %112 = OpVectorShuffle %v2float %111 %111 1 0
-               OpStore %109 %112
-               OpReturn
-               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2/to_storage.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x2/to_storage.wgsl.expected.wgsl
deleted file mode 100644
index 2c43651..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat4x2/to_storage.wgsl.expected.wgsl
+++ /dev/null

@@ -1,17 +0,0 @@
-struct S {
-  before : i32,
-  m : mat4x2<f32>,
-  after : i32,
-}
-
-@group(0) @binding(0) var<uniform> u : array<S, 4>;
-
-@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
-
-@compute @workgroup_size(1)
-fn f() {
-  s = u;
-  s[1] = u[2];
-  s[3].m = u[2].m;
-  s[1].m[0] = u[0].m[1].yx;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2/to_workgroup.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x2/to_workgroup.wgsl
deleted file mode 100644
index 63c1a5b..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat4x2/to_workgroup.wgsl
+++ /dev/null

@@ -1,16 +0,0 @@
-struct S {
-  before : i32,
-  m : mat4x2<f32>,
-  after : i32,
-}
-
-@group(0) @binding(0) var<uniform> u : array<S, 4>;
-var<workgroup> w : array<S, 4>;
-
-@compute @workgroup_size(1)
-fn f() {
-    w = u;
-    w[1] = u[2];
-    w[3].m = u[2].m;
-    w[1].m[0] = u[0].m[1].yx;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2/to_workgroup.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x2/to_workgroup.wgsl.expected.dxc.hlsl
deleted file mode 100644
index e80f0d1..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat4x2/to_workgroup.wgsl.expected.dxc.hlsl
+++ /dev/null

@@ -1,65 +0,0 @@
-struct S {
-  int before;
-  float4x2 m;
-  int after;
-};
-
-cbuffer cbuffer_u : register(b0, space0) {
-  uint4 u[12];
-};
-groupshared S w[4];
-
-struct tint_symbol_1 {
-  uint local_invocation_index : SV_GroupIndex;
-};
-
-float4x2 tint_symbol_5(uint4 buffer[12], uint offset) {
-  const uint scalar_offset = ((offset + 0u)) / 4;
-  uint4 ubo_load = buffer[scalar_offset / 4];
-  const uint scalar_offset_1 = ((offset + 8u)) / 4;
-  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
-  const uint scalar_offset_2 = ((offset + 16u)) / 4;
-  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
-  const uint scalar_offset_3 = ((offset + 24u)) / 4;
-  uint4 ubo_load_3 = buffer[scalar_offset_3 / 4];
-  return float4x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)), asfloat(((scalar_offset_3 & 2) ? ubo_load_3.zw : ubo_load_3.xy)));
-}
-
-S tint_symbol_3(uint4 buffer[12], uint offset) {
-  const uint scalar_offset_4 = ((offset + 0u)) / 4;
-  const uint scalar_offset_5 = ((offset + 40u)) / 4;
-  const S tint_symbol_8 = {asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4]), tint_symbol_5(buffer, (offset + 8u)), asint(buffer[scalar_offset_5 / 4][scalar_offset_5 % 4])};
-  return tint_symbol_8;
-}
-
-typedef S tint_symbol_2_ret[4];
-tint_symbol_2_ret tint_symbol_2(uint4 buffer[12], uint offset) {
-  S arr[4] = (S[4])0;
-  {
-    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
-      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 48u)));
-    }
-  }
-  return arr;
-}
-
-void f_inner(uint local_invocation_index) {
-  {
-    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
-      const uint i = idx;
-      const S tint_symbol_7 = (S)0;
-      w[i] = tint_symbol_7;
-    }
-  }
-  GroupMemoryBarrierWithGroupSync();
-  w = tint_symbol_2(u, 0u);
-  w[1] = tint_symbol_3(u, 96u);
-  w[3].m = tint_symbol_5(u, 104u);
-  w[1].m[0] = asfloat(u[1].xy).yx;
-}
-
-[numthreads(1, 1, 1)]
-void f(tint_symbol_1 tint_symbol) {
-  f_inner(tint_symbol.local_invocation_index);
-  return;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2/to_workgroup.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x2/to_workgroup.wgsl.expected.fxc.hlsl
deleted file mode 100644
index e80f0d1..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat4x2/to_workgroup.wgsl.expected.fxc.hlsl
+++ /dev/null

@@ -1,65 +0,0 @@
-struct S {
-  int before;
-  float4x2 m;
-  int after;
-};
-
-cbuffer cbuffer_u : register(b0, space0) {
-  uint4 u[12];
-};
-groupshared S w[4];
-
-struct tint_symbol_1 {
-  uint local_invocation_index : SV_GroupIndex;
-};
-
-float4x2 tint_symbol_5(uint4 buffer[12], uint offset) {
-  const uint scalar_offset = ((offset + 0u)) / 4;
-  uint4 ubo_load = buffer[scalar_offset / 4];
-  const uint scalar_offset_1 = ((offset + 8u)) / 4;
-  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
-  const uint scalar_offset_2 = ((offset + 16u)) / 4;
-  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
-  const uint scalar_offset_3 = ((offset + 24u)) / 4;
-  uint4 ubo_load_3 = buffer[scalar_offset_3 / 4];
-  return float4x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)), asfloat(((scalar_offset_3 & 2) ? ubo_load_3.zw : ubo_load_3.xy)));
-}
-
-S tint_symbol_3(uint4 buffer[12], uint offset) {
-  const uint scalar_offset_4 = ((offset + 0u)) / 4;
-  const uint scalar_offset_5 = ((offset + 40u)) / 4;
-  const S tint_symbol_8 = {asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4]), tint_symbol_5(buffer, (offset + 8u)), asint(buffer[scalar_offset_5 / 4][scalar_offset_5 % 4])};
-  return tint_symbol_8;
-}
-
-typedef S tint_symbol_2_ret[4];
-tint_symbol_2_ret tint_symbol_2(uint4 buffer[12], uint offset) {
-  S arr[4] = (S[4])0;
-  {
-    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
-      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 48u)));
-    }
-  }
-  return arr;
-}
-
-void f_inner(uint local_invocation_index) {
-  {
-    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
-      const uint i = idx;
-      const S tint_symbol_7 = (S)0;
-      w[i] = tint_symbol_7;
-    }
-  }
-  GroupMemoryBarrierWithGroupSync();
-  w = tint_symbol_2(u, 0u);
-  w[1] = tint_symbol_3(u, 96u);
-  w[3].m = tint_symbol_5(u, 104u);
-  w[1].m[0] = asfloat(u[1].xy).yx;
-}
-
-[numthreads(1, 1, 1)]
-void f(tint_symbol_1 tint_symbol) {
-  f_inner(tint_symbol.local_invocation_index);
-  return;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2/to_workgroup.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat4x2/to_workgroup.wgsl.expected.glsl
deleted file mode 100644
index 2e8a05f..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat4x2/to_workgroup.wgsl.expected.glsl
+++ /dev/null

@@ -1,64 +0,0 @@
-#version 310 es
-
-struct S {
-  int before;
-  uint pad;
-  mat4x2 m;
-  int after;
-  uint pad_1;
-};
-
-struct S_std140 {
-  int before;
-  uint pad;
-  vec2 m_0;
-  vec2 m_1;
-  vec2 m_2;
-  vec2 m_3;
-  int after;
-  uint pad_1;
-};
-
-layout(binding = 0, std140) uniform u_block_std140_ubo {
-  S_std140 inner[4];
-} u;
-
-shared S w[4];
-S conv_S(S_std140 val) {
-  return S(val.before, val.pad, mat4x2(val.m_0, val.m_1, val.m_2, val.m_3), val.after, val.pad_1);
-}
-
-S[4] conv_arr4_S(S_std140 val[4]) {
-  S arr[4] = S[4](S(0, 0u, mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0, 0u), S(0, 0u, mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0, 0u), S(0, 0u, mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0, 0u), S(0, 0u, mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0, 0u));
-  {
-    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
-      arr[i] = conv_S(val[i]);
-    }
-  }
-  return arr;
-}
-
-mat4x2 load_u_inner_2_m() {
-  return mat4x2(u.inner[2u].m_0, u.inner[2u].m_1, u.inner[2u].m_2, u.inner[2u].m_3);
-}
-
-void f(uint local_invocation_index) {
-  {
-    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
-      uint i = idx;
-      S tint_symbol = S(0, 0u, mat4x2(vec2(0.0f), vec2(0.0f), vec2(0.0f), vec2(0.0f)), 0, 0u);
-      w[i] = tint_symbol;
-    }
-  }
-  barrier();
-  w = conv_arr4_S(u.inner);
-  w[1] = conv_S(u.inner[2u]);
-  w[3].m = load_u_inner_2_m();
-  w[1].m[0] = u.inner[0u].m_1.yx;
-}
-
-layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
-void main() {
-  f(gl_LocalInvocationIndex);
-  return;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat4x2/to_workgroup.wgsl.expected.msl
deleted file mode 100644
index 1ef4862..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat4x2/to_workgroup.wgsl.expected.msl
+++ /dev/null

@@ -1,47 +0,0 @@
-#include <metal_stdlib>
-
-using namespace metal;
-
-template<typename T, size_t N>
-struct tint_array {
-    const constant T& operator[](size_t i) const constant { return elements[i]; }
-    device T& operator[](size_t i) device { return elements[i]; }
-    const device T& operator[](size_t i) const device { return elements[i]; }
-    thread T& operator[](size_t i) thread { return elements[i]; }
-    const thread T& operator[](size_t i) const thread { return elements[i]; }
-    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
-    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
-    T elements[N];
-};
-
-struct S {
-  /* 0x0000 */ int before;
-  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
-  /* 0x0008 */ float4x2 m;
-  /* 0x0028 */ int after;
-  /* 0x002c */ tint_array<int8_t, 4> tint_pad_1;
-};
-
-struct tint_symbol_6 {
-  tint_array<S, 4> w;
-};
-
-void f_inner(uint local_invocation_index, threadgroup tint_array<S, 4>* const tint_symbol_1, const constant tint_array<S, 4>* const tint_symbol_2) {
-  for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
-    uint const i = idx;
-    S const tint_symbol = S{};
-    (*(tint_symbol_1))[i] = tint_symbol;
-  }
-  threadgroup_barrier(mem_flags::mem_threadgroup);
-  *(tint_symbol_1) = *(tint_symbol_2);
-  (*(tint_symbol_1))[1] = (*(tint_symbol_2))[2];
-  (*(tint_symbol_1))[3].m = (*(tint_symbol_2))[2].m;
-  (*(tint_symbol_1))[1].m[0] = float2((*(tint_symbol_2))[0].m[1]).yx;
-}
-
-kernel void f(const constant tint_array<S, 4>* tint_symbol_5 [[buffer(0)]], threadgroup tint_symbol_6* tint_symbol_4 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
-  threadgroup tint_array<S, 4>* const tint_symbol_3 = &((*(tint_symbol_4)).w);
-  f_inner(local_invocation_index, tint_symbol_3, tint_symbol_5);
-  return;
-}
-

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2/to_workgroup.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat4x2/to_workgroup.wgsl.expected.spvasm
deleted file mode 100644
index a690ffc..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat4x2/to_workgroup.wgsl.expected.spvasm
+++ /dev/null

@@ -1,221 +0,0 @@
-; SPIR-V
-; Version: 1.3
-; Generator: Google Tint Compiler; 0
-; Bound: 135
-; Schema: 0
-               OpCapability Shader
-               OpMemoryModel Logical GLSL450
-               OpEntryPoint GLCompute %f "f" %local_invocation_index_1
-               OpExecutionMode %f LocalSize 1 1 1
-               OpName %local_invocation_index_1 "local_invocation_index_1"
-               OpName %u_block_std140 "u_block_std140"
-               OpMemberName %u_block_std140 0 "inner"
-               OpName %S_std140 "S_std140"
-               OpMemberName %S_std140 0 "before"
-               OpMemberName %S_std140 1 "m_0"
-               OpMemberName %S_std140 2 "m_1"
-               OpMemberName %S_std140 3 "m_2"
-               OpMemberName %S_std140 4 "m_3"
-               OpMemberName %S_std140 5 "after"
-               OpName %u "u"
-               OpName %S "S"
-               OpMemberName %S 0 "before"
-               OpMemberName %S 1 "m"
-               OpMemberName %S 2 "after"
-               OpName %w "w"
-               OpName %conv_S "conv_S"
-               OpName %val "val"
-               OpName %conv_arr4_S "conv_arr4_S"
-               OpName %val_0 "val"
-               OpName %arr "arr"
-               OpName %i "i"
-               OpName %var_for_index "var_for_index"
-               OpName %load_u_inner_2_m "load_u_inner_2_m"
-               OpName %f_inner "f_inner"
-               OpName %local_invocation_index "local_invocation_index"
-               OpName %idx "idx"
-               OpName %f "f"
-               OpDecorate %local_invocation_index_1 BuiltIn LocalInvocationIndex
-               OpDecorate %u_block_std140 Block
-               OpMemberDecorate %u_block_std140 0 Offset 0
-               OpMemberDecorate %S_std140 0 Offset 0
-               OpMemberDecorate %S_std140 1 Offset 8
-               OpMemberDecorate %S_std140 2 Offset 16
-               OpMemberDecorate %S_std140 3 Offset 24
-               OpMemberDecorate %S_std140 4 Offset 32
-               OpMemberDecorate %S_std140 5 Offset 40
-               OpDecorate %_arr_S_std140_uint_4 ArrayStride 48
-               OpDecorate %u NonWritable
-               OpDecorate %u DescriptorSet 0
-               OpDecorate %u Binding 0
-               OpMemberDecorate %S 0 Offset 0
-               OpMemberDecorate %S 1 Offset 8
-               OpMemberDecorate %S 1 ColMajor
-               OpMemberDecorate %S 1 MatrixStride 8
-               OpMemberDecorate %S 2 Offset 40
-               OpDecorate %_arr_S_uint_4 ArrayStride 48
-       %uint = OpTypeInt 32 0
-%_ptr_Input_uint = OpTypePointer Input %uint
-%local_invocation_index_1 = OpVariable %_ptr_Input_uint Input
-        %int = OpTypeInt 32 1
-      %float = OpTypeFloat 32
-    %v2float = OpTypeVector %float 2
-   %S_std140 = OpTypeStruct %int %v2float %v2float %v2float %v2float %int
-     %uint_4 = OpConstant %uint 4
-%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
-%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
-%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
-          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
-%mat4v2float = OpTypeMatrix %v2float 4
-          %S = OpTypeStruct %int %mat4v2float %int
-%_arr_S_uint_4 = OpTypeArray %S %uint_4
-%_ptr_Workgroup__arr_S_uint_4 = OpTypePointer Workgroup %_arr_S_uint_4
-          %w = OpVariable %_ptr_Workgroup__arr_S_uint_4 Workgroup
-         %18 = OpTypeFunction %S %S_std140
-         %30 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
-%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
-         %36 = OpConstantNull %_arr_S_uint_4
-%_ptr_Function_uint = OpTypePointer Function %uint
-         %39 = OpConstantNull %uint
-       %bool = OpTypeBool
-%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
-         %52 = OpConstantNull %_arr_S_std140_uint_4
-%_ptr_Function_S = OpTypePointer Function %S
-%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
-     %uint_1 = OpConstant %uint 1
-         %65 = OpTypeFunction %mat4v2float
-     %uint_0 = OpConstant %uint 0
-     %uint_2 = OpConstant %uint 2
-%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
-%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
-     %uint_3 = OpConstant %uint 3
-       %void = OpTypeVoid
-         %88 = OpTypeFunction %void %uint
-%_ptr_Workgroup_S = OpTypePointer Workgroup %S
-        %106 = OpConstantNull %S
-   %uint_264 = OpConstant %uint 264
-%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
-      %int_1 = OpConstant %int 1
-      %int_3 = OpConstant %int 3
-%_ptr_Workgroup_mat4v2float = OpTypePointer Workgroup %mat4v2float
-        %124 = OpConstantNull %int
-%_ptr_Workgroup_v2float = OpTypePointer Workgroup %v2float
-        %130 = OpTypeFunction %void
-     %conv_S = OpFunction %S None %18
-        %val = OpFunctionParameter %S_std140
-         %21 = OpLabel
-         %22 = OpCompositeExtract %int %val 0
-         %23 = OpCompositeExtract %v2float %val 1
-         %24 = OpCompositeExtract %v2float %val 2
-         %25 = OpCompositeExtract %v2float %val 3
-         %26 = OpCompositeExtract %v2float %val 4
-         %27 = OpCompositeConstruct %mat4v2float %23 %24 %25 %26
-         %28 = OpCompositeExtract %int %val 5
-         %29 = OpCompositeConstruct %S %22 %27 %28
-               OpReturnValue %29
-               OpFunctionEnd
-%conv_arr4_S = OpFunction %_arr_S_uint_4 None %30
-      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
-         %33 = OpLabel
-        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %36
-          %i = OpVariable %_ptr_Function_uint Function %39
-%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %52
-               OpBranch %40
-         %40 = OpLabel
-               OpLoopMerge %41 %42 None
-               OpBranch %43
-         %43 = OpLabel
-         %45 = OpLoad %uint %i
-         %46 = OpULessThan %bool %45 %uint_4
-         %44 = OpLogicalNot %bool %46
-               OpSelectionMerge %48 None
-               OpBranchConditional %44 %49 %48
-         %49 = OpLabel
-               OpBranch %41
-         %48 = OpLabel
-               OpStore %var_for_index %val_0
-         %53 = OpLoad %uint %i
-         %55 = OpAccessChain %_ptr_Function_S %arr %53
-         %57 = OpLoad %uint %i
-         %59 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %57
-         %60 = OpLoad %S_std140 %59
-         %56 = OpFunctionCall %S %conv_S %60
-               OpStore %55 %56
-               OpBranch %42
-         %42 = OpLabel
-         %61 = OpLoad %uint %i
-         %63 = OpIAdd %uint %61 %uint_1
-               OpStore %i %63
-               OpBranch %40
-         %41 = OpLabel
-         %64 = OpLoad %_arr_S_uint_4 %arr
-               OpReturnValue %64
-               OpFunctionEnd
-%load_u_inner_2_m = OpFunction %mat4v2float None %65
-         %67 = OpLabel
-         %72 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
-         %75 = OpAccessChain %_ptr_Uniform_v2float %72 %uint_1
-         %76 = OpLoad %v2float %75
-         %78 = OpAccessChain %_ptr_Uniform_v2float %72 %uint_2
-         %79 = OpLoad %v2float %78
-         %82 = OpAccessChain %_ptr_Uniform_v2float %72 %uint_3
-         %83 = OpLoad %v2float %82
-         %85 = OpAccessChain %_ptr_Uniform_v2float %72 %uint_4
-         %86 = OpLoad %v2float %85
-         %87 = OpCompositeConstruct %mat4v2float %76 %79 %83 %86
-               OpReturnValue %87
-               OpFunctionEnd
-    %f_inner = OpFunction %void None %88
-%local_invocation_index = OpFunctionParameter %uint
-         %92 = OpLabel
-        %idx = OpVariable %_ptr_Function_uint Function %39
-               OpStore %idx %local_invocation_index
-               OpBranch %94
-         %94 = OpLabel
-               OpLoopMerge %95 %96 None
-               OpBranch %97
-         %97 = OpLabel
-         %99 = OpLoad %uint %idx
-        %100 = OpULessThan %bool %99 %uint_4
-         %98 = OpLogicalNot %bool %100
-               OpSelectionMerge %101 None
-               OpBranchConditional %98 %102 %101
-        %102 = OpLabel
-               OpBranch %95
-        %101 = OpLabel
-        %103 = OpLoad %uint %idx
-        %105 = OpAccessChain %_ptr_Workgroup_S %w %103
-               OpStore %105 %106
-               OpBranch %96
-         %96 = OpLabel
-        %107 = OpLoad %uint %idx
-        %108 = OpIAdd %uint %107 %uint_1
-               OpStore %idx %108
-               OpBranch %94
-         %95 = OpLabel
-               OpControlBarrier %uint_2 %uint_2 %uint_264
-        %113 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
-        %114 = OpLoad %_arr_S_std140_uint_4 %113
-        %111 = OpFunctionCall %_arr_S_uint_4 %conv_arr4_S %114
-               OpStore %w %111
-        %116 = OpAccessChain %_ptr_Workgroup_S %w %int_1
-        %118 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
-        %119 = OpLoad %S_std140 %118
-        %117 = OpFunctionCall %S %conv_S %119
-               OpStore %116 %117
-        %122 = OpAccessChain %_ptr_Workgroup_mat4v2float %w %int_3 %uint_1
-        %123 = OpFunctionCall %mat4v2float %load_u_inner_2_m
-               OpStore %122 %123
-        %126 = OpAccessChain %_ptr_Workgroup_v2float %w %int_1 %uint_1 %124
-        %127 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %39 %uint_2
-        %128 = OpLoad %v2float %127
-        %129 = OpVectorShuffle %v2float %128 %128 1 0
-               OpStore %126 %129
-               OpReturn
-               OpFunctionEnd
-          %f = OpFunction %void None %130
-        %132 = OpLabel
-        %134 = OpLoad %uint %local_invocation_index_1
-        %133 = OpFunctionCall %void %f_inner %134
-               OpReturn
-               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2/to_workgroup.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x2/to_workgroup.wgsl.expected.wgsl
deleted file mode 100644
index bb53c7e..0000000
--- a/test/tint/buffer/uniform/std140/struct/mat4x2/to_workgroup.wgsl.expected.wgsl
+++ /dev/null

@@ -1,17 +0,0 @@
-struct S {
-  before : i32,
-  m : mat4x2<f32>,
-  after : i32,
-}
-
-@group(0) @binding(0) var<uniform> u : array<S, 4>;
-
-var<workgroup> w : array<S, 4>;
-
-@compute @workgroup_size(1)
-fn f() {
-  w = u;
-  w[1] = u[2];
-  w[3].m = u[2].m;
-  w[1].m[0] = u[0].m[1].yx;
-}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f16/dynamic_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/dynamic_index_via_ptr.wgsl
new file mode 100644
index 0000000..d88426b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/dynamic_index_via_ptr.wgsl

@@ -0,0 +1,34 @@
+enable f16;
+
+struct Inner {
+  @size(64)
+  m : mat4x2<f16>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+var<private> counter = 0;
+fn i() -> i32 { counter++; return counter; }
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a           = &a;
+  let p_a_i         = &((*p_a)[i()]);
+  let p_a_i_a       = &((*p_a_i).a);
+  let p_a_i_a_i     = &((*p_a_i_a)[i()]);
+  let p_a_i_a_i_m   = &((*p_a_i_a_i).m);
+  let p_a_i_a_i_m_i = &((*p_a_i_a_i_m)[i()]);
+
+
+  let l_a             : array<Outer, 4> =  *p_a;
+  let l_a_i           : Outer           =  *p_a_i;
+  let l_a_i_a         : array<Inner, 4> =  *p_a_i_a;
+  let l_a_i_a_i       : Inner           =  *p_a_i_a_i;
+  let l_a_i_a_i_m     : mat4x2<f16>     =  *p_a_i_a_i_m;
+  let l_a_i_a_i_m_i   : vec2<f16>       =  *p_a_i_a_i_m_i;
+  let l_a_i_a_i_m_i_i : f16             = (*p_a_i_a_i_m_i)[i()];
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..2b3b666
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,83 @@
+struct Inner {
+  matrix<float16_t, 4, 2> m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+matrix<float16_t, 4, 2> tint_symbol_8(uint4 buffer[64], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  const uint scalar_offset_3 = ((offset + 12u)) / 4;
+  uint ubo_load_3 = buffer[scalar_offset_3 / 4][scalar_offset_3 % 4];
+  return matrix<float16_t, 4, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))));
+}
+
+Inner tint_symbol_7(uint4 buffer[64], uint offset) {
+  const Inner tint_symbol_11 = {tint_symbol_8(buffer, (offset + 0u))};
+  return tint_symbol_11;
+}
+
+typedef Inner tint_symbol_6_ret[4];
+tint_symbol_6_ret tint_symbol_6(uint4 buffer[64], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_7(buffer, (offset + (i_1 * 64u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_5(uint4 buffer[64], uint offset) {
+  const Outer tint_symbol_12 = {tint_symbol_6(buffer, (offset + 0u))};
+  return tint_symbol_12;
+}
+
+typedef Outer tint_symbol_4_ret[4];
+tint_symbol_4_ret tint_symbol_4(uint4 buffer[64], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    for(uint i_2 = 0u; (i_2 < 4u); i_2 = (i_2 + 1u)) {
+      arr_1[i_2] = tint_symbol_5(buffer, (offset + (i_2 * 256u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_a_i_save = i();
+  const int p_a_i_a_i_save = i();
+  const int p_a_i_a_i_m_i_save = i();
+  const Outer l_a[4] = tint_symbol_4(a, 0u);
+  const Outer l_a_i = tint_symbol_5(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a[4] = tint_symbol_6(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a_i = tint_symbol_7(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const matrix<float16_t, 4, 2> l_a_i_a_i_m = tint_symbol_8(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const uint scalar_offset_4 = ((((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))) + (4u * uint(p_a_i_a_i_m_i_save)))) / 4;
+  uint ubo_load_4 = a[scalar_offset_4 / 4][scalar_offset_4 % 4];
+  const vector<float16_t, 2> l_a_i_a_i_m_i = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_4 & 0xFFFF)), float16_t(f16tof32(ubo_load_4 >> 16)));
+  const int tint_symbol = p_a_i_save;
+  const int tint_symbol_1 = p_a_i_a_i_save;
+  const int tint_symbol_2 = p_a_i_a_i_m_i_save;
+  const int tint_symbol_3 = i();
+  const uint scalar_offset_bytes = (((((256u * uint(tint_symbol)) + (64u * uint(tint_symbol_1))) + (4u * uint(tint_symbol_2))) + (2u * uint(tint_symbol_3))));
+  const uint scalar_offset_index = scalar_offset_bytes / 4;
+  const float16_t l_a_i_a_i_m_i_i = float16_t(f16tof32(((a[scalar_offset_index / 4][scalar_offset_index % 4] >> (scalar_offset_bytes % 4 == 0 ? 0 : 16)) & 0xFFFF)));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..c6eb34d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,88 @@
+SKIP: FAILED
+
+struct Inner {
+  matrix<float16_t, 4, 2> m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+matrix<float16_t, 4, 2> tint_symbol_8(uint4 buffer[64], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  const uint scalar_offset_3 = ((offset + 12u)) / 4;
+  uint ubo_load_3 = buffer[scalar_offset_3 / 4][scalar_offset_3 % 4];
+  return matrix<float16_t, 4, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))));
+}
+
+Inner tint_symbol_7(uint4 buffer[64], uint offset) {
+  const Inner tint_symbol_11 = {tint_symbol_8(buffer, (offset + 0u))};
+  return tint_symbol_11;
+}
+
+typedef Inner tint_symbol_6_ret[4];
+tint_symbol_6_ret tint_symbol_6(uint4 buffer[64], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_7(buffer, (offset + (i_1 * 64u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_5(uint4 buffer[64], uint offset) {
+  const Outer tint_symbol_12 = {tint_symbol_6(buffer, (offset + 0u))};
+  return tint_symbol_12;
+}
+
+typedef Outer tint_symbol_4_ret[4];
+tint_symbol_4_ret tint_symbol_4(uint4 buffer[64], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    for(uint i_2 = 0u; (i_2 < 4u); i_2 = (i_2 + 1u)) {
+      arr_1[i_2] = tint_symbol_5(buffer, (offset + (i_2 * 256u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_a_i_save = i();
+  const int p_a_i_a_i_save = i();
+  const int p_a_i_a_i_m_i_save = i();
+  const Outer l_a[4] = tint_symbol_4(a, 0u);
+  const Outer l_a_i = tint_symbol_5(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a[4] = tint_symbol_6(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a_i = tint_symbol_7(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const matrix<float16_t, 4, 2> l_a_i_a_i_m = tint_symbol_8(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const uint scalar_offset_4 = ((((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))) + (4u * uint(p_a_i_a_i_m_i_save)))) / 4;
+  uint ubo_load_4 = a[scalar_offset_4 / 4][scalar_offset_4 % 4];
+  const vector<float16_t, 2> l_a_i_a_i_m_i = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_4 & 0xFFFF)), float16_t(f16tof32(ubo_load_4 >> 16)));
+  const int tint_symbol = p_a_i_save;
+  const int tint_symbol_1 = p_a_i_a_i_save;
+  const int tint_symbol_2 = p_a_i_a_i_m_i_save;
+  const int tint_symbol_3 = i();
+  const uint scalar_offset_bytes = (((((256u * uint(tint_symbol)) + (64u * uint(tint_symbol_1))) + (4u * uint(tint_symbol_2))) + (2u * uint(tint_symbol_3))));
+  const uint scalar_offset_index = scalar_offset_bytes / 4;
+  const float16_t l_a_i_a_i_m_i_i = float16_t(f16tof32(((a[scalar_offset_index / 4][scalar_offset_index % 4] >> (scalar_offset_bytes % 4 == 0 ? 0 : 16)) & 0xFFFF)));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x00000216C74B2530(2,10-18): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f16/dynamic_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/dynamic_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..3ef37e6
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/dynamic_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,165 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct Inner {
+  f16mat4x2 m;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+};
+
+struct Inner_std140 {
+  f16vec2 m_0;
+  f16vec2 m_1;
+  f16vec2 m_2;
+  f16vec2 m_3;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+};
+
+struct Outer {
+  Inner a[4];
+};
+
+struct Outer_std140 {
+  Inner_std140 a[4];
+};
+
+layout(binding = 0, std140) uniform a_block_std140_ubo {
+  Outer_std140 inner[4];
+} a;
+
+int counter = 0;
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+Inner conv_Inner(Inner_std140 val) {
+  return Inner(f16mat4x2(val.m_0, val.m_1, val.m_2, val.m_3), val.pad, val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.pad_7, val.pad_8, val.pad_9, val.pad_10, val.pad_11);
+}
+
+Inner[4] conv_arr4_Inner(Inner_std140 val[4]) {
+  Inner arr[4] = Inner[4](Inner(f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_Inner(val[i]);
+    }
+  }
+  return arr;
+}
+
+Outer conv_Outer(Outer_std140 val) {
+  return Outer(conv_arr4_Inner(val.a));
+}
+
+Outer[4] conv_arr4_Outer(Outer_std140 val[4]) {
+  Outer arr[4] = Outer[4](Outer(Inner[4](Inner(f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))), Outer(Inner[4](Inner(f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))), Outer(Inner[4](Inner(f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))), Outer(Inner[4](Inner(f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 
0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_Outer(val[i]);
+    }
+  }
+  return arr;
+}
+
+f16mat4x2 load_a_inner_p0_a_p1_m(uint p0, uint p1) {
+  uint s_save = p0;
+  uint s_save_1 = p1;
+  return f16mat4x2(a.inner[s_save].a[s_save_1].m_0, a.inner[s_save].a[s_save_1].m_1, a.inner[s_save].a[s_save_1].m_2, a.inner[s_save].a[s_save_1].m_3);
+}
+
+f16vec2 load_a_inner_p0_a_p1_m_p2(uint p0, uint p1, uint p2) {
+  switch(p2) {
+    case 0u: {
+      return a.inner[p0].a[p1].m_0;
+      break;
+    }
+    case 1u: {
+      return a.inner[p0].a[p1].m_1;
+      break;
+    }
+    case 2u: {
+      return a.inner[p0].a[p1].m_2;
+      break;
+    }
+    case 3u: {
+      return a.inner[p0].a[p1].m_3;
+      break;
+    }
+    default: {
+      return f16vec2(0.0hf);
+      break;
+    }
+  }
+}
+
+float16_t load_a_inner_p0_a_p1_m_p2_p3(uint p0, uint p1, uint p2, uint p3) {
+  switch(p2) {
+    case 0u: {
+      return a.inner[p0].a[p1].m_0[p3];
+      break;
+    }
+    case 1u: {
+      return a.inner[p0].a[p1].m_1[p3];
+      break;
+    }
+    case 2u: {
+      return a.inner[p0].a[p1].m_2[p3];
+      break;
+    }
+    case 3u: {
+      return a.inner[p0].a[p1].m_3[p3];
+      break;
+    }
+    default: {
+      return 0.0hf;
+      break;
+    }
+  }
+}
+
+void f() {
+  Outer p_a[4] = conv_arr4_Outer(a.inner);
+  int tint_symbol = i();
+  Outer p_a_i = conv_Outer(a.inner[tint_symbol]);
+  Inner p_a_i_a[4] = conv_arr4_Inner(a.inner[tint_symbol].a);
+  int tint_symbol_1 = i();
+  Inner p_a_i_a_i = conv_Inner(a.inner[tint_symbol].a[tint_symbol_1]);
+  f16mat4x2 p_a_i_a_i_m = load_a_inner_p0_a_p1_m(uint(tint_symbol), uint(tint_symbol_1));
+  int tint_symbol_2 = i();
+  f16vec2 p_a_i_a_i_m_i = load_a_inner_p0_a_p1_m_p2(uint(tint_symbol), uint(tint_symbol_1), uint(tint_symbol_2));
+  Outer l_a[4] = conv_arr4_Outer(a.inner);
+  Outer l_a_i = conv_Outer(a.inner[tint_symbol]);
+  Inner l_a_i_a[4] = conv_arr4_Inner(a.inner[tint_symbol].a);
+  Inner l_a_i_a_i = conv_Inner(a.inner[tint_symbol].a[tint_symbol_1]);
+  f16mat4x2 l_a_i_a_i_m = load_a_inner_p0_a_p1_m(uint(tint_symbol), uint(tint_symbol_1));
+  f16vec2 l_a_i_a_i_m_i = load_a_inner_p0_a_p1_m_p2(uint(tint_symbol), uint(tint_symbol_1), uint(tint_symbol_2));
+  int tint_symbol_3 = i();
+  float16_t l_a_i_a_i_m_i_i = load_a_inner_p0_a_p1_m_p2_p3(uint(tint_symbol), uint(tint_symbol_1), uint(tint_symbol_2), uint(tint_symbol_3));
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f16/dynamic_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/dynamic_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..2ff783e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/dynamic_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,49 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct Inner {
+  /* 0x0000 */ half4x2 m;
+  /* 0x0010 */ tint_array<int8_t, 48> tint_pad;
+};
+
+struct Outer {
+  /* 0x0000 */ tint_array<Inner, 4> a;
+};
+
+int i() {
+  thread int tint_symbol_4 = 0;
+  tint_symbol_4 = as_type<int>((as_type<uint>(tint_symbol_4) + as_type<uint>(1)));
+  return tint_symbol_4;
+}
+
+kernel void f(const constant tint_array<Outer, 4>* tint_symbol_5 [[buffer(0)]]) {
+  int const tint_symbol = i();
+  int const p_a_i_save = tint_symbol;
+  int const tint_symbol_1 = i();
+  int const p_a_i_a_i_save = tint_symbol_1;
+  int const tint_symbol_2 = i();
+  int const p_a_i_a_i_m_i_save = tint_symbol_2;
+  tint_array<Outer, 4> const l_a = *(tint_symbol_5);
+  Outer const l_a_i = (*(tint_symbol_5))[p_a_i_save];
+  tint_array<Inner, 4> const l_a_i_a = (*(tint_symbol_5))[p_a_i_save].a;
+  Inner const l_a_i_a_i = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save];
+  half4x2 const l_a_i_a_i_m = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save].m;
+  half2 const l_a_i_a_i_m_i = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save].m[p_a_i_a_i_m_i_save];
+  int const tint_symbol_3 = i();
+  half const l_a_i_a_i_m_i_i = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save].m[p_a_i_a_i_m_i_save][tint_symbol_3];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f16/dynamic_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/dynamic_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..b52433a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/dynamic_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,338 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 215
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %a_block_std140 "a_block_std140"
+               OpMemberName %a_block_std140 0 "inner"
+               OpName %Outer_std140 "Outer_std140"
+               OpMemberName %Outer_std140 0 "a"
+               OpName %Inner_std140 "Inner_std140"
+               OpMemberName %Inner_std140 0 "m_0"
+               OpMemberName %Inner_std140 1 "m_1"
+               OpMemberName %Inner_std140 2 "m_2"
+               OpMemberName %Inner_std140 3 "m_3"
+               OpName %a "a"
+               OpName %counter "counter"
+               OpName %i "i"
+               OpName %Inner "Inner"
+               OpMemberName %Inner 0 "m"
+               OpName %conv_Inner "conv_Inner"
+               OpName %val "val"
+               OpName %conv_arr4_Inner "conv_arr4_Inner"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i_0 "i"
+               OpName %var_for_index_1 "var_for_index_1"
+               OpName %Outer "Outer"
+               OpMemberName %Outer 0 "a"
+               OpName %conv_Outer "conv_Outer"
+               OpName %val_1 "val"
+               OpName %conv_arr4_Outer "conv_arr4_Outer"
+               OpName %val_2 "val"
+               OpName %arr_0 "arr"
+               OpName %i_1 "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_a_inner_p0_a_p1_m "load_a_inner_p0_a_p1_m"
+               OpName %p0 "p0"
+               OpName %p1 "p1"
+               OpName %load_a_inner_p0_a_p1_m_p2 "load_a_inner_p0_a_p1_m_p2"
+               OpName %p0_0 "p0"
+               OpName %p1_0 "p1"
+               OpName %p2 "p2"
+               OpName %load_a_inner_p0_a_p1_m_p2_p3 "load_a_inner_p0_a_p1_m_p2_p3"
+               OpName %p0_1 "p0"
+               OpName %p1_1 "p1"
+               OpName %p2_0 "p2"
+               OpName %p3 "p3"
+               OpName %f "f"
+               OpDecorate %a_block_std140 Block
+               OpMemberDecorate %a_block_std140 0 Offset 0
+               OpMemberDecorate %Outer_std140 0 Offset 0
+               OpMemberDecorate %Inner_std140 0 Offset 0
+               OpMemberDecorate %Inner_std140 1 Offset 4
+               OpMemberDecorate %Inner_std140 2 Offset 8
+               OpMemberDecorate %Inner_std140 3 Offset 12
+               OpDecorate %_arr_Inner_std140_uint_4 ArrayStride 64
+               OpDecorate %_arr_Outer_std140_uint_4 ArrayStride 256
+               OpDecorate %a NonWritable
+               OpDecorate %a DescriptorSet 0
+               OpDecorate %a Binding 0
+               OpMemberDecorate %Inner 0 Offset 0
+               OpMemberDecorate %Inner 0 ColMajor
+               OpMemberDecorate %Inner 0 MatrixStride 4
+               OpDecorate %_arr_Inner_uint_4 ArrayStride 64
+               OpMemberDecorate %Outer 0 Offset 0
+               OpDecorate %_arr_Outer_uint_4 ArrayStride 256
+       %half = OpTypeFloat 16
+     %v2half = OpTypeVector %half 2
+%Inner_std140 = OpTypeStruct %v2half %v2half %v2half %v2half
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_Inner_std140_uint_4 = OpTypeArray %Inner_std140 %uint_4
+%Outer_std140 = OpTypeStruct %_arr_Inner_std140_uint_4
+%_arr_Outer_std140_uint_4 = OpTypeArray %Outer_std140 %uint_4
+%a_block_std140 = OpTypeStruct %_arr_Outer_std140_uint_4
+%_ptr_Uniform_a_block_std140 = OpTypePointer Uniform %a_block_std140
+          %a = OpVariable %_ptr_Uniform_a_block_std140 Uniform
+        %int = OpTypeInt 32 1
+         %13 = OpConstantNull %int
+%_ptr_Private_int = OpTypePointer Private %int
+    %counter = OpVariable %_ptr_Private_int Private %13
+         %16 = OpTypeFunction %int
+      %int_1 = OpConstant %int 1
+ %mat4v2half = OpTypeMatrix %v2half 4
+      %Inner = OpTypeStruct %mat4v2half
+         %23 = OpTypeFunction %Inner %Inner_std140
+%_arr_Inner_uint_4 = OpTypeArray %Inner %uint_4
+         %35 = OpTypeFunction %_arr_Inner_uint_4 %_arr_Inner_std140_uint_4
+%_ptr_Function__arr_Inner_uint_4 = OpTypePointer Function %_arr_Inner_uint_4
+         %42 = OpConstantNull %_arr_Inner_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %45 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_Inner_std140_uint_4 = OpTypePointer Function %_arr_Inner_std140_uint_4
+         %58 = OpConstantNull %_arr_Inner_std140_uint_4
+%_ptr_Function_Inner = OpTypePointer Function %Inner
+%_ptr_Function_Inner_std140 = OpTypePointer Function %Inner_std140
+     %uint_1 = OpConstant %uint 1
+      %Outer = OpTypeStruct %_arr_Inner_uint_4
+         %71 = OpTypeFunction %Outer %Outer_std140
+%_arr_Outer_uint_4 = OpTypeArray %Outer %uint_4
+         %79 = OpTypeFunction %_arr_Outer_uint_4 %_arr_Outer_std140_uint_4
+%_ptr_Function__arr_Outer_uint_4 = OpTypePointer Function %_arr_Outer_uint_4
+         %86 = OpConstantNull %_arr_Outer_uint_4
+%_ptr_Function__arr_Outer_std140_uint_4 = OpTypePointer Function %_arr_Outer_std140_uint_4
+         %99 = OpConstantNull %_arr_Outer_std140_uint_4
+%_ptr_Function_Outer = OpTypePointer Function %Outer
+%_ptr_Function_Outer_std140 = OpTypePointer Function %Outer_std140
+        %111 = OpTypeFunction %mat4v2half %uint %uint
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_Inner_std140 = OpTypePointer Uniform %Inner_std140
+%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half
+     %uint_2 = OpConstant %uint 2
+     %uint_3 = OpConstant %uint 3
+        %136 = OpTypeFunction %v2half %uint %uint %uint
+        %156 = OpConstantNull %v2half
+        %157 = OpTypeFunction %half %uint %uint %uint %uint
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+        %179 = OpConstantNull %half
+       %void = OpTypeVoid
+        %180 = OpTypeFunction %void
+%_ptr_Uniform__arr_Outer_std140_uint_4 = OpTypePointer Uniform %_arr_Outer_std140_uint_4
+%_ptr_Uniform_Outer_std140 = OpTypePointer Uniform %Outer_std140
+%_ptr_Uniform__arr_Inner_std140_uint_4 = OpTypePointer Uniform %_arr_Inner_std140_uint_4
+          %i = OpFunction %int None %16
+         %18 = OpLabel
+         %19 = OpLoad %int %counter
+         %21 = OpIAdd %int %19 %int_1
+               OpStore %counter %21
+         %22 = OpLoad %int %counter
+               OpReturnValue %22
+               OpFunctionEnd
+ %conv_Inner = OpFunction %Inner None %23
+        %val = OpFunctionParameter %Inner_std140
+         %28 = OpLabel
+         %29 = OpCompositeExtract %v2half %val 0
+         %30 = OpCompositeExtract %v2half %val 1
+         %31 = OpCompositeExtract %v2half %val 2
+         %32 = OpCompositeExtract %v2half %val 3
+         %33 = OpCompositeConstruct %mat4v2half %29 %30 %31 %32
+         %34 = OpCompositeConstruct %Inner %33
+               OpReturnValue %34
+               OpFunctionEnd
+%conv_arr4_Inner = OpFunction %_arr_Inner_uint_4 None %35
+      %val_0 = OpFunctionParameter %_arr_Inner_std140_uint_4
+         %39 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_Inner_uint_4 Function %42
+        %i_0 = OpVariable %_ptr_Function_uint Function %45
+%var_for_index_1 = OpVariable %_ptr_Function__arr_Inner_std140_uint_4 Function %58
+               OpBranch %46
+         %46 = OpLabel
+               OpLoopMerge %47 %48 None
+               OpBranch %49
+         %49 = OpLabel
+         %51 = OpLoad %uint %i_0
+         %52 = OpULessThan %bool %51 %uint_4
+         %50 = OpLogicalNot %bool %52
+               OpSelectionMerge %54 None
+               OpBranchConditional %50 %55 %54
+         %55 = OpLabel
+               OpBranch %47
+         %54 = OpLabel
+               OpStore %var_for_index_1 %val_0
+         %59 = OpLoad %uint %i_0
+         %61 = OpAccessChain %_ptr_Function_Inner %arr %59
+         %63 = OpLoad %uint %i_0
+         %65 = OpAccessChain %_ptr_Function_Inner_std140 %var_for_index_1 %63
+         %66 = OpLoad %Inner_std140 %65
+         %62 = OpFunctionCall %Inner %conv_Inner %66
+               OpStore %61 %62
+               OpBranch %48
+         %48 = OpLabel
+         %67 = OpLoad %uint %i_0
+         %69 = OpIAdd %uint %67 %uint_1
+               OpStore %i_0 %69
+               OpBranch %46
+         %47 = OpLabel
+         %70 = OpLoad %_arr_Inner_uint_4 %arr
+               OpReturnValue %70
+               OpFunctionEnd
+ %conv_Outer = OpFunction %Outer None %71
+      %val_1 = OpFunctionParameter %Outer_std140
+         %75 = OpLabel
+         %77 = OpCompositeExtract %_arr_Inner_std140_uint_4 %val_1 0
+         %76 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr4_Inner %77
+         %78 = OpCompositeConstruct %Outer %76
+               OpReturnValue %78
+               OpFunctionEnd
+%conv_arr4_Outer = OpFunction %_arr_Outer_uint_4 None %79
+      %val_2 = OpFunctionParameter %_arr_Outer_std140_uint_4
+         %83 = OpLabel
+      %arr_0 = OpVariable %_ptr_Function__arr_Outer_uint_4 Function %86
+        %i_1 = OpVariable %_ptr_Function_uint Function %45
+%var_for_index = OpVariable %_ptr_Function__arr_Outer_std140_uint_4 Function %99
+               OpBranch %88
+         %88 = OpLabel
+               OpLoopMerge %89 %90 None
+               OpBranch %91
+         %91 = OpLabel
+         %93 = OpLoad %uint %i_1
+         %94 = OpULessThan %bool %93 %uint_4
+         %92 = OpLogicalNot %bool %94
+               OpSelectionMerge %95 None
+               OpBranchConditional %92 %96 %95
+         %96 = OpLabel
+               OpBranch %89
+         %95 = OpLabel
+               OpStore %var_for_index %val_2
+        %100 = OpLoad %uint %i_1
+        %102 = OpAccessChain %_ptr_Function_Outer %arr_0 %100
+        %104 = OpLoad %uint %i_1
+        %106 = OpAccessChain %_ptr_Function_Outer_std140 %var_for_index %104
+        %107 = OpLoad %Outer_std140 %106
+        %103 = OpFunctionCall %Outer %conv_Outer %107
+               OpStore %102 %103
+               OpBranch %90
+         %90 = OpLabel
+        %108 = OpLoad %uint %i_1
+        %109 = OpIAdd %uint %108 %uint_1
+               OpStore %i_1 %109
+               OpBranch %88
+         %89 = OpLabel
+        %110 = OpLoad %_arr_Outer_uint_4 %arr_0
+               OpReturnValue %110
+               OpFunctionEnd
+%load_a_inner_p0_a_p1_m = OpFunction %mat4v2half None %111
+         %p0 = OpFunctionParameter %uint
+         %p1 = OpFunctionParameter %uint
+        %115 = OpLabel
+        %119 = OpAccessChain %_ptr_Uniform_Inner_std140 %a %uint_0 %p0 %uint_0 %p1
+        %122 = OpAccessChain %_ptr_Uniform_v2half %119 %uint_0
+        %123 = OpLoad %v2half %122
+        %125 = OpAccessChain %_ptr_Uniform_v2half %119 %uint_1
+        %126 = OpLoad %v2half %125
+        %129 = OpAccessChain %_ptr_Uniform_v2half %119 %uint_2
+        %130 = OpLoad %v2half %129
+        %133 = OpAccessChain %_ptr_Uniform_v2half %119 %uint_3
+        %134 = OpLoad %v2half %133
+        %135 = OpCompositeConstruct %mat4v2half %123 %126 %130 %134
+               OpReturnValue %135
+               OpFunctionEnd
+%load_a_inner_p0_a_p1_m_p2 = OpFunction %v2half None %136
+       %p0_0 = OpFunctionParameter %uint
+       %p1_0 = OpFunctionParameter %uint
+         %p2 = OpFunctionParameter %uint
+        %141 = OpLabel
+               OpSelectionMerge %142 None
+               OpSwitch %p2 %143 0 %144 1 %145 2 %146 3 %147
+        %144 = OpLabel
+        %148 = OpAccessChain %_ptr_Uniform_v2half %a %uint_0 %p0_0 %uint_0 %p1_0 %uint_0
+        %149 = OpLoad %v2half %148
+               OpReturnValue %149
+        %145 = OpLabel
+        %150 = OpAccessChain %_ptr_Uniform_v2half %a %uint_0 %p0_0 %uint_0 %p1_0 %uint_1
+        %151 = OpLoad %v2half %150
+               OpReturnValue %151
+        %146 = OpLabel
+        %152 = OpAccessChain %_ptr_Uniform_v2half %a %uint_0 %p0_0 %uint_0 %p1_0 %uint_2
+        %153 = OpLoad %v2half %152
+               OpReturnValue %153
+        %147 = OpLabel
+        %154 = OpAccessChain %_ptr_Uniform_v2half %a %uint_0 %p0_0 %uint_0 %p1_0 %uint_3
+        %155 = OpLoad %v2half %154
+               OpReturnValue %155
+        %143 = OpLabel
+               OpReturnValue %156
+        %142 = OpLabel
+               OpReturnValue %156
+               OpFunctionEnd
+%load_a_inner_p0_a_p1_m_p2_p3 = OpFunction %half None %157
+       %p0_1 = OpFunctionParameter %uint
+       %p1_1 = OpFunctionParameter %uint
+       %p2_0 = OpFunctionParameter %uint
+         %p3 = OpFunctionParameter %uint
+        %163 = OpLabel
+               OpSelectionMerge %164 None
+               OpSwitch %p2_0 %165 0 %166 1 %167 2 %168 3 %169
+        %166 = OpLabel
+        %171 = OpAccessChain %_ptr_Uniform_half %a %uint_0 %p0_1 %uint_0 %p1_1 %uint_0 %p3
+        %172 = OpLoad %half %171
+               OpReturnValue %172
+        %167 = OpLabel
+        %173 = OpAccessChain %_ptr_Uniform_half %a %uint_0 %p0_1 %uint_0 %p1_1 %uint_1 %p3
+        %174 = OpLoad %half %173
+               OpReturnValue %174
+        %168 = OpLabel
+        %175 = OpAccessChain %_ptr_Uniform_half %a %uint_0 %p0_1 %uint_0 %p1_1 %uint_2 %p3
+        %176 = OpLoad %half %175
+               OpReturnValue %176
+        %169 = OpLabel
+        %177 = OpAccessChain %_ptr_Uniform_half %a %uint_0 %p0_1 %uint_0 %p1_1 %uint_3 %p3
+        %178 = OpLoad %half %177
+               OpReturnValue %178
+        %165 = OpLabel
+               OpReturnValue %179
+        %164 = OpLabel
+               OpReturnValue %179
+               OpFunctionEnd
+          %f = OpFunction %void None %180
+        %183 = OpLabel
+        %184 = OpFunctionCall %int %i
+        %185 = OpFunctionCall %int %i
+        %186 = OpFunctionCall %int %i
+        %189 = OpAccessChain %_ptr_Uniform__arr_Outer_std140_uint_4 %a %uint_0
+        %190 = OpLoad %_arr_Outer_std140_uint_4 %189
+        %187 = OpFunctionCall %_arr_Outer_uint_4 %conv_arr4_Outer %190
+        %193 = OpAccessChain %_ptr_Uniform_Outer_std140 %a %uint_0 %184
+        %194 = OpLoad %Outer_std140 %193
+        %191 = OpFunctionCall %Outer %conv_Outer %194
+        %197 = OpAccessChain %_ptr_Uniform__arr_Inner_std140_uint_4 %a %uint_0 %184 %uint_0
+        %198 = OpLoad %_arr_Inner_std140_uint_4 %197
+        %195 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr4_Inner %198
+        %200 = OpAccessChain %_ptr_Uniform_Inner_std140 %a %uint_0 %184 %uint_0 %185
+        %201 = OpLoad %Inner_std140 %200
+        %199 = OpFunctionCall %Inner %conv_Inner %201
+        %203 = OpBitcast %uint %184
+        %204 = OpBitcast %uint %185
+        %202 = OpFunctionCall %mat4v2half %load_a_inner_p0_a_p1_m %203 %204
+        %206 = OpBitcast %uint %184
+        %207 = OpBitcast %uint %185
+        %208 = OpBitcast %uint %186
+        %205 = OpFunctionCall %v2half %load_a_inner_p0_a_p1_m_p2 %206 %207 %208
+        %209 = OpFunctionCall %int %i
+        %211 = OpBitcast %uint %184
+        %212 = OpBitcast %uint %185
+        %213 = OpBitcast %uint %186
+        %214 = OpBitcast %uint %209
+        %210 = OpFunctionCall %half %load_a_inner_p0_a_p1_m_p2_p3 %211 %212 %213 %214
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f16/dynamic_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/dynamic_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..af6d6d9
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/dynamic_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,36 @@
+enable f16;
+
+struct Inner {
+  @size(64)
+  m : mat4x2<f16>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+var<private> counter = 0;
+
+fn i() -> i32 {
+  counter++;
+  return counter;
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &(a);
+  let p_a_i = &((*(p_a))[i()]);
+  let p_a_i_a = &((*(p_a_i)).a);
+  let p_a_i_a_i = &((*(p_a_i_a))[i()]);
+  let p_a_i_a_i_m = &((*(p_a_i_a_i)).m);
+  let p_a_i_a_i_m_i = &((*(p_a_i_a_i_m))[i()]);
+  let l_a : array<Outer, 4> = *(p_a);
+  let l_a_i : Outer = *(p_a_i);
+  let l_a_i_a : array<Inner, 4> = *(p_a_i_a);
+  let l_a_i_a_i : Inner = *(p_a_i_a_i);
+  let l_a_i_a_i_m : mat4x2<f16> = *(p_a_i_a_i_m);
+  let l_a_i_a_i_m_i : vec2<f16> = *(p_a_i_a_i_m_i);
+  let l_a_i_a_i_m_i_i : f16 = (*(p_a_i_a_i_m_i))[i()];
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f16/static_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/static_index_via_ptr.wgsl
new file mode 100644
index 0000000..195dc77
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/static_index_via_ptr.wgsl

@@ -0,0 +1,31 @@
+enable f16;
+
+struct Inner {
+  @size(64)
+  m : mat4x2<f16>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &a;
+  let p_a_3 = &((*p_a)[3]);
+  let p_a_3_a = &((*p_a_3).a);
+  let p_a_3_a_2 = &((*p_a_3_a)[2]);
+  let p_a_3_a_2_m = &((*p_a_3_a_2).m);
+  let p_a_3_a_2_m_1 = &((*p_a_3_a_2_m)[1]);
+
+
+  let l_a             : array<Outer, 4> = *p_a;
+  let l_a_3           : Outer           = *p_a_3;
+  let l_a_3_a         : array<Inner, 4> = *p_a_3_a;
+  let l_a_3_a_2       : Inner           = *p_a_3_a_2;
+  let l_a_3_a_2_m     : mat4x2<f16>     = *p_a_3_a_2_m;
+  let l_a_3_a_2_m_1   : vec2<f16>       = *p_a_3_a_2_m_1;
+  let l_a_3_a_2_m_1_0 : f16             = (*p_a_3_a_2_m_1)[0];
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..5da92cb
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,67 @@
+struct Inner {
+  matrix<float16_t, 4, 2> m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];
+};
+
+matrix<float16_t, 4, 2> tint_symbol_4(uint4 buffer[64], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  const uint scalar_offset_3 = ((offset + 12u)) / 4;
+  uint ubo_load_3 = buffer[scalar_offset_3 / 4][scalar_offset_3 % 4];
+  return matrix<float16_t, 4, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))));
+}
+
+Inner tint_symbol_3(uint4 buffer[64], uint offset) {
+  const Inner tint_symbol_7 = {tint_symbol_4(buffer, (offset + 0u))};
+  return tint_symbol_7;
+}
+
+typedef Inner tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[64], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_3(buffer, (offset + (i * 64u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_1(uint4 buffer[64], uint offset) {
+  const Outer tint_symbol_8 = {tint_symbol_2(buffer, (offset + 0u))};
+  return tint_symbol_8;
+}
+
+typedef Outer tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[64], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr_1[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 256u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const Outer l_a[4] = tint_symbol(a, 0u);
+  const Outer l_a_3 = tint_symbol_1(a, 768u);
+  const Inner l_a_3_a[4] = tint_symbol_2(a, 768u);
+  const Inner l_a_3_a_2 = tint_symbol_3(a, 896u);
+  const matrix<float16_t, 4, 2> l_a_3_a_2_m = tint_symbol_4(a, 896u);
+  uint ubo_load_4 = a[56].y;
+  const vector<float16_t, 2> l_a_3_a_2_m_1 = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_4 & 0xFFFF)), float16_t(f16tof32(ubo_load_4 >> 16)));
+  const float16_t l_a_3_a_2_m_1_0 = float16_t(f16tof32(((a[56].y) & 0xFFFF)));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..5745198
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,72 @@
+SKIP: FAILED
+
+struct Inner {
+  matrix<float16_t, 4, 2> m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];
+};
+
+matrix<float16_t, 4, 2> tint_symbol_4(uint4 buffer[64], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  const uint scalar_offset_3 = ((offset + 12u)) / 4;
+  uint ubo_load_3 = buffer[scalar_offset_3 / 4][scalar_offset_3 % 4];
+  return matrix<float16_t, 4, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))));
+}
+
+Inner tint_symbol_3(uint4 buffer[64], uint offset) {
+  const Inner tint_symbol_7 = {tint_symbol_4(buffer, (offset + 0u))};
+  return tint_symbol_7;
+}
+
+typedef Inner tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[64], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_3(buffer, (offset + (i * 64u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_1(uint4 buffer[64], uint offset) {
+  const Outer tint_symbol_8 = {tint_symbol_2(buffer, (offset + 0u))};
+  return tint_symbol_8;
+}
+
+typedef Outer tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[64], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr_1[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 256u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const Outer l_a[4] = tint_symbol(a, 0u);
+  const Outer l_a_3 = tint_symbol_1(a, 768u);
+  const Inner l_a_3_a[4] = tint_symbol_2(a, 768u);
+  const Inner l_a_3_a_2 = tint_symbol_3(a, 896u);
+  const matrix<float16_t, 4, 2> l_a_3_a_2_m = tint_symbol_4(a, 896u);
+  uint ubo_load_4 = a[56].y;
+  const vector<float16_t, 2> l_a_3_a_2_m_1 = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_4 & 0xFFFF)), float16_t(f16tof32(ubo_load_4 >> 16)));
+  const float16_t l_a_3_a_2_m_1_0 = float16_t(f16tof32(((a[56].y) & 0xFFFF)));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000026E60B9E9C0(2,10-18): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f16/static_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/static_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..ec129b4
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/static_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,103 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct Inner {
+  f16mat4x2 m;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+};
+
+struct Inner_std140 {
+  f16vec2 m_0;
+  f16vec2 m_1;
+  f16vec2 m_2;
+  f16vec2 m_3;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+};
+
+struct Outer {
+  Inner a[4];
+};
+
+struct Outer_std140 {
+  Inner_std140 a[4];
+};
+
+layout(binding = 0, std140) uniform a_block_std140_ubo {
+  Outer_std140 inner[4];
+} a;
+
+Inner conv_Inner(Inner_std140 val) {
+  return Inner(f16mat4x2(val.m_0, val.m_1, val.m_2, val.m_3), val.pad, val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.pad_7, val.pad_8, val.pad_9, val.pad_10, val.pad_11);
+}
+
+Inner[4] conv_arr4_Inner(Inner_std140 val[4]) {
+  Inner arr[4] = Inner[4](Inner(f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_Inner(val[i]);
+    }
+  }
+  return arr;
+}
+
+Outer conv_Outer(Outer_std140 val) {
+  return Outer(conv_arr4_Inner(val.a));
+}
+
+Outer[4] conv_arr4_Outer(Outer_std140 val[4]) {
+  Outer arr[4] = Outer[4](Outer(Inner[4](Inner(f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))), Outer(Inner[4](Inner(f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))), Outer(Inner[4](Inner(f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))), Outer(Inner[4](Inner(f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 
0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_Outer(val[i]);
+    }
+  }
+  return arr;
+}
+
+f16mat4x2 load_a_inner_3_a_2_m() {
+  return f16mat4x2(a.inner[3u].a[2u].m_0, a.inner[3u].a[2u].m_1, a.inner[3u].a[2u].m_2, a.inner[3u].a[2u].m_3);
+}
+
+void f() {
+  Outer p_a[4] = conv_arr4_Outer(a.inner);
+  Outer p_a_3 = conv_Outer(a.inner[3u]);
+  Inner p_a_3_a[4] = conv_arr4_Inner(a.inner[3u].a);
+  Inner p_a_3_a_2 = conv_Inner(a.inner[3u].a[2u]);
+  f16mat4x2 p_a_3_a_2_m = load_a_inner_3_a_2_m();
+  f16vec2 p_a_3_a_2_m_1 = a.inner[3u].a[2u].m_1;
+  Outer l_a[4] = conv_arr4_Outer(a.inner);
+  Outer l_a_3 = conv_Outer(a.inner[3u]);
+  Inner l_a_3_a[4] = conv_arr4_Inner(a.inner[3u].a);
+  Inner l_a_3_a_2 = conv_Inner(a.inner[3u].a[2u]);
+  f16mat4x2 l_a_3_a_2_m = load_a_inner_3_a_2_m();
+  f16vec2 l_a_3_a_2_m_1 = a.inner[3u].a[2u].m_1;
+  float16_t l_a_3_a_2_m_1_0 = a.inner[3u].a[2u].m_1[0u];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f16/static_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/static_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..3dee8fd
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/static_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,36 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct Inner {
+  /* 0x0000 */ half4x2 m;
+  /* 0x0010 */ tint_array<int8_t, 48> tint_pad;
+};
+
+struct Outer {
+  /* 0x0000 */ tint_array<Inner, 4> a;
+};
+
+kernel void f(const constant tint_array<Outer, 4>* tint_symbol [[buffer(0)]]) {
+  tint_array<Outer, 4> const l_a = *(tint_symbol);
+  Outer const l_a_3 = (*(tint_symbol))[3];
+  tint_array<Inner, 4> const l_a_3_a = (*(tint_symbol))[3].a;
+  Inner const l_a_3_a_2 = (*(tint_symbol))[3].a[2];
+  half4x2 const l_a_3_a_2_m = (*(tint_symbol))[3].a[2].m;
+  half2 const l_a_3_a_2_m_1 = (*(tint_symbol))[3].a[2].m[1];
+  half const l_a_3_a_2_m_1_0 = (*(tint_symbol))[3].a[2].m[1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f16/static_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/static_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..c335f50
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/static_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,237 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 148
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %a_block_std140 "a_block_std140"
+               OpMemberName %a_block_std140 0 "inner"
+               OpName %Outer_std140 "Outer_std140"
+               OpMemberName %Outer_std140 0 "a"
+               OpName %Inner_std140 "Inner_std140"
+               OpMemberName %Inner_std140 0 "m_0"
+               OpMemberName %Inner_std140 1 "m_1"
+               OpMemberName %Inner_std140 2 "m_2"
+               OpMemberName %Inner_std140 3 "m_3"
+               OpName %a "a"
+               OpName %Inner "Inner"
+               OpMemberName %Inner 0 "m"
+               OpName %conv_Inner "conv_Inner"
+               OpName %val "val"
+               OpName %conv_arr4_Inner "conv_arr4_Inner"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index_1 "var_for_index_1"
+               OpName %Outer "Outer"
+               OpMemberName %Outer 0 "a"
+               OpName %conv_Outer "conv_Outer"
+               OpName %val_1 "val"
+               OpName %conv_arr4_Outer "conv_arr4_Outer"
+               OpName %val_2 "val"
+               OpName %arr_0 "arr"
+               OpName %i_0 "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_a_inner_3_a_2_m "load_a_inner_3_a_2_m"
+               OpName %f "f"
+               OpDecorate %a_block_std140 Block
+               OpMemberDecorate %a_block_std140 0 Offset 0
+               OpMemberDecorate %Outer_std140 0 Offset 0
+               OpMemberDecorate %Inner_std140 0 Offset 0
+               OpMemberDecorate %Inner_std140 1 Offset 4
+               OpMemberDecorate %Inner_std140 2 Offset 8
+               OpMemberDecorate %Inner_std140 3 Offset 12
+               OpDecorate %_arr_Inner_std140_uint_4 ArrayStride 64
+               OpDecorate %_arr_Outer_std140_uint_4 ArrayStride 256
+               OpDecorate %a NonWritable
+               OpDecorate %a DescriptorSet 0
+               OpDecorate %a Binding 0
+               OpMemberDecorate %Inner 0 Offset 0
+               OpMemberDecorate %Inner 0 ColMajor
+               OpMemberDecorate %Inner 0 MatrixStride 4
+               OpDecorate %_arr_Inner_uint_4 ArrayStride 64
+               OpMemberDecorate %Outer 0 Offset 0
+               OpDecorate %_arr_Outer_uint_4 ArrayStride 256
+       %half = OpTypeFloat 16
+     %v2half = OpTypeVector %half 2
+%Inner_std140 = OpTypeStruct %v2half %v2half %v2half %v2half
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_Inner_std140_uint_4 = OpTypeArray %Inner_std140 %uint_4
+%Outer_std140 = OpTypeStruct %_arr_Inner_std140_uint_4
+%_arr_Outer_std140_uint_4 = OpTypeArray %Outer_std140 %uint_4
+%a_block_std140 = OpTypeStruct %_arr_Outer_std140_uint_4
+%_ptr_Uniform_a_block_std140 = OpTypePointer Uniform %a_block_std140
+          %a = OpVariable %_ptr_Uniform_a_block_std140 Uniform
+ %mat4v2half = OpTypeMatrix %v2half 4
+      %Inner = OpTypeStruct %mat4v2half
+         %12 = OpTypeFunction %Inner %Inner_std140
+%_arr_Inner_uint_4 = OpTypeArray %Inner %uint_4
+         %24 = OpTypeFunction %_arr_Inner_uint_4 %_arr_Inner_std140_uint_4
+%_ptr_Function__arr_Inner_uint_4 = OpTypePointer Function %_arr_Inner_uint_4
+         %31 = OpConstantNull %_arr_Inner_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %34 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_Inner_std140_uint_4 = OpTypePointer Function %_arr_Inner_std140_uint_4
+         %47 = OpConstantNull %_arr_Inner_std140_uint_4
+%_ptr_Function_Inner = OpTypePointer Function %Inner
+%_ptr_Function_Inner_std140 = OpTypePointer Function %Inner_std140
+     %uint_1 = OpConstant %uint 1
+      %Outer = OpTypeStruct %_arr_Inner_uint_4
+         %60 = OpTypeFunction %Outer %Outer_std140
+%_arr_Outer_uint_4 = OpTypeArray %Outer %uint_4
+         %68 = OpTypeFunction %_arr_Outer_uint_4 %_arr_Outer_std140_uint_4
+%_ptr_Function__arr_Outer_uint_4 = OpTypePointer Function %_arr_Outer_uint_4
+         %75 = OpConstantNull %_arr_Outer_uint_4
+%_ptr_Function__arr_Outer_std140_uint_4 = OpTypePointer Function %_arr_Outer_std140_uint_4
+         %88 = OpConstantNull %_arr_Outer_std140_uint_4
+%_ptr_Function_Outer = OpTypePointer Function %Outer
+%_ptr_Function_Outer_std140 = OpTypePointer Function %Outer_std140
+        %100 = OpTypeFunction %mat4v2half
+     %uint_0 = OpConstant %uint 0
+     %uint_3 = OpConstant %uint 3
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_Inner_std140 = OpTypePointer Uniform %Inner_std140
+%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half
+       %void = OpTypeVoid
+        %123 = OpTypeFunction %void
+%_ptr_Uniform__arr_Outer_std140_uint_4 = OpTypePointer Uniform %_arr_Outer_std140_uint_4
+%_ptr_Uniform_Outer_std140 = OpTypePointer Uniform %Outer_std140
+%_ptr_Uniform__arr_Inner_std140_uint_4 = OpTypePointer Uniform %_arr_Inner_std140_uint_4
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+ %conv_Inner = OpFunction %Inner None %12
+        %val = OpFunctionParameter %Inner_std140
+         %17 = OpLabel
+         %18 = OpCompositeExtract %v2half %val 0
+         %19 = OpCompositeExtract %v2half %val 1
+         %20 = OpCompositeExtract %v2half %val 2
+         %21 = OpCompositeExtract %v2half %val 3
+         %22 = OpCompositeConstruct %mat4v2half %18 %19 %20 %21
+         %23 = OpCompositeConstruct %Inner %22
+               OpReturnValue %23
+               OpFunctionEnd
+%conv_arr4_Inner = OpFunction %_arr_Inner_uint_4 None %24
+      %val_0 = OpFunctionParameter %_arr_Inner_std140_uint_4
+         %28 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_Inner_uint_4 Function %31
+          %i = OpVariable %_ptr_Function_uint Function %34
+%var_for_index_1 = OpVariable %_ptr_Function__arr_Inner_std140_uint_4 Function %47
+               OpBranch %35
+         %35 = OpLabel
+               OpLoopMerge %36 %37 None
+               OpBranch %38
+         %38 = OpLabel
+         %40 = OpLoad %uint %i
+         %41 = OpULessThan %bool %40 %uint_4
+         %39 = OpLogicalNot %bool %41
+               OpSelectionMerge %43 None
+               OpBranchConditional %39 %44 %43
+         %44 = OpLabel
+               OpBranch %36
+         %43 = OpLabel
+               OpStore %var_for_index_1 %val_0
+         %48 = OpLoad %uint %i
+         %50 = OpAccessChain %_ptr_Function_Inner %arr %48
+         %52 = OpLoad %uint %i
+         %54 = OpAccessChain %_ptr_Function_Inner_std140 %var_for_index_1 %52
+         %55 = OpLoad %Inner_std140 %54
+         %51 = OpFunctionCall %Inner %conv_Inner %55
+               OpStore %50 %51
+               OpBranch %37
+         %37 = OpLabel
+         %56 = OpLoad %uint %i
+         %58 = OpIAdd %uint %56 %uint_1
+               OpStore %i %58
+               OpBranch %35
+         %36 = OpLabel
+         %59 = OpLoad %_arr_Inner_uint_4 %arr
+               OpReturnValue %59
+               OpFunctionEnd
+ %conv_Outer = OpFunction %Outer None %60
+      %val_1 = OpFunctionParameter %Outer_std140
+         %64 = OpLabel
+         %66 = OpCompositeExtract %_arr_Inner_std140_uint_4 %val_1 0
+         %65 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr4_Inner %66
+         %67 = OpCompositeConstruct %Outer %65
+               OpReturnValue %67
+               OpFunctionEnd
+%conv_arr4_Outer = OpFunction %_arr_Outer_uint_4 None %68
+      %val_2 = OpFunctionParameter %_arr_Outer_std140_uint_4
+         %72 = OpLabel
+      %arr_0 = OpVariable %_ptr_Function__arr_Outer_uint_4 Function %75
+        %i_0 = OpVariable %_ptr_Function_uint Function %34
+%var_for_index = OpVariable %_ptr_Function__arr_Outer_std140_uint_4 Function %88
+               OpBranch %77
+         %77 = OpLabel
+               OpLoopMerge %78 %79 None
+               OpBranch %80
+         %80 = OpLabel
+         %82 = OpLoad %uint %i_0
+         %83 = OpULessThan %bool %82 %uint_4
+         %81 = OpLogicalNot %bool %83
+               OpSelectionMerge %84 None
+               OpBranchConditional %81 %85 %84
+         %85 = OpLabel
+               OpBranch %78
+         %84 = OpLabel
+               OpStore %var_for_index %val_2
+         %89 = OpLoad %uint %i_0
+         %91 = OpAccessChain %_ptr_Function_Outer %arr_0 %89
+         %93 = OpLoad %uint %i_0
+         %95 = OpAccessChain %_ptr_Function_Outer_std140 %var_for_index %93
+         %96 = OpLoad %Outer_std140 %95
+         %92 = OpFunctionCall %Outer %conv_Outer %96
+               OpStore %91 %92
+               OpBranch %79
+         %79 = OpLabel
+         %97 = OpLoad %uint %i_0
+         %98 = OpIAdd %uint %97 %uint_1
+               OpStore %i_0 %98
+               OpBranch %77
+         %78 = OpLabel
+         %99 = OpLoad %_arr_Outer_uint_4 %arr_0
+               OpReturnValue %99
+               OpFunctionEnd
+%load_a_inner_3_a_2_m = OpFunction %mat4v2half None %100
+        %102 = OpLabel
+        %108 = OpAccessChain %_ptr_Uniform_Inner_std140 %a %uint_0 %uint_3 %uint_0 %uint_2
+        %111 = OpAccessChain %_ptr_Uniform_v2half %108 %uint_0
+        %112 = OpLoad %v2half %111
+        %114 = OpAccessChain %_ptr_Uniform_v2half %108 %uint_1
+        %115 = OpLoad %v2half %114
+        %117 = OpAccessChain %_ptr_Uniform_v2half %108 %uint_2
+        %118 = OpLoad %v2half %117
+        %120 = OpAccessChain %_ptr_Uniform_v2half %108 %uint_3
+        %121 = OpLoad %v2half %120
+        %122 = OpCompositeConstruct %mat4v2half %112 %115 %118 %121
+               OpReturnValue %122
+               OpFunctionEnd
+          %f = OpFunction %void None %123
+        %126 = OpLabel
+        %129 = OpAccessChain %_ptr_Uniform__arr_Outer_std140_uint_4 %a %uint_0
+        %130 = OpLoad %_arr_Outer_std140_uint_4 %129
+        %127 = OpFunctionCall %_arr_Outer_uint_4 %conv_arr4_Outer %130
+        %133 = OpAccessChain %_ptr_Uniform_Outer_std140 %a %uint_0 %uint_3
+        %134 = OpLoad %Outer_std140 %133
+        %131 = OpFunctionCall %Outer %conv_Outer %134
+        %137 = OpAccessChain %_ptr_Uniform__arr_Inner_std140_uint_4 %a %uint_0 %uint_3 %uint_0
+        %138 = OpLoad %_arr_Inner_std140_uint_4 %137
+        %135 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr4_Inner %138
+        %140 = OpAccessChain %_ptr_Uniform_Inner_std140 %a %uint_0 %uint_3 %uint_0 %uint_2
+        %141 = OpLoad %Inner_std140 %140
+        %139 = OpFunctionCall %Inner %conv_Inner %141
+        %142 = OpFunctionCall %mat4v2half %load_a_inner_3_a_2_m
+        %143 = OpAccessChain %_ptr_Uniform_v2half %a %uint_0 %uint_3 %uint_0 %uint_2 %uint_1
+        %144 = OpLoad %v2half %143
+        %146 = OpAccessChain %_ptr_Uniform_half %a %uint_0 %uint_3 %uint_0 %uint_2 %uint_1 %34
+        %147 = OpLoad %half %146
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f16/static_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/static_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..c3405e4
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/static_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,29 @@
+enable f16;
+
+struct Inner {
+  @size(64)
+  m : mat4x2<f16>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &(a);
+  let p_a_3 = &((*(p_a))[3]);
+  let p_a_3_a = &((*(p_a_3)).a);
+  let p_a_3_a_2 = &((*(p_a_3_a))[2]);
+  let p_a_3_a_2_m = &((*(p_a_3_a_2)).m);
+  let p_a_3_a_2_m_1 = &((*(p_a_3_a_2_m))[1]);
+  let l_a : array<Outer, 4> = *(p_a);
+  let l_a_3 : Outer = *(p_a_3);
+  let l_a_3_a : array<Inner, 4> = *(p_a_3_a);
+  let l_a_3_a_2 : Inner = *(p_a_3_a_2);
+  let l_a_3_a_2_m : mat4x2<f16> = *(p_a_3_a_2_m);
+  let l_a_3_a_2_m_1 : vec2<f16> = *(p_a_3_a_2_m_1);
+  let l_a_3_a_2_m_1_0 : f16 = (*(p_a_3_a_2_m_1))[0];
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_builtin.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_builtin.wgsl
new file mode 100644
index 0000000..254c166
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_builtin.wgsl

@@ -0,0 +1,17 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat4x2<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    let t = transpose(u[2].m);
+    let l = length(u[0].m[1].yx);
+    let a = abs(u[0].m[1].yx.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_builtin.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_builtin.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..81cfbc4
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_builtin.wgsl.expected.dxc.hlsl

@@ -0,0 +1,25 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+
+matrix<float16_t, 4, 2> tint_symbol(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  const uint scalar_offset_3 = ((offset + 12u)) / 4;
+  uint ubo_load_3 = buffer[scalar_offset_3 / 4][scalar_offset_3 % 4];
+  return matrix<float16_t, 4, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 2, 4> t = transpose(tint_symbol(u, 260u));
+  uint ubo_load_4 = u[0].z;
+  const float16_t l = length(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_4 & 0xFFFF)), float16_t(f16tof32(ubo_load_4 >> 16))).yx);
+  uint ubo_load_5 = u[0].z;
+  const float16_t a = abs(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_5 & 0xFFFF)), float16_t(f16tof32(ubo_load_5 >> 16))).yx.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_builtin.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_builtin.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..2786ec4
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_builtin.wgsl.expected.fxc.hlsl

@@ -0,0 +1,30 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+
+matrix<float16_t, 4, 2> tint_symbol(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  const uint scalar_offset_3 = ((offset + 12u)) / 4;
+  uint ubo_load_3 = buffer[scalar_offset_3 / 4][scalar_offset_3 % 4];
+  return matrix<float16_t, 4, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 2, 4> t = transpose(tint_symbol(u, 260u));
+  uint ubo_load_4 = u[0].z;
+  const float16_t l = length(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_4 & 0xFFFF)), float16_t(f16tof32(ubo_load_4 >> 16))).yx);
+  uint ubo_load_5 = u[0].z;
+  const float16_t a = abs(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_5 & 0xFFFF)), float16_t(f16tof32(ubo_load_5 >> 16))).yx.x);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001CC7C77CFC0(5,8-16): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_builtin.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_builtin.wgsl.expected.glsl
new file mode 100644
index 0000000..ee9719f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_builtin.wgsl.expected.glsl

@@ -0,0 +1,89 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct S {
+  int before;
+  f16mat4x2 m;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  int after;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+};
+
+struct S_std140 {
+  int before;
+  f16vec2 m_0;
+  f16vec2 m_1;
+  f16vec2 m_2;
+  f16vec2 m_3;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  int after;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  S_std140 inner[4];
+} u;
+
+f16mat4x2 load_u_inner_2_m() {
+  return f16mat4x2(u.inner[2u].m_0, u.inner[2u].m_1, u.inner[2u].m_2, u.inner[2u].m_3);
+}
+
+void f() {
+  f16mat2x4 t = transpose(load_u_inner_2_m());
+  float16_t l = length(u.inner[0u].m_1.yx);
+  float16_t a = abs(u.inner[0u].m_1.yx[0u]);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_builtin.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_builtin.wgsl.expected.msl
new file mode 100644
index 0000000..49c1370
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_builtin.wgsl.expected.msl

@@ -0,0 +1,31 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ half4x2 m;
+  /* 0x0014 */ tint_array<int8_t, 44> tint_pad;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_1;
+};
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol [[buffer(0)]]) {
+  half2x4 const t = transpose((*(tint_symbol))[2].m);
+  half const l = length(half2((*(tint_symbol))[0].m[1]).yx);
+  half const a = fabs(half2((*(tint_symbol))[0].m[1]).yx[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_builtin.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_builtin.wgsl.expected.spvasm
new file mode 100644
index 0000000..08337b2
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_builtin.wgsl.expected.spvasm

@@ -0,0 +1,90 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 55
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+         %45 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "m_2"
+               OpMemberName %S_std140 4 "m_3"
+               OpMemberName %S_std140 5 "after"
+               OpName %u "u"
+               OpName %load_u_inner_2_m "load_u_inner_2_m"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 4
+               OpMemberDecorate %S_std140 2 Offset 8
+               OpMemberDecorate %S_std140 3 Offset 12
+               OpMemberDecorate %S_std140 4 Offset 16
+               OpMemberDecorate %S_std140 5 Offset 64
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+        %int = OpTypeInt 32 1
+       %half = OpTypeFloat 16
+     %v2half = OpTypeVector %half 2
+   %S_std140 = OpTypeStruct %int %v2half %v2half %v2half %v2half %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat4v2half = OpTypeMatrix %v2half 4
+         %11 = OpTypeFunction %mat4v2half
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+     %uint_1 = OpConstant %uint 1
+%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half
+     %uint_3 = OpConstant %uint 3
+       %void = OpTypeVoid
+         %36 = OpTypeFunction %void
+     %v4half = OpTypeVector %half 4
+ %mat2v4half = OpTypeMatrix %v4half 2
+         %46 = OpConstantNull %uint
+%load_u_inner_2_m = OpFunction %mat4v2half None %11
+         %14 = OpLabel
+         %19 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %23 = OpAccessChain %_ptr_Uniform_v2half %19 %uint_1
+         %24 = OpLoad %v2half %23
+         %26 = OpAccessChain %_ptr_Uniform_v2half %19 %uint_2
+         %27 = OpLoad %v2half %26
+         %30 = OpAccessChain %_ptr_Uniform_v2half %19 %uint_3
+         %31 = OpLoad %v2half %30
+         %33 = OpAccessChain %_ptr_Uniform_v2half %19 %uint_4
+         %34 = OpLoad %v2half %33
+         %35 = OpCompositeConstruct %mat4v2half %24 %27 %31 %34
+               OpReturnValue %35
+               OpFunctionEnd
+          %f = OpFunction %void None %36
+         %39 = OpLabel
+         %43 = OpFunctionCall %mat4v2half %load_u_inner_2_m
+         %40 = OpTranspose %mat2v4half %43
+         %47 = OpAccessChain %_ptr_Uniform_v2half %u %uint_0 %46 %uint_2
+         %48 = OpLoad %v2half %47
+         %49 = OpVectorShuffle %v2half %48 %48 1 0
+         %44 = OpExtInst %half %45 Length %49
+         %51 = OpAccessChain %_ptr_Uniform_v2half %u %uint_0 %46 %uint_2
+         %52 = OpLoad %v2half %51
+         %53 = OpVectorShuffle %v2half %52 %52 1 0
+         %54 = OpCompositeExtract %half %53 0
+         %50 = OpExtInst %half %45 FAbs %54
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_builtin.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_builtin.wgsl.expected.wgsl
new file mode 100644
index 0000000..61a17a6
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_builtin.wgsl.expected.wgsl

@@ -0,0 +1,17 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat4x2<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let t = transpose(u[2].m);
+  let l = length(u[0].m[1].yx);
+  let a = abs(u[0].m[1].yx.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_fn.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_fn.wgsl
new file mode 100644
index 0000000..8ddc186
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_fn.wgsl

@@ -0,0 +1,25 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat4x2<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+fn a(a : array<S, 4>) {}
+fn b(s : S) {}
+fn c(m : mat4x2<f16>) {}
+fn d(v : vec2<f16>) {}
+fn e(f : f16) {}
+
+@compute @workgroup_size(1)
+fn f() {
+    a(u);
+    b(u[2]);
+    c(u[2].m);
+    d(u[0].m[1].yx);
+    e(u[0].m[1].yx.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_fn.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_fn.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..7620fd9
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_fn.wgsl.expected.dxc.hlsl

@@ -0,0 +1,66 @@
+struct S {
+  int before;
+  matrix<float16_t, 4, 2> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+
+void a(S a_1[4]) {
+}
+
+void b(S s) {
+}
+
+void c(matrix<float16_t, 4, 2> m) {
+}
+
+void d(vector<float16_t, 2> v) {
+}
+
+void e(float16_t f_1) {
+}
+
+matrix<float16_t, 4, 2> tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  const uint scalar_offset_3 = ((offset + 12u)) / 4;
+  uint ubo_load_3 = buffer[scalar_offset_3 / 4][scalar_offset_3 % 4];
+  return matrix<float16_t, 4, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))));
+}
+
+S tint_symbol_1(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_4 = ((offset + 0u)) / 4;
+  const uint scalar_offset_5 = ((offset + 64u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4]), tint_symbol_3(buffer, (offset + 4u)), asint(buffer[scalar_offset_5 / 4][scalar_offset_5 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(tint_symbol_1(u, 256u));
+  c(tint_symbol_3(u, 260u));
+  uint ubo_load_4 = u[0].z;
+  d(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_4 & 0xFFFF)), float16_t(f16tof32(ubo_load_4 >> 16))).yx);
+  uint ubo_load_5 = u[0].z;
+  e(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_5 & 0xFFFF)), float16_t(f16tof32(ubo_load_5 >> 16))).yx.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_fn.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_fn.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..4db57b9
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_fn.wgsl.expected.fxc.hlsl

@@ -0,0 +1,71 @@
+SKIP: FAILED
+
+struct S {
+  int before;
+  matrix<float16_t, 4, 2> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+
+void a(S a_1[4]) {
+}
+
+void b(S s) {
+}
+
+void c(matrix<float16_t, 4, 2> m) {
+}
+
+void d(vector<float16_t, 2> v) {
+}
+
+void e(float16_t f_1) {
+}
+
+matrix<float16_t, 4, 2> tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  const uint scalar_offset_3 = ((offset + 12u)) / 4;
+  uint ubo_load_3 = buffer[scalar_offset_3 / 4][scalar_offset_3 % 4];
+  return matrix<float16_t, 4, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))));
+}
+
+S tint_symbol_1(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_4 = ((offset + 0u)) / 4;
+  const uint scalar_offset_5 = ((offset + 64u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4]), tint_symbol_3(buffer, (offset + 4u)), asint(buffer[scalar_offset_5 / 4][scalar_offset_5 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(tint_symbol_1(u, 256u));
+  c(tint_symbol_3(u, 260u));
+  uint ubo_load_4 = u[0].z;
+  d(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_4 & 0xFFFF)), float16_t(f16tof32(ubo_load_4 >> 16))).yx);
+  uint ubo_load_5 = u[0].z;
+  e(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_5 & 0xFFFF)), float16_t(f16tof32(ubo_load_5 >> 16))).yx.x);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001CA88A4FDA0(3,10-18): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_fn.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_fn.wgsl.expected.glsl
new file mode 100644
index 0000000..abb3943
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_fn.wgsl.expected.glsl

@@ -0,0 +1,120 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct S {
+  int before;
+  f16mat4x2 m;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  int after;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+};
+
+struct S_std140 {
+  int before;
+  f16vec2 m_0;
+  f16vec2 m_1;
+  f16vec2 m_2;
+  f16vec2 m_3;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  int after;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  S_std140 inner[4];
+} u;
+
+void a(S a_1[4]) {
+}
+
+void b(S s) {
+}
+
+void c(f16mat4x2 m) {
+}
+
+void d(f16vec2 v) {
+}
+
+void e(float16_t f_1) {
+}
+
+S conv_S(S_std140 val) {
+  return S(val.before, f16mat4x2(val.m_0, val.m_1, val.m_2, val.m_3), val.pad, val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.pad_7, val.pad_8, val.pad_9, val.pad_10, val.after, val.pad_11, val.pad_12, val.pad_13, val.pad_14, val.pad_15, val.pad_16, val.pad_17, val.pad_18, val.pad_19, val.pad_20, val.pad_21, val.pad_22, val.pad_23, val.pad_24, val.pad_25);
+}
+
+S[4] conv_arr4_S(S_std140 val[4]) {
+  S arr[4] = S[4](S(0, f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_S(val[i]);
+    }
+  }
+  return arr;
+}
+
+f16mat4x2 load_u_inner_2_m() {
+  return f16mat4x2(u.inner[2u].m_0, u.inner[2u].m_1, u.inner[2u].m_2, u.inner[2u].m_3);
+}
+
+void f() {
+  a(conv_arr4_S(u.inner));
+  b(conv_S(u.inner[2u]));
+  c(load_u_inner_2_m());
+  d(u.inner[0u].m_1.yx);
+  e(u.inner[0u].m_1.yx[0u]);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_fn.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_fn.wgsl.expected.msl
new file mode 100644
index 0000000..527770e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_fn.wgsl.expected.msl

@@ -0,0 +1,48 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ half4x2 m;
+  /* 0x0014 */ tint_array<int8_t, 44> tint_pad;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_1;
+};
+
+void a(tint_array<S, 4> a_1) {
+}
+
+void b(S s) {
+}
+
+void c(half4x2 m) {
+}
+
+void d(half2 v) {
+}
+
+void e(half f_1) {
+}
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol [[buffer(0)]]) {
+  a(*(tint_symbol));
+  b((*(tint_symbol))[2]);
+  c((*(tint_symbol))[2].m);
+  d(half2((*(tint_symbol))[0].m[1]).yx);
+  e(half2((*(tint_symbol))[0].m[1]).yx[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_fn.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_fn.wgsl.expected.spvasm
new file mode 100644
index 0000000..3174741
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_fn.wgsl.expected.spvasm

@@ -0,0 +1,215 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 128
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "m_2"
+               OpMemberName %S_std140 4 "m_3"
+               OpMemberName %S_std140 5 "after"
+               OpName %u "u"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %a "a"
+               OpName %a_1 "a_1"
+               OpName %b "b"
+               OpName %s "s"
+               OpName %c "c"
+               OpName %m "m"
+               OpName %d "d"
+               OpName %v "v"
+               OpName %e "e"
+               OpName %f_1 "f_1"
+               OpName %conv_S "conv_S"
+               OpName %val "val"
+               OpName %conv_arr4_S "conv_arr4_S"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_u_inner_2_m "load_u_inner_2_m"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 4
+               OpMemberDecorate %S_std140 2 Offset 8
+               OpMemberDecorate %S_std140 3 Offset 12
+               OpMemberDecorate %S_std140 4 Offset 16
+               OpMemberDecorate %S_std140 5 Offset 64
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 4
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 4
+               OpMemberDecorate %S 2 Offset 64
+               OpDecorate %_arr_S_uint_4 ArrayStride 128
+        %int = OpTypeInt 32 1
+       %half = OpTypeFloat 16
+     %v2half = OpTypeVector %half 2
+   %S_std140 = OpTypeStruct %int %v2half %v2half %v2half %v2half %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+       %void = OpTypeVoid
+ %mat4v2half = OpTypeMatrix %v2half 4
+          %S = OpTypeStruct %int %mat4v2half %int
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+         %11 = OpTypeFunction %void %_arr_S_uint_4
+         %19 = OpTypeFunction %void %S
+         %23 = OpTypeFunction %void %mat4v2half
+         %27 = OpTypeFunction %void %v2half
+         %31 = OpTypeFunction %void %half
+         %35 = OpTypeFunction %S %S_std140
+         %47 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
+%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
+         %53 = OpConstantNull %_arr_S_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %56 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
+         %69 = OpConstantNull %_arr_S_std140_uint_4
+%_ptr_Function_S = OpTypePointer Function %S
+%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
+     %uint_1 = OpConstant %uint 1
+         %82 = OpTypeFunction %mat4v2half
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half
+     %uint_3 = OpConstant %uint 3
+        %105 = OpTypeFunction %void
+%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
+          %a = OpFunction %void None %11
+        %a_1 = OpFunctionParameter %_arr_S_uint_4
+         %18 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %b = OpFunction %void None %19
+          %s = OpFunctionParameter %S
+         %22 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %c = OpFunction %void None %23
+          %m = OpFunctionParameter %mat4v2half
+         %26 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %d = OpFunction %void None %27
+          %v = OpFunctionParameter %v2half
+         %30 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %e = OpFunction %void None %31
+        %f_1 = OpFunctionParameter %half
+         %34 = OpLabel
+               OpReturn
+               OpFunctionEnd
+     %conv_S = OpFunction %S None %35
+        %val = OpFunctionParameter %S_std140
+         %38 = OpLabel
+         %39 = OpCompositeExtract %int %val 0
+         %40 = OpCompositeExtract %v2half %val 1
+         %41 = OpCompositeExtract %v2half %val 2
+         %42 = OpCompositeExtract %v2half %val 3
+         %43 = OpCompositeExtract %v2half %val 4
+         %44 = OpCompositeConstruct %mat4v2half %40 %41 %42 %43
+         %45 = OpCompositeExtract %int %val 5
+         %46 = OpCompositeConstruct %S %39 %44 %45
+               OpReturnValue %46
+               OpFunctionEnd
+%conv_arr4_S = OpFunction %_arr_S_uint_4 None %47
+      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
+         %50 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %53
+          %i = OpVariable %_ptr_Function_uint Function %56
+%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %69
+               OpBranch %57
+         %57 = OpLabel
+               OpLoopMerge %58 %59 None
+               OpBranch %60
+         %60 = OpLabel
+         %62 = OpLoad %uint %i
+         %63 = OpULessThan %bool %62 %uint_4
+         %61 = OpLogicalNot %bool %63
+               OpSelectionMerge %65 None
+               OpBranchConditional %61 %66 %65
+         %66 = OpLabel
+               OpBranch %58
+         %65 = OpLabel
+               OpStore %var_for_index %val_0
+         %70 = OpLoad %uint %i
+         %72 = OpAccessChain %_ptr_Function_S %arr %70
+         %74 = OpLoad %uint %i
+         %76 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %74
+         %77 = OpLoad %S_std140 %76
+         %73 = OpFunctionCall %S %conv_S %77
+               OpStore %72 %73
+               OpBranch %59
+         %59 = OpLabel
+         %78 = OpLoad %uint %i
+         %80 = OpIAdd %uint %78 %uint_1
+               OpStore %i %80
+               OpBranch %57
+         %58 = OpLabel
+         %81 = OpLoad %_arr_S_uint_4 %arr
+               OpReturnValue %81
+               OpFunctionEnd
+%load_u_inner_2_m = OpFunction %mat4v2half None %82
+         %84 = OpLabel
+         %89 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %92 = OpAccessChain %_ptr_Uniform_v2half %89 %uint_1
+         %93 = OpLoad %v2half %92
+         %95 = OpAccessChain %_ptr_Uniform_v2half %89 %uint_2
+         %96 = OpLoad %v2half %95
+         %99 = OpAccessChain %_ptr_Uniform_v2half %89 %uint_3
+        %100 = OpLoad %v2half %99
+        %102 = OpAccessChain %_ptr_Uniform_v2half %89 %uint_4
+        %103 = OpLoad %v2half %102
+        %104 = OpCompositeConstruct %mat4v2half %93 %96 %100 %103
+               OpReturnValue %104
+               OpFunctionEnd
+          %f = OpFunction %void None %105
+        %107 = OpLabel
+        %111 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
+        %112 = OpLoad %_arr_S_std140_uint_4 %111
+        %109 = OpFunctionCall %_arr_S_uint_4 %conv_arr4_S %112
+        %108 = OpFunctionCall %void %a %109
+        %115 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+        %116 = OpLoad %S_std140 %115
+        %114 = OpFunctionCall %S %conv_S %116
+        %113 = OpFunctionCall %void %b %114
+        %118 = OpFunctionCall %mat4v2half %load_u_inner_2_m
+        %117 = OpFunctionCall %void %c %118
+        %120 = OpAccessChain %_ptr_Uniform_v2half %u %uint_0 %56 %uint_2
+        %121 = OpLoad %v2half %120
+        %122 = OpVectorShuffle %v2half %121 %121 1 0
+        %119 = OpFunctionCall %void %d %122
+        %124 = OpAccessChain %_ptr_Uniform_v2half %u %uint_0 %56 %uint_2
+        %125 = OpLoad %v2half %124
+        %126 = OpVectorShuffle %v2half %125 %125 1 0
+        %127 = OpCompositeExtract %half %126 0
+        %123 = OpFunctionCall %void %e %127
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_fn.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_fn.wgsl.expected.wgsl
new file mode 100644
index 0000000..61c115b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_fn.wgsl.expected.wgsl

@@ -0,0 +1,34 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat4x2<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+fn a(a : array<S, 4>) {
+}
+
+fn b(s : S) {
+}
+
+fn c(m : mat4x2<f16>) {
+}
+
+fn d(v : vec2<f16>) {
+}
+
+fn e(f : f16) {
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  a(u);
+  b(u[2]);
+  c(u[2].m);
+  d(u[0].m[1].yx);
+  e(u[0].m[1].yx.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_private.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_private.wgsl
new file mode 100644
index 0000000..101080f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_private.wgsl

@@ -0,0 +1,19 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat4x2<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+var<private> p : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    p = u;
+    p[1] = u[2];
+    p[3].m = u[2].m;
+    p[1].m[0] = u[0].m[1].yx;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_private.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_private.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..717e390
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_private.wgsl.expected.dxc.hlsl

@@ -0,0 +1,50 @@
+struct S {
+  int before;
+  matrix<float16_t, 4, 2> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+static S p[4] = (S[4])0;
+
+matrix<float16_t, 4, 2> tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  const uint scalar_offset_3 = ((offset + 12u)) / 4;
+  uint ubo_load_3 = buffer[scalar_offset_3 / 4][scalar_offset_3 % 4];
+  return matrix<float16_t, 4, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))));
+}
+
+S tint_symbol_1(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_4 = ((offset + 0u)) / 4;
+  const uint scalar_offset_5 = ((offset + 64u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4]), tint_symbol_3(buffer, (offset + 4u)), asint(buffer[scalar_offset_5 / 4][scalar_offset_5 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = tint_symbol_1(u, 256u);
+  p[3].m = tint_symbol_3(u, 260u);
+  uint ubo_load_4 = u[0].z;
+  p[1].m[0] = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_4 & 0xFFFF)), float16_t(f16tof32(ubo_load_4 >> 16))).yx;
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_private.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_private.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..12b11cc
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_private.wgsl.expected.fxc.hlsl

@@ -0,0 +1,55 @@
+SKIP: FAILED
+
+struct S {
+  int before;
+  matrix<float16_t, 4, 2> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+static S p[4] = (S[4])0;
+
+matrix<float16_t, 4, 2> tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  const uint scalar_offset_3 = ((offset + 12u)) / 4;
+  uint ubo_load_3 = buffer[scalar_offset_3 / 4][scalar_offset_3 % 4];
+  return matrix<float16_t, 4, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))));
+}
+
+S tint_symbol_1(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_4 = ((offset + 0u)) / 4;
+  const uint scalar_offset_5 = ((offset + 64u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4]), tint_symbol_3(buffer, (offset + 4u)), asint(buffer[scalar_offset_5 / 4][scalar_offset_5 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = tint_symbol_1(u, 256u);
+  p[3].m = tint_symbol_3(u, 260u);
+  uint ubo_load_4 = u[0].z;
+  p[1].m[0] = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_4 & 0xFFFF)), float16_t(f16tof32(ubo_load_4 >> 16))).yx;
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000002074AC3FD30(3,10-18): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_private.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_private.wgsl.expected.glsl
new file mode 100644
index 0000000..4020bea
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_private.wgsl.expected.glsl

@@ -0,0 +1,105 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct S {
+  int before;
+  f16mat4x2 m;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  int after;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+};
+
+struct S_std140 {
+  int before;
+  f16vec2 m_0;
+  f16vec2 m_1;
+  f16vec2 m_2;
+  f16vec2 m_3;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  int after;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  S_std140 inner[4];
+} u;
+
+S p[4] = S[4](S(0, f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+S conv_S(S_std140 val) {
+  return S(val.before, f16mat4x2(val.m_0, val.m_1, val.m_2, val.m_3), val.pad, val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.pad_7, val.pad_8, val.pad_9, val.pad_10, val.after, val.pad_11, val.pad_12, val.pad_13, val.pad_14, val.pad_15, val.pad_16, val.pad_17, val.pad_18, val.pad_19, val.pad_20, val.pad_21, val.pad_22, val.pad_23, val.pad_24, val.pad_25);
+}
+
+S[4] conv_arr4_S(S_std140 val[4]) {
+  S arr[4] = S[4](S(0, f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_S(val[i]);
+    }
+  }
+  return arr;
+}
+
+f16mat4x2 load_u_inner_2_m() {
+  return f16mat4x2(u.inner[2u].m_0, u.inner[2u].m_1, u.inner[2u].m_2, u.inner[2u].m_3);
+}
+
+void f() {
+  p = conv_arr4_S(u.inner);
+  p[1] = conv_S(u.inner[2u]);
+  p[3].m = load_u_inner_2_m();
+  p[1].m[0] = u.inner[0u].m_1.yx;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_private.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_private.wgsl.expected.msl
new file mode 100644
index 0000000..a941446
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_private.wgsl.expected.msl

@@ -0,0 +1,33 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ half4x2 m;
+  /* 0x0014 */ tint_array<int8_t, 44> tint_pad;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_1;
+};
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol_1 [[buffer(0)]]) {
+  thread tint_array<S, 4> tint_symbol = {};
+  tint_symbol = *(tint_symbol_1);
+  tint_symbol[1] = (*(tint_symbol_1))[2];
+  tint_symbol[3].m = (*(tint_symbol_1))[2].m;
+  tint_symbol[1].m[0] = half2((*(tint_symbol_1))[0].m[1]).yx;
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_private.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_private.wgsl.expected.spvasm
new file mode 100644
index 0000000..705a738
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_private.wgsl.expected.spvasm

@@ -0,0 +1,182 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 110
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "m_2"
+               OpMemberName %S_std140 4 "m_3"
+               OpMemberName %S_std140 5 "after"
+               OpName %u "u"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %p "p"
+               OpName %conv_S "conv_S"
+               OpName %val "val"
+               OpName %conv_arr4_S "conv_arr4_S"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_u_inner_2_m "load_u_inner_2_m"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 4
+               OpMemberDecorate %S_std140 2 Offset 8
+               OpMemberDecorate %S_std140 3 Offset 12
+               OpMemberDecorate %S_std140 4 Offset 16
+               OpMemberDecorate %S_std140 5 Offset 64
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 4
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 4
+               OpMemberDecorate %S 2 Offset 64
+               OpDecorate %_arr_S_uint_4 ArrayStride 128
+        %int = OpTypeInt 32 1
+       %half = OpTypeFloat 16
+     %v2half = OpTypeVector %half 2
+   %S_std140 = OpTypeStruct %int %v2half %v2half %v2half %v2half %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat4v2half = OpTypeMatrix %v2half 4
+          %S = OpTypeStruct %int %mat4v2half %int
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+%_ptr_Private__arr_S_uint_4 = OpTypePointer Private %_arr_S_uint_4
+         %16 = OpConstantNull %_arr_S_uint_4
+          %p = OpVariable %_ptr_Private__arr_S_uint_4 Private %16
+         %17 = OpTypeFunction %S %S_std140
+         %29 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
+%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %37 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
+         %50 = OpConstantNull %_arr_S_std140_uint_4
+%_ptr_Function_S = OpTypePointer Function %S
+%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
+     %uint_1 = OpConstant %uint 1
+         %63 = OpTypeFunction %mat4v2half
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half
+     %uint_3 = OpConstant %uint 3
+       %void = OpTypeVoid
+         %86 = OpTypeFunction %void
+%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
+      %int_1 = OpConstant %int 1
+%_ptr_Private_S = OpTypePointer Private %S
+      %int_3 = OpConstant %int 3
+%_ptr_Private_mat4v2half = OpTypePointer Private %mat4v2half
+        %104 = OpConstantNull %int
+%_ptr_Private_v2half = OpTypePointer Private %v2half
+     %conv_S = OpFunction %S None %17
+        %val = OpFunctionParameter %S_std140
+         %20 = OpLabel
+         %21 = OpCompositeExtract %int %val 0
+         %22 = OpCompositeExtract %v2half %val 1
+         %23 = OpCompositeExtract %v2half %val 2
+         %24 = OpCompositeExtract %v2half %val 3
+         %25 = OpCompositeExtract %v2half %val 4
+         %26 = OpCompositeConstruct %mat4v2half %22 %23 %24 %25
+         %27 = OpCompositeExtract %int %val 5
+         %28 = OpCompositeConstruct %S %21 %26 %27
+               OpReturnValue %28
+               OpFunctionEnd
+%conv_arr4_S = OpFunction %_arr_S_uint_4 None %29
+      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
+         %32 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %16
+          %i = OpVariable %_ptr_Function_uint Function %37
+%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %50
+               OpBranch %38
+         %38 = OpLabel
+               OpLoopMerge %39 %40 None
+               OpBranch %41
+         %41 = OpLabel
+         %43 = OpLoad %uint %i
+         %44 = OpULessThan %bool %43 %uint_4
+         %42 = OpLogicalNot %bool %44
+               OpSelectionMerge %46 None
+               OpBranchConditional %42 %47 %46
+         %47 = OpLabel
+               OpBranch %39
+         %46 = OpLabel
+               OpStore %var_for_index %val_0
+         %51 = OpLoad %uint %i
+         %53 = OpAccessChain %_ptr_Function_S %arr %51
+         %55 = OpLoad %uint %i
+         %57 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %55
+         %58 = OpLoad %S_std140 %57
+         %54 = OpFunctionCall %S %conv_S %58
+               OpStore %53 %54
+               OpBranch %40
+         %40 = OpLabel
+         %59 = OpLoad %uint %i
+         %61 = OpIAdd %uint %59 %uint_1
+               OpStore %i %61
+               OpBranch %38
+         %39 = OpLabel
+         %62 = OpLoad %_arr_S_uint_4 %arr
+               OpReturnValue %62
+               OpFunctionEnd
+%load_u_inner_2_m = OpFunction %mat4v2half None %63
+         %65 = OpLabel
+         %70 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %73 = OpAccessChain %_ptr_Uniform_v2half %70 %uint_1
+         %74 = OpLoad %v2half %73
+         %76 = OpAccessChain %_ptr_Uniform_v2half %70 %uint_2
+         %77 = OpLoad %v2half %76
+         %80 = OpAccessChain %_ptr_Uniform_v2half %70 %uint_3
+         %81 = OpLoad %v2half %80
+         %83 = OpAccessChain %_ptr_Uniform_v2half %70 %uint_4
+         %84 = OpLoad %v2half %83
+         %85 = OpCompositeConstruct %mat4v2half %74 %77 %81 %84
+               OpReturnValue %85
+               OpFunctionEnd
+          %f = OpFunction %void None %86
+         %89 = OpLabel
+         %92 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
+         %93 = OpLoad %_arr_S_std140_uint_4 %92
+         %90 = OpFunctionCall %_arr_S_uint_4 %conv_arr4_S %93
+               OpStore %p %90
+         %96 = OpAccessChain %_ptr_Private_S %p %int_1
+         %98 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %99 = OpLoad %S_std140 %98
+         %97 = OpFunctionCall %S %conv_S %99
+               OpStore %96 %97
+        %102 = OpAccessChain %_ptr_Private_mat4v2half %p %int_3 %uint_1
+        %103 = OpFunctionCall %mat4v2half %load_u_inner_2_m
+               OpStore %102 %103
+        %106 = OpAccessChain %_ptr_Private_v2half %p %int_1 %uint_1 %104
+        %107 = OpAccessChain %_ptr_Uniform_v2half %u %uint_0 %37 %uint_2
+        %108 = OpLoad %v2half %107
+        %109 = OpVectorShuffle %v2half %108 %108 1 0
+               OpStore %106 %109
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_private.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_private.wgsl.expected.wgsl
new file mode 100644
index 0000000..be2d781
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_private.wgsl.expected.wgsl

@@ -0,0 +1,20 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat4x2<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+var<private> p : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  p = u;
+  p[1] = u[2];
+  p[3].m = u[2].m;
+  p[1].m[0] = u[0].m[1].yx;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_storage.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_storage.wgsl
new file mode 100644
index 0000000..3d014af
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_storage.wgsl

@@ -0,0 +1,19 @@
+enable f16;  // Test input: copies from a uniform array of structs holding a mat4x2<f16> into a read_write storage array.
+
+struct S {
+  before : i32,
+  m : mat4x2<f16>,  // Matrix of four vec2<f16> columns.
+  @align(64) @size(16)  // Attributes of `after` (next line): placed at byte offset 64, 16 bytes reserved.
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;  // Source of every copy below.
+@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;  // Destination of every copy below.
+
+@compute @workgroup_size(1)
+fn f() {
+    s = u;  // Whole-array copy.
+    s[1] = u[2];  // Single struct element copy.
+    s[3].m = u[2].m;  // Matrix member copy.
+    s[1].m[0] = u[0].m[1].yx;  // One column copied with its two components swapped.
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_storage.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_storage.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..180ae47
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_storage.wgsl.expected.dxc.hlsl

@@ -0,0 +1,72 @@
+struct S {
+  int before;
+  matrix<float16_t, 4, 2> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol_3(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 4, 2> value) {
+  buffer.Store<vector<float16_t, 2> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 4u), value[1u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 8u), value[2u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 12u), value[3u]);
+}
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, S value) {
+  buffer.Store((offset + 0u), asuint(value.before));
+  tint_symbol_3(buffer, (offset + 4u), value.m);
+  buffer.Store((offset + 64u), asuint(value.after));
+}
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, S value[4]) {
+  S array[4] = value;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      tint_symbol_1(buffer, (offset + (i * 128u)), array[i]);
+    }
+  }
+}
+
+matrix<float16_t, 4, 2> tint_symbol_8(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  const uint scalar_offset_3 = ((offset + 12u)) / 4;
+  uint ubo_load_3 = buffer[scalar_offset_3 / 4][scalar_offset_3 % 4];
+  return matrix<float16_t, 4, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))));
+}
+
+S tint_symbol_6(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_4 = ((offset + 0u)) / 4;
+  const uint scalar_offset_5 = ((offset + 64u)) / 4;
+  const S tint_symbol_10 = {asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4]), tint_symbol_8(buffer, (offset + 4u)), asint(buffer[scalar_offset_5 / 4][scalar_offset_5 % 4])};
+  return tint_symbol_10;
+}
+
+typedef S tint_symbol_5_ret[4];
+tint_symbol_5_ret tint_symbol_5(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_6(buffer, (offset + (i_1 * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_5(u, 0u));
+  tint_symbol_1(s, 128u, tint_symbol_6(u, 256u));
+  tint_symbol_3(s, 388u, tint_symbol_8(u, 260u));
+  uint ubo_load_4 = u[0].z;
+  s.Store<vector<float16_t, 2> >(132u, vector<float16_t, 2>(float16_t(f16tof32(ubo_load_4 & 0xFFFF)), float16_t(f16tof32(ubo_load_4 >> 16))).yx);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_storage.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_storage.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..fc9ff30
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_storage.wgsl.expected.fxc.hlsl

@@ -0,0 +1,77 @@
+SKIP: FAILED
+
+struct S {
+  int before;
+  matrix<float16_t, 4, 2> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol_3(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 4, 2> value) {
+  buffer.Store<vector<float16_t, 2> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 4u), value[1u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 8u), value[2u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 12u), value[3u]);
+}
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, S value) {
+  buffer.Store((offset + 0u), asuint(value.before));
+  tint_symbol_3(buffer, (offset + 4u), value.m);
+  buffer.Store((offset + 64u), asuint(value.after));
+}
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, S value[4]) {
+  S array[4] = value;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      tint_symbol_1(buffer, (offset + (i * 128u)), array[i]);
+    }
+  }
+}
+
+matrix<float16_t, 4, 2> tint_symbol_8(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  const uint scalar_offset_3 = ((offset + 12u)) / 4;
+  uint ubo_load_3 = buffer[scalar_offset_3 / 4][scalar_offset_3 % 4];
+  return matrix<float16_t, 4, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))));
+}
+
+S tint_symbol_6(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_4 = ((offset + 0u)) / 4;
+  const uint scalar_offset_5 = ((offset + 64u)) / 4;
+  const S tint_symbol_10 = {asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4]), tint_symbol_8(buffer, (offset + 4u)), asint(buffer[scalar_offset_5 / 4][scalar_offset_5 % 4])};
+  return tint_symbol_10;
+}
+
+typedef S tint_symbol_5_ret[4];
+tint_symbol_5_ret tint_symbol_5(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_6(buffer, (offset + (i_1 * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_5(u, 0u));
+  tint_symbol_1(s, 128u, tint_symbol_6(u, 256u));
+  tint_symbol_3(s, 388u, tint_symbol_8(u, 260u));
+  uint ubo_load_4 = u[0].z;
+  s.Store<vector<float16_t, 2> >(132u, vector<float16_t, 2>(float16_t(f16tof32(ubo_load_4 & 0xFFFF)), float16_t(f16tof32(ubo_load_4 >> 16))).yx);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x00000185CE589540(3,10-18): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_storage.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_storage.wgsl.expected.glsl
new file mode 100644
index 0000000..2e843e1
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_storage.wgsl.expected.glsl

@@ -0,0 +1,108 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct S {
+  int before;
+  f16mat4x2 m;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  int after;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+};
+
+struct S_std140 {
+  int before;
+  f16vec2 m_0;
+  f16vec2 m_1;
+  f16vec2 m_2;
+  f16vec2 m_3;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  int after;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  S_std140 inner[4];
+} u;
+
+layout(binding = 1, std430) buffer u_block_ssbo {
+  S inner[4];
+} s;
+
+S conv_S(S_std140 val) {
+  return S(val.before, f16mat4x2(val.m_0, val.m_1, val.m_2, val.m_3), val.pad, val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.pad_7, val.pad_8, val.pad_9, val.pad_10, val.after, val.pad_11, val.pad_12, val.pad_13, val.pad_14, val.pad_15, val.pad_16, val.pad_17, val.pad_18, val.pad_19, val.pad_20, val.pad_21, val.pad_22, val.pad_23, val.pad_24, val.pad_25);
+}
+
+S[4] conv_arr4_S(S_std140 val[4]) {
+  S arr[4] = S[4](S(0, f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_S(val[i]);
+    }
+  }
+  return arr;
+}
+
+f16mat4x2 load_u_inner_2_m() {
+  return f16mat4x2(u.inner[2u].m_0, u.inner[2u].m_1, u.inner[2u].m_2, u.inner[2u].m_3);
+}
+
+void f() {
+  s.inner = conv_arr4_S(u.inner);
+  s.inner[1] = conv_S(u.inner[2u]);
+  s.inner[3].m = load_u_inner_2_m();
+  s.inner[1].m[0] = u.inner[0u].m_1.yx;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_storage.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_storage.wgsl.expected.msl
new file mode 100644
index 0000000..b266704
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_storage.wgsl.expected.msl

@@ -0,0 +1,32 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ half4x2 m;
+  /* 0x0014 */ tint_array<int8_t, 44> tint_pad;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_1;
+};
+
+kernel void f(device tint_array<S, 4>* tint_symbol [[buffer(1)]], const constant tint_array<S, 4>* tint_symbol_1 [[buffer(0)]]) {
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[2];
+  (*(tint_symbol))[3].m = (*(tint_symbol_1))[2].m;
+  (*(tint_symbol))[1].m[0] = half2((*(tint_symbol_1))[0].m[1]).yx;
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_storage.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_storage.wgsl.expected.spvasm
new file mode 100644
index 0000000..f9fa6f4
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_storage.wgsl.expected.spvasm

@@ -0,0 +1,191 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 113
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "m_2"
+               OpMemberName %S_std140 4 "m_3"
+               OpMemberName %S_std140 5 "after"
+               OpName %u "u"
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %s "s"
+               OpName %conv_S "conv_S"
+               OpName %val "val"
+               OpName %conv_arr4_S "conv_arr4_S"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_u_inner_2_m "load_u_inner_2_m"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 4
+               OpMemberDecorate %S_std140 2 Offset 8
+               OpMemberDecorate %S_std140 3 Offset 12
+               OpMemberDecorate %S_std140 4 Offset 16
+               OpMemberDecorate %S_std140 5 Offset 64
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 4
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 4
+               OpMemberDecorate %S 2 Offset 64
+               OpDecorate %_arr_S_uint_4 ArrayStride 128
+               OpDecorate %s DescriptorSet 0
+               OpDecorate %s Binding 1
+        %int = OpTypeInt 32 1
+       %half = OpTypeFloat 16
+     %v2half = OpTypeVector %half 2
+   %S_std140 = OpTypeStruct %int %v2half %v2half %v2half %v2half %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat4v2half = OpTypeMatrix %v2half 4
+          %S = OpTypeStruct %int %mat4v2half %int
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+    %u_block = OpTypeStruct %_arr_S_uint_4
+%_ptr_StorageBuffer_u_block = OpTypePointer StorageBuffer %u_block
+          %s = OpVariable %_ptr_StorageBuffer_u_block StorageBuffer
+         %17 = OpTypeFunction %S %S_std140
+         %29 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
+%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
+         %35 = OpConstantNull %_arr_S_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %38 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
+         %51 = OpConstantNull %_arr_S_std140_uint_4
+%_ptr_Function_S = OpTypePointer Function %S
+%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
+     %uint_1 = OpConstant %uint 1
+         %64 = OpTypeFunction %mat4v2half
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half
+     %uint_3 = OpConstant %uint 3
+       %void = OpTypeVoid
+         %87 = OpTypeFunction %void
+%_ptr_StorageBuffer__arr_S_uint_4 = OpTypePointer StorageBuffer %_arr_S_uint_4
+%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
+      %int_1 = OpConstant %int 1
+%_ptr_StorageBuffer_S = OpTypePointer StorageBuffer %S
+      %int_3 = OpConstant %int 3
+%_ptr_StorageBuffer_mat4v2half = OpTypePointer StorageBuffer %mat4v2half
+        %107 = OpConstantNull %int
+%_ptr_StorageBuffer_v2half = OpTypePointer StorageBuffer %v2half
+     %conv_S = OpFunction %S None %17
+        %val = OpFunctionParameter %S_std140
+         %20 = OpLabel
+         %21 = OpCompositeExtract %int %val 0
+         %22 = OpCompositeExtract %v2half %val 1
+         %23 = OpCompositeExtract %v2half %val 2
+         %24 = OpCompositeExtract %v2half %val 3
+         %25 = OpCompositeExtract %v2half %val 4
+         %26 = OpCompositeConstruct %mat4v2half %22 %23 %24 %25
+         %27 = OpCompositeExtract %int %val 5
+         %28 = OpCompositeConstruct %S %21 %26 %27
+               OpReturnValue %28
+               OpFunctionEnd
+%conv_arr4_S = OpFunction %_arr_S_uint_4 None %29
+      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
+         %32 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %35
+          %i = OpVariable %_ptr_Function_uint Function %38
+%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %51
+               OpBranch %39
+         %39 = OpLabel
+               OpLoopMerge %40 %41 None
+               OpBranch %42
+         %42 = OpLabel
+         %44 = OpLoad %uint %i
+         %45 = OpULessThan %bool %44 %uint_4
+         %43 = OpLogicalNot %bool %45
+               OpSelectionMerge %47 None
+               OpBranchConditional %43 %48 %47
+         %48 = OpLabel
+               OpBranch %40
+         %47 = OpLabel
+               OpStore %var_for_index %val_0
+         %52 = OpLoad %uint %i
+         %54 = OpAccessChain %_ptr_Function_S %arr %52
+         %56 = OpLoad %uint %i
+         %58 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %56
+         %59 = OpLoad %S_std140 %58
+         %55 = OpFunctionCall %S %conv_S %59
+               OpStore %54 %55
+               OpBranch %41
+         %41 = OpLabel
+         %60 = OpLoad %uint %i
+         %62 = OpIAdd %uint %60 %uint_1
+               OpStore %i %62
+               OpBranch %39
+         %40 = OpLabel
+         %63 = OpLoad %_arr_S_uint_4 %arr
+               OpReturnValue %63
+               OpFunctionEnd
+%load_u_inner_2_m = OpFunction %mat4v2half None %64
+         %66 = OpLabel
+         %71 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %74 = OpAccessChain %_ptr_Uniform_v2half %71 %uint_1
+         %75 = OpLoad %v2half %74
+         %77 = OpAccessChain %_ptr_Uniform_v2half %71 %uint_2
+         %78 = OpLoad %v2half %77
+         %81 = OpAccessChain %_ptr_Uniform_v2half %71 %uint_3
+         %82 = OpLoad %v2half %81
+         %84 = OpAccessChain %_ptr_Uniform_v2half %71 %uint_4
+         %85 = OpLoad %v2half %84
+         %86 = OpCompositeConstruct %mat4v2half %75 %78 %82 %85
+               OpReturnValue %86
+               OpFunctionEnd
+          %f = OpFunction %void None %87
+         %90 = OpLabel
+         %92 = OpAccessChain %_ptr_StorageBuffer__arr_S_uint_4 %s %uint_0
+         %95 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
+         %96 = OpLoad %_arr_S_std140_uint_4 %95
+         %93 = OpFunctionCall %_arr_S_uint_4 %conv_arr4_S %96
+               OpStore %92 %93
+         %99 = OpAccessChain %_ptr_StorageBuffer_S %s %uint_0 %int_1
+        %101 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+        %102 = OpLoad %S_std140 %101
+        %100 = OpFunctionCall %S %conv_S %102
+               OpStore %99 %100
+        %105 = OpAccessChain %_ptr_StorageBuffer_mat4v2half %s %uint_0 %int_3 %uint_1
+        %106 = OpFunctionCall %mat4v2half %load_u_inner_2_m
+               OpStore %105 %106
+        %109 = OpAccessChain %_ptr_StorageBuffer_v2half %s %uint_0 %int_1 %uint_1 %107
+        %110 = OpAccessChain %_ptr_Uniform_v2half %u %uint_0 %38 %uint_2
+        %111 = OpLoad %v2half %110
+        %112 = OpVectorShuffle %v2half %111 %111 1 0
+               OpStore %109 %112
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_storage.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_storage.wgsl.expected.wgsl
new file mode 100644
index 0000000..f360114
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_storage.wgsl.expected.wgsl

@@ -0,0 +1,20 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat4x2<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  s = u;
+  s[1] = u[2];
+  s[3].m = u[2].m;
+  s[1].m[0] = u[0].m[1].yx;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_workgroup.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_workgroup.wgsl
new file mode 100644
index 0000000..b6a276d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_workgroup.wgsl

@@ -0,0 +1,19 @@
+enable f16;
+
+struct S {  // Element type of both the uniform array `u` and the workgroup array `w` declared below.
+  before : i32,  // i32 member placed ahead of the matrix
+  m : mat4x2<f16>,  // 4-column, 2-row f16 matrix; the member this test copies
+  @align(64) @size(16)  // these attributes apply to the next member, `after`
+  after : i32,  // i32 member placed after the matrix
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;  // copy source: uniform buffer at group 0, binding 0
+var<workgroup> w : array<S, 4>;  // copy destination: workgroup variable with the same array type as `u`
+
+@compute @workgroup_size(1)  // compute entry point, workgroup size 1
+fn f() {
+    w = u;  // whole-array copy from the uniform buffer into workgroup memory
+    w[1] = u[2];  // copy of a single struct element
+    w[3].m = u[2].m;  // copy of just the matrix member
+    w[1].m[0] = u[0].m[1].yx;  // read column 1 of u[0].m, swap its two components, store into column 0 of w[1].m
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_workgroup.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_workgroup.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..334fe1e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_workgroup.wgsl.expected.dxc.hlsl

@@ -0,0 +1,66 @@
+struct S {
+  int before;
+  matrix<float16_t, 4, 2> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+groupshared S w[4];
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+matrix<float16_t, 4, 2> tint_symbol_5(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  const uint scalar_offset_3 = ((offset + 12u)) / 4;
+  uint ubo_load_3 = buffer[scalar_offset_3 / 4][scalar_offset_3 % 4];
+  return matrix<float16_t, 4, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))));
+}
+
+S tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_4 = ((offset + 0u)) / 4;
+  const uint scalar_offset_5 = ((offset + 64u)) / 4;
+  const S tint_symbol_8 = {asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4]), tint_symbol_5(buffer, (offset + 4u)), asint(buffer[scalar_offset_5 / 4][scalar_offset_5 % 4])};
+  return tint_symbol_8;
+}
+
+typedef S tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 128u)));
+    }
+  }
+  return arr;
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      const uint i = idx;
+      const S tint_symbol_7 = (S)0;
+      w[i] = tint_symbol_7;
+    }
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = tint_symbol_3(u, 256u);
+  w[3].m = tint_symbol_5(u, 260u);
+  uint ubo_load_4 = u[0].z;
+  w[1].m[0] = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_4 & 0xFFFF)), float16_t(f16tof32(ubo_load_4 >> 16))).yx;
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_workgroup.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_workgroup.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..7f448db
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_workgroup.wgsl.expected.fxc.hlsl

@@ -0,0 +1,71 @@
+SKIP: FAILED
+
+struct S {
+  int before;
+  matrix<float16_t, 4, 2> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+groupshared S w[4];
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+matrix<float16_t, 4, 2> tint_symbol_5(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  const uint scalar_offset_3 = ((offset + 12u)) / 4;
+  uint ubo_load_3 = buffer[scalar_offset_3 / 4][scalar_offset_3 % 4];
+  return matrix<float16_t, 4, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))));
+}
+
+S tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_4 = ((offset + 0u)) / 4;
+  const uint scalar_offset_5 = ((offset + 64u)) / 4;
+  const S tint_symbol_8 = {asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4]), tint_symbol_5(buffer, (offset + 4u)), asint(buffer[scalar_offset_5 / 4][scalar_offset_5 % 4])};
+  return tint_symbol_8;
+}
+
+typedef S tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 128u)));
+    }
+  }
+  return arr;
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      const uint i = idx;
+      const S tint_symbol_7 = (S)0;
+      w[i] = tint_symbol_7;
+    }
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = tint_symbol_3(u, 256u);
+  w[3].m = tint_symbol_5(u, 260u);
+  uint ubo_load_4 = u[0].z;
+  w[1].m[0] = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_4 & 0xFFFF)), float16_t(f16tof32(ubo_load_4 >> 16))).yx;
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001B8BDB52100(3,10-18): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_workgroup.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_workgroup.wgsl.expected.glsl
new file mode 100644
index 0000000..b6a2696
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_workgroup.wgsl.expected.glsl

@@ -0,0 +1,113 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct S {
+  int before;
+  f16mat4x2 m;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  int after;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+};
+
+struct S_std140 {
+  int before;
+  f16vec2 m_0;
+  f16vec2 m_1;
+  f16vec2 m_2;
+  f16vec2 m_3;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  int after;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  S_std140 inner[4];
+} u;
+
+shared S w[4];
+S conv_S(S_std140 val) {
+  return S(val.before, f16mat4x2(val.m_0, val.m_1, val.m_2, val.m_3), val.pad, val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.pad_7, val.pad_8, val.pad_9, val.pad_10, val.after, val.pad_11, val.pad_12, val.pad_13, val.pad_14, val.pad_15, val.pad_16, val.pad_17, val.pad_18, val.pad_19, val.pad_20, val.pad_21, val.pad_22, val.pad_23, val.pad_24, val.pad_25);
+}
+
+S[4] conv_arr4_S(S_std140 val[4]) {
+  S arr[4] = S[4](S(0, f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_S(val[i]);
+    }
+  }
+  return arr;
+}
+
+f16mat4x2 load_u_inner_2_m() {
+  return f16mat4x2(u.inner[2u].m_0, u.inner[2u].m_1, u.inner[2u].m_2, u.inner[2u].m_3);
+}
+
+void f(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      uint i = idx;
+      S tint_symbol = S(0, f16mat4x2(f16vec2(0.0hf), f16vec2(0.0hf), f16vec2(0.0hf), f16vec2(0.0hf)), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u);
+      w[i] = tint_symbol;
+    }
+  }
+  barrier();
+  w = conv_arr4_S(u.inner);
+  w[1] = conv_S(u.inner[2u]);
+  w[3].m = load_u_inner_2_m();
+  w[1].m[0] = u.inner[0u].m_1.yx;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f(gl_LocalInvocationIndex);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_workgroup.wgsl.expected.msl
new file mode 100644
index 0000000..41f9255
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_workgroup.wgsl.expected.msl

@@ -0,0 +1,47 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ half4x2 m;
+  /* 0x0014 */ tint_array<int8_t, 44> tint_pad;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_1;
+};
+
+struct tint_symbol_6 {
+  tint_array<S, 4> w;
+};
+
+void f_inner(uint local_invocation_index, threadgroup tint_array<S, 4>* const tint_symbol_1, const constant tint_array<S, 4>* const tint_symbol_2) {
+  for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+    uint const i = idx;
+    S const tint_symbol = S{};
+    (*(tint_symbol_1))[i] = tint_symbol;
+  }
+  threadgroup_barrier(mem_flags::mem_threadgroup);
+  *(tint_symbol_1) = *(tint_symbol_2);
+  (*(tint_symbol_1))[1] = (*(tint_symbol_2))[2];
+  (*(tint_symbol_1))[3].m = (*(tint_symbol_2))[2].m;
+  (*(tint_symbol_1))[1].m[0] = half2((*(tint_symbol_2))[0].m[1]).yx;
+}
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol_5 [[buffer(0)]], threadgroup tint_symbol_6* tint_symbol_4 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
+  threadgroup tint_array<S, 4>* const tint_symbol_3 = &((*(tint_symbol_4)).w);
+  f_inner(local_invocation_index, tint_symbol_3, tint_symbol_5);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_workgroup.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_workgroup.wgsl.expected.spvasm
new file mode 100644
index 0000000..03db478
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_workgroup.wgsl.expected.spvasm

@@ -0,0 +1,225 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 135
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f" %local_invocation_index_1
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %local_invocation_index_1 "local_invocation_index_1"
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "m_2"
+               OpMemberName %S_std140 4 "m_3"
+               OpMemberName %S_std140 5 "after"
+               OpName %u "u"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %w "w"
+               OpName %conv_S "conv_S"
+               OpName %val "val"
+               OpName %conv_arr4_S "conv_arr4_S"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_u_inner_2_m "load_u_inner_2_m"
+               OpName %f_inner "f_inner"
+               OpName %local_invocation_index "local_invocation_index"
+               OpName %idx "idx"
+               OpName %f "f"
+               OpDecorate %local_invocation_index_1 BuiltIn LocalInvocationIndex
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 4
+               OpMemberDecorate %S_std140 2 Offset 8
+               OpMemberDecorate %S_std140 3 Offset 12
+               OpMemberDecorate %S_std140 4 Offset 16
+               OpMemberDecorate %S_std140 5 Offset 64
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 4
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 4
+               OpMemberDecorate %S 2 Offset 64
+               OpDecorate %_arr_S_uint_4 ArrayStride 128
+       %uint = OpTypeInt 32 0
+%_ptr_Input_uint = OpTypePointer Input %uint
+%local_invocation_index_1 = OpVariable %_ptr_Input_uint Input
+        %int = OpTypeInt 32 1
+       %half = OpTypeFloat 16
+     %v2half = OpTypeVector %half 2
+   %S_std140 = OpTypeStruct %int %v2half %v2half %v2half %v2half %int
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat4v2half = OpTypeMatrix %v2half 4
+          %S = OpTypeStruct %int %mat4v2half %int
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+%_ptr_Workgroup__arr_S_uint_4 = OpTypePointer Workgroup %_arr_S_uint_4
+          %w = OpVariable %_ptr_Workgroup__arr_S_uint_4 Workgroup
+         %18 = OpTypeFunction %S %S_std140
+         %30 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
+%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
+         %36 = OpConstantNull %_arr_S_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %39 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
+         %52 = OpConstantNull %_arr_S_std140_uint_4
+%_ptr_Function_S = OpTypePointer Function %S
+%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
+     %uint_1 = OpConstant %uint 1
+         %65 = OpTypeFunction %mat4v2half
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half
+     %uint_3 = OpConstant %uint 3
+       %void = OpTypeVoid
+         %88 = OpTypeFunction %void %uint
+%_ptr_Workgroup_S = OpTypePointer Workgroup %S
+        %106 = OpConstantNull %S
+   %uint_264 = OpConstant %uint 264
+%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
+      %int_1 = OpConstant %int 1
+      %int_3 = OpConstant %int 3
+%_ptr_Workgroup_mat4v2half = OpTypePointer Workgroup %mat4v2half
+        %124 = OpConstantNull %int
+%_ptr_Workgroup_v2half = OpTypePointer Workgroup %v2half
+        %130 = OpTypeFunction %void
+     %conv_S = OpFunction %S None %18
+        %val = OpFunctionParameter %S_std140
+         %21 = OpLabel
+         %22 = OpCompositeExtract %int %val 0
+         %23 = OpCompositeExtract %v2half %val 1
+         %24 = OpCompositeExtract %v2half %val 2
+         %25 = OpCompositeExtract %v2half %val 3
+         %26 = OpCompositeExtract %v2half %val 4
+         %27 = OpCompositeConstruct %mat4v2half %23 %24 %25 %26
+         %28 = OpCompositeExtract %int %val 5
+         %29 = OpCompositeConstruct %S %22 %27 %28
+               OpReturnValue %29
+               OpFunctionEnd
+%conv_arr4_S = OpFunction %_arr_S_uint_4 None %30
+      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
+         %33 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %36
+          %i = OpVariable %_ptr_Function_uint Function %39
+%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %52
+               OpBranch %40
+         %40 = OpLabel
+               OpLoopMerge %41 %42 None
+               OpBranch %43
+         %43 = OpLabel
+         %45 = OpLoad %uint %i
+         %46 = OpULessThan %bool %45 %uint_4
+         %44 = OpLogicalNot %bool %46
+               OpSelectionMerge %48 None
+               OpBranchConditional %44 %49 %48
+         %49 = OpLabel
+               OpBranch %41
+         %48 = OpLabel
+               OpStore %var_for_index %val_0
+         %53 = OpLoad %uint %i
+         %55 = OpAccessChain %_ptr_Function_S %arr %53
+         %57 = OpLoad %uint %i
+         %59 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %57
+         %60 = OpLoad %S_std140 %59
+         %56 = OpFunctionCall %S %conv_S %60
+               OpStore %55 %56
+               OpBranch %42
+         %42 = OpLabel
+         %61 = OpLoad %uint %i
+         %63 = OpIAdd %uint %61 %uint_1
+               OpStore %i %63
+               OpBranch %40
+         %41 = OpLabel
+         %64 = OpLoad %_arr_S_uint_4 %arr
+               OpReturnValue %64
+               OpFunctionEnd
+%load_u_inner_2_m = OpFunction %mat4v2half None %65
+         %67 = OpLabel
+         %72 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %75 = OpAccessChain %_ptr_Uniform_v2half %72 %uint_1
+         %76 = OpLoad %v2half %75
+         %78 = OpAccessChain %_ptr_Uniform_v2half %72 %uint_2
+         %79 = OpLoad %v2half %78
+         %82 = OpAccessChain %_ptr_Uniform_v2half %72 %uint_3
+         %83 = OpLoad %v2half %82
+         %85 = OpAccessChain %_ptr_Uniform_v2half %72 %uint_4
+         %86 = OpLoad %v2half %85
+         %87 = OpCompositeConstruct %mat4v2half %76 %79 %83 %86
+               OpReturnValue %87
+               OpFunctionEnd
+    %f_inner = OpFunction %void None %88
+%local_invocation_index = OpFunctionParameter %uint
+         %92 = OpLabel
+        %idx = OpVariable %_ptr_Function_uint Function %39
+               OpStore %idx %local_invocation_index
+               OpBranch %94
+         %94 = OpLabel
+               OpLoopMerge %95 %96 None
+               OpBranch %97
+         %97 = OpLabel
+         %99 = OpLoad %uint %idx
+        %100 = OpULessThan %bool %99 %uint_4
+         %98 = OpLogicalNot %bool %100
+               OpSelectionMerge %101 None
+               OpBranchConditional %98 %102 %101
+        %102 = OpLabel
+               OpBranch %95
+        %101 = OpLabel
+        %103 = OpLoad %uint %idx
+        %105 = OpAccessChain %_ptr_Workgroup_S %w %103
+               OpStore %105 %106
+               OpBranch %96
+         %96 = OpLabel
+        %107 = OpLoad %uint %idx
+        %108 = OpIAdd %uint %107 %uint_1
+               OpStore %idx %108
+               OpBranch %94
+         %95 = OpLabel
+               OpControlBarrier %uint_2 %uint_2 %uint_264
+        %113 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
+        %114 = OpLoad %_arr_S_std140_uint_4 %113
+        %111 = OpFunctionCall %_arr_S_uint_4 %conv_arr4_S %114
+               OpStore %w %111
+        %116 = OpAccessChain %_ptr_Workgroup_S %w %int_1
+        %118 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+        %119 = OpLoad %S_std140 %118
+        %117 = OpFunctionCall %S %conv_S %119
+               OpStore %116 %117
+        %122 = OpAccessChain %_ptr_Workgroup_mat4v2half %w %int_3 %uint_1
+        %123 = OpFunctionCall %mat4v2half %load_u_inner_2_m
+               OpStore %122 %123
+        %126 = OpAccessChain %_ptr_Workgroup_v2half %w %int_1 %uint_1 %124
+        %127 = OpAccessChain %_ptr_Uniform_v2half %u %uint_0 %39 %uint_2
+        %128 = OpLoad %v2half %127
+        %129 = OpVectorShuffle %v2half %128 %128 1 0
+               OpStore %126 %129
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %130
+        %132 = OpLabel
+        %134 = OpLoad %uint %local_invocation_index_1
+        %133 = OpFunctionCall %void %f_inner %134
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_workgroup.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_workgroup.wgsl.expected.wgsl
new file mode 100644
index 0000000..de7011b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f16/to_workgroup.wgsl.expected.wgsl

@@ -0,0 +1,20 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat4x2<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+var<workgroup> w : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  w = u;
+  w[1] = u[2];
+  w[3].m = u[2].m;
+  w[1].m[0] = u[0].m[1].yx;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f32/dynamic_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/dynamic_index_via_ptr.wgsl
new file mode 100644
index 0000000..2d874a7
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/dynamic_index_via_ptr.wgsl

@@ -0,0 +1,32 @@
+struct Inner {
+  @size(64)
+  m : mat4x2<f32>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+var<private> counter = 0;
+fn i() -> i32 { counter++; return counter; }
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a           = &a;
+  let p_a_i         = &((*p_a)[i()]);
+  let p_a_i_a       = &((*p_a_i).a);
+  let p_a_i_a_i     = &((*p_a_i_a)[i()]);
+  let p_a_i_a_i_m   = &((*p_a_i_a_i).m);
+  let p_a_i_a_i_m_i = &((*p_a_i_a_i_m)[i()]);
+
+
+  let l_a             : array<Outer, 4> =  *p_a;
+  let l_a_i           : Outer           =  *p_a_i;
+  let l_a_i_a         : array<Inner, 4> =  *p_a_i_a;
+  let l_a_i_a_i       : Inner           =  *p_a_i_a_i;
+  let l_a_i_a_i_m     : mat4x2<f32>     =  *p_a_i_a_i_m;
+  let l_a_i_a_i_m_i   : vec2<f32>       =  *p_a_i_a_i_m_i;
+  let l_a_i_a_i_m_i_i : f32             = (*p_a_i_a_i_m_i)[i()];
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..0554769
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,82 @@
+struct Inner {
+  float4x2 m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+float4x2 tint_symbol_8(uint4 buffer[64], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_3 / 4];
+  return float4x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)), asfloat(((scalar_offset_3 & 2) ? ubo_load_3.zw : ubo_load_3.xy)));
+}
+
+Inner tint_symbol_7(uint4 buffer[64], uint offset) {
+  const Inner tint_symbol_11 = {tint_symbol_8(buffer, (offset + 0u))};
+  return tint_symbol_11;
+}
+
+typedef Inner tint_symbol_6_ret[4];
+tint_symbol_6_ret tint_symbol_6(uint4 buffer[64], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_7(buffer, (offset + (i_1 * 64u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_5(uint4 buffer[64], uint offset) {
+  const Outer tint_symbol_12 = {tint_symbol_6(buffer, (offset + 0u))};
+  return tint_symbol_12;
+}
+
+typedef Outer tint_symbol_4_ret[4];
+tint_symbol_4_ret tint_symbol_4(uint4 buffer[64], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    for(uint i_2 = 0u; (i_2 < 4u); i_2 = (i_2 + 1u)) {
+      arr_1[i_2] = tint_symbol_5(buffer, (offset + (i_2 * 256u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_a_i_save = i();
+  const int p_a_i_a_i_save = i();
+  const int p_a_i_a_i_m_i_save = i();
+  const Outer l_a[4] = tint_symbol_4(a, 0u);
+  const Outer l_a_i = tint_symbol_5(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a[4] = tint_symbol_6(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a_i = tint_symbol_7(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const float4x2 l_a_i_a_i_m = tint_symbol_8(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const uint scalar_offset_4 = ((((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))) + (8u * uint(p_a_i_a_i_m_i_save)))) / 4;
+  uint4 ubo_load_4 = a[scalar_offset_4 / 4];
+  const float2 l_a_i_a_i_m_i = asfloat(((scalar_offset_4 & 2) ? ubo_load_4.zw : ubo_load_4.xy));
+  const int tint_symbol = p_a_i_save;
+  const int tint_symbol_1 = p_a_i_a_i_save;
+  const int tint_symbol_2 = p_a_i_a_i_m_i_save;
+  const int tint_symbol_3 = i();
+  const uint scalar_offset_5 = (((((256u * uint(tint_symbol)) + (64u * uint(tint_symbol_1))) + (8u * uint(tint_symbol_2))) + (4u * uint(tint_symbol_3)))) / 4;
+  const float l_a_i_a_i_m_i_i = asfloat(a[scalar_offset_5 / 4][scalar_offset_5 % 4]);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..0554769
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,82 @@
+struct Inner {
+  float4x2 m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+float4x2 tint_symbol_8(uint4 buffer[64], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_3 / 4];
+  return float4x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)), asfloat(((scalar_offset_3 & 2) ? ubo_load_3.zw : ubo_load_3.xy)));
+}
+
+Inner tint_symbol_7(uint4 buffer[64], uint offset) {
+  const Inner tint_symbol_11 = {tint_symbol_8(buffer, (offset + 0u))};
+  return tint_symbol_11;
+}
+
+typedef Inner tint_symbol_6_ret[4];
+tint_symbol_6_ret tint_symbol_6(uint4 buffer[64], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_7(buffer, (offset + (i_1 * 64u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_5(uint4 buffer[64], uint offset) {
+  const Outer tint_symbol_12 = {tint_symbol_6(buffer, (offset + 0u))};
+  return tint_symbol_12;
+}
+
+typedef Outer tint_symbol_4_ret[4];
+tint_symbol_4_ret tint_symbol_4(uint4 buffer[64], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    for(uint i_2 = 0u; (i_2 < 4u); i_2 = (i_2 + 1u)) {
+      arr_1[i_2] = tint_symbol_5(buffer, (offset + (i_2 * 256u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_a_i_save = i();
+  const int p_a_i_a_i_save = i();
+  const int p_a_i_a_i_m_i_save = i();
+  const Outer l_a[4] = tint_symbol_4(a, 0u);
+  const Outer l_a_i = tint_symbol_5(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a[4] = tint_symbol_6(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a_i = tint_symbol_7(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const float4x2 l_a_i_a_i_m = tint_symbol_8(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const uint scalar_offset_4 = ((((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))) + (8u * uint(p_a_i_a_i_m_i_save)))) / 4;
+  uint4 ubo_load_4 = a[scalar_offset_4 / 4];
+  const float2 l_a_i_a_i_m_i = asfloat(((scalar_offset_4 & 2) ? ubo_load_4.zw : ubo_load_4.xy));
+  const int tint_symbol = p_a_i_save;
+  const int tint_symbol_1 = p_a_i_a_i_save;
+  const int tint_symbol_2 = p_a_i_a_i_m_i_save;
+  const int tint_symbol_3 = i();
+  const uint scalar_offset_5 = (((((256u * uint(tint_symbol)) + (64u * uint(tint_symbol_1))) + (8u * uint(tint_symbol_2))) + (4u * uint(tint_symbol_3)))) / 4;
+  const float l_a_i_a_i_m_i_i = asfloat(a[scalar_offset_5 / 4][scalar_offset_5 % 4]);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f32/dynamic_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/dynamic_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..5de9287
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/dynamic_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,156 @@
+#version 310 es
+
+struct Inner {
+  mat4x2 m;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+};
+
+struct Inner_std140 {
+  vec2 m_0;
+  vec2 m_1;
+  vec2 m_2;
+  vec2 m_3;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+};
+
+struct Outer {
+  Inner a[4];
+};
+
+struct Outer_std140 {
+  Inner_std140 a[4];
+};
+
+layout(binding = 0, std140) uniform a_block_std140_ubo {
+  Outer_std140 inner[4];
+} a;
+
+int counter = 0;
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+Inner conv_Inner(Inner_std140 val) {
+  return Inner(mat4x2(val.m_0, val.m_1, val.m_2, val.m_3), val.pad, val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.pad_7);
+}
+
+Inner[4] conv_arr4_Inner(Inner_std140 val[4]) {
+  Inner arr[4] = Inner[4](Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_Inner(val[i]);
+    }
+  }
+  return arr;
+}
+
+Outer conv_Outer(Outer_std140 val) {
+  return Outer(conv_arr4_Inner(val.a));
+}
+
+Outer[4] conv_arr4_Outer(Outer_std140 val[4]) {
+  Outer arr[4] = Outer[4](Outer(Inner[4](Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))), Outer(Inner[4](Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))), Outer(Inner[4](Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))), Outer(Inner[4](Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_Outer(val[i]);
+    }
+  }
+  return arr;
+}
+
+mat4x2 load_a_inner_p0_a_p1_m(uint p0, uint p1) {
+  uint s_save = p0;
+  uint s_save_1 = p1;
+  return mat4x2(a.inner[s_save].a[s_save_1].m_0, a.inner[s_save].a[s_save_1].m_1, a.inner[s_save].a[s_save_1].m_2, a.inner[s_save].a[s_save_1].m_3);
+}
+
+vec2 load_a_inner_p0_a_p1_m_p2(uint p0, uint p1, uint p2) {
+  switch(p2) {
+    case 0u: {
+      return a.inner[p0].a[p1].m_0;
+      break;
+    }
+    case 1u: {
+      return a.inner[p0].a[p1].m_1;
+      break;
+    }
+    case 2u: {
+      return a.inner[p0].a[p1].m_2;
+      break;
+    }
+    case 3u: {
+      return a.inner[p0].a[p1].m_3;
+      break;
+    }
+    default: {
+      return vec2(0.0f);
+      break;
+    }
+  }
+}
+
+float load_a_inner_p0_a_p1_m_p2_p3(uint p0, uint p1, uint p2, uint p3) {
+  switch(p2) {
+    case 0u: {
+      return a.inner[p0].a[p1].m_0[p3];
+      break;
+    }
+    case 1u: {
+      return a.inner[p0].a[p1].m_1[p3];
+      break;
+    }
+    case 2u: {
+      return a.inner[p0].a[p1].m_2[p3];
+      break;
+    }
+    case 3u: {
+      return a.inner[p0].a[p1].m_3[p3];
+      break;
+    }
+    default: {
+      return 0.0f;
+      break;
+    }
+  }
+}
+
+void f() {
+  Outer p_a[4] = conv_arr4_Outer(a.inner);
+  int tint_symbol = i();
+  Outer p_a_i = conv_Outer(a.inner[tint_symbol]);
+  Inner p_a_i_a[4] = conv_arr4_Inner(a.inner[tint_symbol].a);
+  int tint_symbol_1 = i();
+  Inner p_a_i_a_i = conv_Inner(a.inner[tint_symbol].a[tint_symbol_1]);
+  mat4x2 p_a_i_a_i_m = load_a_inner_p0_a_p1_m(uint(tint_symbol), uint(tint_symbol_1));
+  int tint_symbol_2 = i();
+  vec2 p_a_i_a_i_m_i = load_a_inner_p0_a_p1_m_p2(uint(tint_symbol), uint(tint_symbol_1), uint(tint_symbol_2));
+  Outer l_a[4] = conv_arr4_Outer(a.inner);
+  Outer l_a_i = conv_Outer(a.inner[tint_symbol]);
+  Inner l_a_i_a[4] = conv_arr4_Inner(a.inner[tint_symbol].a);
+  Inner l_a_i_a_i = conv_Inner(a.inner[tint_symbol].a[tint_symbol_1]);
+  mat4x2 l_a_i_a_i_m = load_a_inner_p0_a_p1_m(uint(tint_symbol), uint(tint_symbol_1));
+  vec2 l_a_i_a_i_m_i = load_a_inner_p0_a_p1_m_p2(uint(tint_symbol), uint(tint_symbol_1), uint(tint_symbol_2));
+  int tint_symbol_3 = i();
+  float l_a_i_a_i_m_i_i = load_a_inner_p0_a_p1_m_p2_p3(uint(tint_symbol), uint(tint_symbol_1), uint(tint_symbol_2), uint(tint_symbol_3));
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f32/dynamic_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/dynamic_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..eca377a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/dynamic_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,49 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct Inner {
+  /* 0x0000 */ float4x2 m;
+  /* 0x0020 */ tint_array<int8_t, 32> tint_pad;
+};
+
+struct Outer {
+  /* 0x0000 */ tint_array<Inner, 4> a;
+};
+
+int i() {
+  thread int tint_symbol_4 = 0;
+  tint_symbol_4 = as_type<int>((as_type<uint>(tint_symbol_4) + as_type<uint>(1)));
+  return tint_symbol_4;
+}
+
+kernel void f(const constant tint_array<Outer, 4>* tint_symbol_5 [[buffer(0)]]) {
+  int const tint_symbol = i();
+  int const p_a_i_save = tint_symbol;
+  int const tint_symbol_1 = i();
+  int const p_a_i_a_i_save = tint_symbol_1;
+  int const tint_symbol_2 = i();
+  int const p_a_i_a_i_m_i_save = tint_symbol_2;
+  tint_array<Outer, 4> const l_a = *(tint_symbol_5);
+  Outer const l_a_i = (*(tint_symbol_5))[p_a_i_save];
+  tint_array<Inner, 4> const l_a_i_a = (*(tint_symbol_5))[p_a_i_save].a;
+  Inner const l_a_i_a_i = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save];
+  float4x2 const l_a_i_a_i_m = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save].m;
+  float2 const l_a_i_a_i_m_i = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save].m[p_a_i_a_i_m_i_save];
+  int const tint_symbol_3 = i();
+  float const l_a_i_a_i_m_i_i = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save].m[p_a_i_a_i_m_i_save][tint_symbol_3];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f32/dynamic_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/dynamic_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..b52f94a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/dynamic_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,334 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 215
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %a_block_std140 "a_block_std140"
+               OpMemberName %a_block_std140 0 "inner"
+               OpName %Outer_std140 "Outer_std140"
+               OpMemberName %Outer_std140 0 "a"
+               OpName %Inner_std140 "Inner_std140"
+               OpMemberName %Inner_std140 0 "m_0"
+               OpMemberName %Inner_std140 1 "m_1"
+               OpMemberName %Inner_std140 2 "m_2"
+               OpMemberName %Inner_std140 3 "m_3"
+               OpName %a "a"
+               OpName %counter "counter"
+               OpName %i "i"
+               OpName %Inner "Inner"
+               OpMemberName %Inner 0 "m"
+               OpName %conv_Inner "conv_Inner"
+               OpName %val "val"
+               OpName %conv_arr4_Inner "conv_arr4_Inner"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i_0 "i"
+               OpName %var_for_index_1 "var_for_index_1"
+               OpName %Outer "Outer"
+               OpMemberName %Outer 0 "a"
+               OpName %conv_Outer "conv_Outer"
+               OpName %val_1 "val"
+               OpName %conv_arr4_Outer "conv_arr4_Outer"
+               OpName %val_2 "val"
+               OpName %arr_0 "arr"
+               OpName %i_1 "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_a_inner_p0_a_p1_m "load_a_inner_p0_a_p1_m"
+               OpName %p0 "p0"
+               OpName %p1 "p1"
+               OpName %load_a_inner_p0_a_p1_m_p2 "load_a_inner_p0_a_p1_m_p2"
+               OpName %p0_0 "p0"
+               OpName %p1_0 "p1"
+               OpName %p2 "p2"
+               OpName %load_a_inner_p0_a_p1_m_p2_p3 "load_a_inner_p0_a_p1_m_p2_p3"
+               OpName %p0_1 "p0"
+               OpName %p1_1 "p1"
+               OpName %p2_0 "p2"
+               OpName %p3 "p3"
+               OpName %f "f"
+               OpDecorate %a_block_std140 Block
+               OpMemberDecorate %a_block_std140 0 Offset 0
+               OpMemberDecorate %Outer_std140 0 Offset 0
+               OpMemberDecorate %Inner_std140 0 Offset 0
+               OpMemberDecorate %Inner_std140 1 Offset 8
+               OpMemberDecorate %Inner_std140 2 Offset 16
+               OpMemberDecorate %Inner_std140 3 Offset 24
+               OpDecorate %_arr_Inner_std140_uint_4 ArrayStride 64
+               OpDecorate %_arr_Outer_std140_uint_4 ArrayStride 256
+               OpDecorate %a NonWritable
+               OpDecorate %a DescriptorSet 0
+               OpDecorate %a Binding 0
+               OpMemberDecorate %Inner 0 Offset 0
+               OpMemberDecorate %Inner 0 ColMajor
+               OpMemberDecorate %Inner 0 MatrixStride 8
+               OpDecorate %_arr_Inner_uint_4 ArrayStride 64
+               OpMemberDecorate %Outer 0 Offset 0
+               OpDecorate %_arr_Outer_uint_4 ArrayStride 256
+      %float = OpTypeFloat 32
+    %v2float = OpTypeVector %float 2
+%Inner_std140 = OpTypeStruct %v2float %v2float %v2float %v2float
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_Inner_std140_uint_4 = OpTypeArray %Inner_std140 %uint_4
+%Outer_std140 = OpTypeStruct %_arr_Inner_std140_uint_4
+%_arr_Outer_std140_uint_4 = OpTypeArray %Outer_std140 %uint_4
+%a_block_std140 = OpTypeStruct %_arr_Outer_std140_uint_4
+%_ptr_Uniform_a_block_std140 = OpTypePointer Uniform %a_block_std140
+          %a = OpVariable %_ptr_Uniform_a_block_std140 Uniform
+        %int = OpTypeInt 32 1
+         %13 = OpConstantNull %int
+%_ptr_Private_int = OpTypePointer Private %int
+    %counter = OpVariable %_ptr_Private_int Private %13
+         %16 = OpTypeFunction %int
+      %int_1 = OpConstant %int 1
+%mat4v2float = OpTypeMatrix %v2float 4
+      %Inner = OpTypeStruct %mat4v2float
+         %23 = OpTypeFunction %Inner %Inner_std140
+%_arr_Inner_uint_4 = OpTypeArray %Inner %uint_4
+         %35 = OpTypeFunction %_arr_Inner_uint_4 %_arr_Inner_std140_uint_4
+%_ptr_Function__arr_Inner_uint_4 = OpTypePointer Function %_arr_Inner_uint_4
+         %42 = OpConstantNull %_arr_Inner_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %45 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_Inner_std140_uint_4 = OpTypePointer Function %_arr_Inner_std140_uint_4
+         %58 = OpConstantNull %_arr_Inner_std140_uint_4
+%_ptr_Function_Inner = OpTypePointer Function %Inner
+%_ptr_Function_Inner_std140 = OpTypePointer Function %Inner_std140
+     %uint_1 = OpConstant %uint 1
+      %Outer = OpTypeStruct %_arr_Inner_uint_4
+         %71 = OpTypeFunction %Outer %Outer_std140
+%_arr_Outer_uint_4 = OpTypeArray %Outer %uint_4
+         %79 = OpTypeFunction %_arr_Outer_uint_4 %_arr_Outer_std140_uint_4
+%_ptr_Function__arr_Outer_uint_4 = OpTypePointer Function %_arr_Outer_uint_4
+         %86 = OpConstantNull %_arr_Outer_uint_4
+%_ptr_Function__arr_Outer_std140_uint_4 = OpTypePointer Function %_arr_Outer_std140_uint_4
+         %99 = OpConstantNull %_arr_Outer_std140_uint_4
+%_ptr_Function_Outer = OpTypePointer Function %Outer
+%_ptr_Function_Outer_std140 = OpTypePointer Function %Outer_std140
+        %111 = OpTypeFunction %mat4v2float %uint %uint
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_Inner_std140 = OpTypePointer Uniform %Inner_std140
+%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
+     %uint_2 = OpConstant %uint 2
+     %uint_3 = OpConstant %uint 3
+        %136 = OpTypeFunction %v2float %uint %uint %uint
+        %156 = OpConstantNull %v2float
+        %157 = OpTypeFunction %float %uint %uint %uint %uint
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+        %179 = OpConstantNull %float
+       %void = OpTypeVoid
+        %180 = OpTypeFunction %void
+%_ptr_Uniform__arr_Outer_std140_uint_4 = OpTypePointer Uniform %_arr_Outer_std140_uint_4
+%_ptr_Uniform_Outer_std140 = OpTypePointer Uniform %Outer_std140
+%_ptr_Uniform__arr_Inner_std140_uint_4 = OpTypePointer Uniform %_arr_Inner_std140_uint_4
+          %i = OpFunction %int None %16
+         %18 = OpLabel
+         %19 = OpLoad %int %counter
+         %21 = OpIAdd %int %19 %int_1
+               OpStore %counter %21
+         %22 = OpLoad %int %counter
+               OpReturnValue %22
+               OpFunctionEnd
+ %conv_Inner = OpFunction %Inner None %23
+        %val = OpFunctionParameter %Inner_std140
+         %28 = OpLabel
+         %29 = OpCompositeExtract %v2float %val 0
+         %30 = OpCompositeExtract %v2float %val 1
+         %31 = OpCompositeExtract %v2float %val 2
+         %32 = OpCompositeExtract %v2float %val 3
+         %33 = OpCompositeConstruct %mat4v2float %29 %30 %31 %32
+         %34 = OpCompositeConstruct %Inner %33
+               OpReturnValue %34
+               OpFunctionEnd
+%conv_arr4_Inner = OpFunction %_arr_Inner_uint_4 None %35
+      %val_0 = OpFunctionParameter %_arr_Inner_std140_uint_4
+         %39 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_Inner_uint_4 Function %42
+        %i_0 = OpVariable %_ptr_Function_uint Function %45
+%var_for_index_1 = OpVariable %_ptr_Function__arr_Inner_std140_uint_4 Function %58
+               OpBranch %46
+         %46 = OpLabel
+               OpLoopMerge %47 %48 None
+               OpBranch %49
+         %49 = OpLabel
+         %51 = OpLoad %uint %i_0
+         %52 = OpULessThan %bool %51 %uint_4
+         %50 = OpLogicalNot %bool %52
+               OpSelectionMerge %54 None
+               OpBranchConditional %50 %55 %54
+         %55 = OpLabel
+               OpBranch %47
+         %54 = OpLabel
+               OpStore %var_for_index_1 %val_0
+         %59 = OpLoad %uint %i_0
+         %61 = OpAccessChain %_ptr_Function_Inner %arr %59
+         %63 = OpLoad %uint %i_0
+         %65 = OpAccessChain %_ptr_Function_Inner_std140 %var_for_index_1 %63
+         %66 = OpLoad %Inner_std140 %65
+         %62 = OpFunctionCall %Inner %conv_Inner %66
+               OpStore %61 %62
+               OpBranch %48
+         %48 = OpLabel
+         %67 = OpLoad %uint %i_0
+         %69 = OpIAdd %uint %67 %uint_1
+               OpStore %i_0 %69
+               OpBranch %46
+         %47 = OpLabel
+         %70 = OpLoad %_arr_Inner_uint_4 %arr
+               OpReturnValue %70
+               OpFunctionEnd
+ %conv_Outer = OpFunction %Outer None %71
+      %val_1 = OpFunctionParameter %Outer_std140
+         %75 = OpLabel
+         %77 = OpCompositeExtract %_arr_Inner_std140_uint_4 %val_1 0
+         %76 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr4_Inner %77
+         %78 = OpCompositeConstruct %Outer %76
+               OpReturnValue %78
+               OpFunctionEnd
+%conv_arr4_Outer = OpFunction %_arr_Outer_uint_4 None %79
+      %val_2 = OpFunctionParameter %_arr_Outer_std140_uint_4
+         %83 = OpLabel
+      %arr_0 = OpVariable %_ptr_Function__arr_Outer_uint_4 Function %86
+        %i_1 = OpVariable %_ptr_Function_uint Function %45
+%var_for_index = OpVariable %_ptr_Function__arr_Outer_std140_uint_4 Function %99
+               OpBranch %88
+         %88 = OpLabel
+               OpLoopMerge %89 %90 None
+               OpBranch %91
+         %91 = OpLabel
+         %93 = OpLoad %uint %i_1
+         %94 = OpULessThan %bool %93 %uint_4
+         %92 = OpLogicalNot %bool %94
+               OpSelectionMerge %95 None
+               OpBranchConditional %92 %96 %95
+         %96 = OpLabel
+               OpBranch %89
+         %95 = OpLabel
+               OpStore %var_for_index %val_2
+        %100 = OpLoad %uint %i_1
+        %102 = OpAccessChain %_ptr_Function_Outer %arr_0 %100
+        %104 = OpLoad %uint %i_1
+        %106 = OpAccessChain %_ptr_Function_Outer_std140 %var_for_index %104
+        %107 = OpLoad %Outer_std140 %106
+        %103 = OpFunctionCall %Outer %conv_Outer %107
+               OpStore %102 %103
+               OpBranch %90
+         %90 = OpLabel
+        %108 = OpLoad %uint %i_1
+        %109 = OpIAdd %uint %108 %uint_1
+               OpStore %i_1 %109
+               OpBranch %88
+         %89 = OpLabel
+        %110 = OpLoad %_arr_Outer_uint_4 %arr_0
+               OpReturnValue %110
+               OpFunctionEnd
+%load_a_inner_p0_a_p1_m = OpFunction %mat4v2float None %111
+         %p0 = OpFunctionParameter %uint
+         %p1 = OpFunctionParameter %uint
+        %115 = OpLabel
+        %119 = OpAccessChain %_ptr_Uniform_Inner_std140 %a %uint_0 %p0 %uint_0 %p1
+        %122 = OpAccessChain %_ptr_Uniform_v2float %119 %uint_0
+        %123 = OpLoad %v2float %122
+        %125 = OpAccessChain %_ptr_Uniform_v2float %119 %uint_1
+        %126 = OpLoad %v2float %125
+        %129 = OpAccessChain %_ptr_Uniform_v2float %119 %uint_2
+        %130 = OpLoad %v2float %129
+        %133 = OpAccessChain %_ptr_Uniform_v2float %119 %uint_3
+        %134 = OpLoad %v2float %133
+        %135 = OpCompositeConstruct %mat4v2float %123 %126 %130 %134
+               OpReturnValue %135
+               OpFunctionEnd
+%load_a_inner_p0_a_p1_m_p2 = OpFunction %v2float None %136
+       %p0_0 = OpFunctionParameter %uint
+       %p1_0 = OpFunctionParameter %uint
+         %p2 = OpFunctionParameter %uint
+        %141 = OpLabel
+               OpSelectionMerge %142 None
+               OpSwitch %p2 %143 0 %144 1 %145 2 %146 3 %147
+        %144 = OpLabel
+        %148 = OpAccessChain %_ptr_Uniform_v2float %a %uint_0 %p0_0 %uint_0 %p1_0 %uint_0
+        %149 = OpLoad %v2float %148
+               OpReturnValue %149
+        %145 = OpLabel
+        %150 = OpAccessChain %_ptr_Uniform_v2float %a %uint_0 %p0_0 %uint_0 %p1_0 %uint_1
+        %151 = OpLoad %v2float %150
+               OpReturnValue %151
+        %146 = OpLabel
+        %152 = OpAccessChain %_ptr_Uniform_v2float %a %uint_0 %p0_0 %uint_0 %p1_0 %uint_2
+        %153 = OpLoad %v2float %152
+               OpReturnValue %153
+        %147 = OpLabel
+        %154 = OpAccessChain %_ptr_Uniform_v2float %a %uint_0 %p0_0 %uint_0 %p1_0 %uint_3
+        %155 = OpLoad %v2float %154
+               OpReturnValue %155
+        %143 = OpLabel
+               OpReturnValue %156
+        %142 = OpLabel
+               OpReturnValue %156
+               OpFunctionEnd
+%load_a_inner_p0_a_p1_m_p2_p3 = OpFunction %float None %157
+       %p0_1 = OpFunctionParameter %uint
+       %p1_1 = OpFunctionParameter %uint
+       %p2_0 = OpFunctionParameter %uint
+         %p3 = OpFunctionParameter %uint
+        %163 = OpLabel
+               OpSelectionMerge %164 None
+               OpSwitch %p2_0 %165 0 %166 1 %167 2 %168 3 %169
+        %166 = OpLabel
+        %171 = OpAccessChain %_ptr_Uniform_float %a %uint_0 %p0_1 %uint_0 %p1_1 %uint_0 %p3
+        %172 = OpLoad %float %171
+               OpReturnValue %172
+        %167 = OpLabel
+        %173 = OpAccessChain %_ptr_Uniform_float %a %uint_0 %p0_1 %uint_0 %p1_1 %uint_1 %p3
+        %174 = OpLoad %float %173
+               OpReturnValue %174
+        %168 = OpLabel
+        %175 = OpAccessChain %_ptr_Uniform_float %a %uint_0 %p0_1 %uint_0 %p1_1 %uint_2 %p3
+        %176 = OpLoad %float %175
+               OpReturnValue %176
+        %169 = OpLabel
+        %177 = OpAccessChain %_ptr_Uniform_float %a %uint_0 %p0_1 %uint_0 %p1_1 %uint_3 %p3
+        %178 = OpLoad %float %177
+               OpReturnValue %178
+        %165 = OpLabel
+               OpReturnValue %179
+        %164 = OpLabel
+               OpReturnValue %179
+               OpFunctionEnd
+          %f = OpFunction %void None %180
+        %183 = OpLabel
+        %184 = OpFunctionCall %int %i
+        %185 = OpFunctionCall %int %i
+        %186 = OpFunctionCall %int %i
+        %189 = OpAccessChain %_ptr_Uniform__arr_Outer_std140_uint_4 %a %uint_0
+        %190 = OpLoad %_arr_Outer_std140_uint_4 %189
+        %187 = OpFunctionCall %_arr_Outer_uint_4 %conv_arr4_Outer %190
+        %193 = OpAccessChain %_ptr_Uniform_Outer_std140 %a %uint_0 %184
+        %194 = OpLoad %Outer_std140 %193
+        %191 = OpFunctionCall %Outer %conv_Outer %194
+        %197 = OpAccessChain %_ptr_Uniform__arr_Inner_std140_uint_4 %a %uint_0 %184 %uint_0
+        %198 = OpLoad %_arr_Inner_std140_uint_4 %197
+        %195 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr4_Inner %198
+        %200 = OpAccessChain %_ptr_Uniform_Inner_std140 %a %uint_0 %184 %uint_0 %185
+        %201 = OpLoad %Inner_std140 %200
+        %199 = OpFunctionCall %Inner %conv_Inner %201
+        %203 = OpBitcast %uint %184
+        %204 = OpBitcast %uint %185
+        %202 = OpFunctionCall %mat4v2float %load_a_inner_p0_a_p1_m %203 %204
+        %206 = OpBitcast %uint %184
+        %207 = OpBitcast %uint %185
+        %208 = OpBitcast %uint %186
+        %205 = OpFunctionCall %v2float %load_a_inner_p0_a_p1_m_p2 %206 %207 %208
+        %209 = OpFunctionCall %int %i
+        %211 = OpBitcast %uint %184
+        %212 = OpBitcast %uint %185
+        %213 = OpBitcast %uint %186
+        %214 = OpBitcast %uint %209
+        %210 = OpFunctionCall %float %load_a_inner_p0_a_p1_m_p2_p3 %211 %212 %213 %214
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f32/dynamic_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/dynamic_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..8fc4c1a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/dynamic_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,34 @@
+struct Inner {
+  @size(64)
+  m : mat4x2<f32>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+var<private> counter = 0;
+
+fn i() -> i32 {
+  counter++;
+  return counter;
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &(a);
+  let p_a_i = &((*(p_a))[i()]);
+  let p_a_i_a = &((*(p_a_i)).a);
+  let p_a_i_a_i = &((*(p_a_i_a))[i()]);
+  let p_a_i_a_i_m = &((*(p_a_i_a_i)).m);
+  let p_a_i_a_i_m_i = &((*(p_a_i_a_i_m))[i()]);
+  let l_a : array<Outer, 4> = *(p_a);
+  let l_a_i : Outer = *(p_a_i);
+  let l_a_i_a : array<Inner, 4> = *(p_a_i_a);
+  let l_a_i_a_i : Inner = *(p_a_i_a_i);
+  let l_a_i_a_i_m : mat4x2<f32> = *(p_a_i_a_i_m);
+  let l_a_i_a_i_m_i : vec2<f32> = *(p_a_i_a_i_m_i);
+  let l_a_i_a_i_m_i_i : f32 = (*(p_a_i_a_i_m_i))[i()];
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f32/static_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/static_index_via_ptr.wgsl
new file mode 100644
index 0000000..b5b71dd
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/static_index_via_ptr.wgsl

@@ -0,0 +1,29 @@
+struct Inner {
+  @size(64)
+  m : mat4x2<f32>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &a;
+  let p_a_3 = &((*p_a)[3]);
+  let p_a_3_a = &((*p_a_3).a);
+  let p_a_3_a_2 = &((*p_a_3_a)[2]);
+  let p_a_3_a_2_m = &((*p_a_3_a_2).m);
+  let p_a_3_a_2_m_1 = &((*p_a_3_a_2_m)[1]);
+
+
+  let l_a             : array<Outer, 4> = *p_a;
+  let l_a_3           : Outer           = *p_a_3;
+  let l_a_3_a         : array<Inner, 4> = *p_a_3_a;
+  let l_a_3_a_2       : Inner           = *p_a_3_a_2;
+  let l_a_3_a_2_m     : mat4x2<f32>     = *p_a_3_a_2_m;
+  let l_a_3_a_2_m_1   : vec2<f32>       = *p_a_3_a_2_m_1;
+  let l_a_3_a_2_m_1_0 : f32             = (*p_a_3_a_2_m_1)[0];
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..ec07ff8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,66 @@
+struct Inner {
+  float4x2 m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];
+};
+
+float4x2 tint_symbol_4(uint4 buffer[64], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_3 / 4];
+  return float4x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)), asfloat(((scalar_offset_3 & 2) ? ubo_load_3.zw : ubo_load_3.xy)));
+}
+
+Inner tint_symbol_3(uint4 buffer[64], uint offset) {
+  const Inner tint_symbol_7 = {tint_symbol_4(buffer, (offset + 0u))};
+  return tint_symbol_7;
+}
+
+typedef Inner tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[64], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_3(buffer, (offset + (i * 64u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_1(uint4 buffer[64], uint offset) {
+  const Outer tint_symbol_8 = {tint_symbol_2(buffer, (offset + 0u))};
+  return tint_symbol_8;
+}
+
+typedef Outer tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[64], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr_1[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 256u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const Outer l_a[4] = tint_symbol(a, 0u);
+  const Outer l_a_3 = tint_symbol_1(a, 768u);
+  const Inner l_a_3_a[4] = tint_symbol_2(a, 768u);
+  const Inner l_a_3_a_2 = tint_symbol_3(a, 896u);
+  const float4x2 l_a_3_a_2_m = tint_symbol_4(a, 896u);
+  const float2 l_a_3_a_2_m_1 = asfloat(a[56].zw);
+  const float l_a_3_a_2_m_1_0 = asfloat(a[56].z);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..ec07ff8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,66 @@
+struct Inner {
+  float4x2 m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];
+};
+
+float4x2 tint_symbol_4(uint4 buffer[64], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_3 / 4];
+  return float4x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)), asfloat(((scalar_offset_3 & 2) ? ubo_load_3.zw : ubo_load_3.xy)));
+}
+
+Inner tint_symbol_3(uint4 buffer[64], uint offset) {
+  const Inner tint_symbol_7 = {tint_symbol_4(buffer, (offset + 0u))};
+  return tint_symbol_7;
+}
+
+typedef Inner tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[64], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_3(buffer, (offset + (i * 64u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_1(uint4 buffer[64], uint offset) {
+  const Outer tint_symbol_8 = {tint_symbol_2(buffer, (offset + 0u))};
+  return tint_symbol_8;
+}
+
+typedef Outer tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[64], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr_1[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 256u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const Outer l_a[4] = tint_symbol(a, 0u);
+  const Outer l_a_3 = tint_symbol_1(a, 768u);
+  const Inner l_a_3_a[4] = tint_symbol_2(a, 768u);
+  const Inner l_a_3_a_2 = tint_symbol_3(a, 896u);
+  const float4x2 l_a_3_a_2_m = tint_symbol_4(a, 896u);
+  const float2 l_a_3_a_2_m_1 = asfloat(a[56].zw);
+  const float l_a_3_a_2_m_1_0 = asfloat(a[56].z);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f32/static_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/static_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..45048a2
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/static_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,94 @@
+#version 310 es
+
+struct Inner {
+  mat4x2 m;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+};
+
+struct Inner_std140 {
+  vec2 m_0;
+  vec2 m_1;
+  vec2 m_2;
+  vec2 m_3;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+};
+
+struct Outer {
+  Inner a[4];
+};
+
+struct Outer_std140 {
+  Inner_std140 a[4];
+};
+
+layout(binding = 0, std140) uniform a_block_std140_ubo {
+  Outer_std140 inner[4];
+} a;
+
+Inner conv_Inner(Inner_std140 val) {
+  return Inner(mat4x2(val.m_0, val.m_1, val.m_2, val.m_3), val.pad, val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.pad_7);
+}
+
+Inner[4] conv_arr4_Inner(Inner_std140 val[4]) {
+  Inner arr[4] = Inner[4](Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_Inner(val[i]);
+    }
+  }
+  return arr;
+}
+
+Outer conv_Outer(Outer_std140 val) {
+  return Outer(conv_arr4_Inner(val.a));
+}
+
+Outer[4] conv_arr4_Outer(Outer_std140 val[4]) {
+  Outer arr[4] = Outer[4](Outer(Inner[4](Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))), Outer(Inner[4](Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))), Outer(Inner[4](Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))), Outer(Inner[4](Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_Outer(val[i]);
+    }
+  }
+  return arr;
+}
+
+mat4x2 load_a_inner_3_a_2_m() {
+  return mat4x2(a.inner[3u].a[2u].m_0, a.inner[3u].a[2u].m_1, a.inner[3u].a[2u].m_2, a.inner[3u].a[2u].m_3);
+}
+
+void f() {
+  Outer p_a[4] = conv_arr4_Outer(a.inner);
+  Outer p_a_3 = conv_Outer(a.inner[3u]);
+  Inner p_a_3_a[4] = conv_arr4_Inner(a.inner[3u].a);
+  Inner p_a_3_a_2 = conv_Inner(a.inner[3u].a[2u]);
+  mat4x2 p_a_3_a_2_m = load_a_inner_3_a_2_m();
+  vec2 p_a_3_a_2_m_1 = a.inner[3u].a[2u].m_1;
+  Outer l_a[4] = conv_arr4_Outer(a.inner);
+  Outer l_a_3 = conv_Outer(a.inner[3u]);
+  Inner l_a_3_a[4] = conv_arr4_Inner(a.inner[3u].a);
+  Inner l_a_3_a_2 = conv_Inner(a.inner[3u].a[2u]);
+  mat4x2 l_a_3_a_2_m = load_a_inner_3_a_2_m();
+  vec2 l_a_3_a_2_m_1 = a.inner[3u].a[2u].m_1;
+  float l_a_3_a_2_m_1_0 = a.inner[3u].a[2u].m_1[0u];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f32/static_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/static_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..9eb83d5
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/static_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,36 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct Inner {
+  /* 0x0000 */ float4x2 m;
+  /* 0x0020 */ tint_array<int8_t, 32> tint_pad;
+};
+
+struct Outer {
+  /* 0x0000 */ tint_array<Inner, 4> a;
+};
+
+kernel void f(const constant tint_array<Outer, 4>* tint_symbol [[buffer(0)]]) {
+  tint_array<Outer, 4> const l_a = *(tint_symbol);
+  Outer const l_a_3 = (*(tint_symbol))[3];
+  tint_array<Inner, 4> const l_a_3_a = (*(tint_symbol))[3].a;
+  Inner const l_a_3_a_2 = (*(tint_symbol))[3].a[2];
+  float4x2 const l_a_3_a_2_m = (*(tint_symbol))[3].a[2].m;
+  float2 const l_a_3_a_2_m_1 = (*(tint_symbol))[3].a[2].m[1];
+  float const l_a_3_a_2_m_1_0 = (*(tint_symbol))[3].a[2].m[1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f32/static_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/static_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..36d316d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/static_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,233 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 148
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %a_block_std140 "a_block_std140"
+               OpMemberName %a_block_std140 0 "inner"
+               OpName %Outer_std140 "Outer_std140"
+               OpMemberName %Outer_std140 0 "a"
+               OpName %Inner_std140 "Inner_std140"
+               OpMemberName %Inner_std140 0 "m_0"
+               OpMemberName %Inner_std140 1 "m_1"
+               OpMemberName %Inner_std140 2 "m_2"
+               OpMemberName %Inner_std140 3 "m_3"
+               OpName %a "a"
+               OpName %Inner "Inner"
+               OpMemberName %Inner 0 "m"
+               OpName %conv_Inner "conv_Inner"
+               OpName %val "val"
+               OpName %conv_arr4_Inner "conv_arr4_Inner"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index_1 "var_for_index_1"
+               OpName %Outer "Outer"
+               OpMemberName %Outer 0 "a"
+               OpName %conv_Outer "conv_Outer"
+               OpName %val_1 "val"
+               OpName %conv_arr4_Outer "conv_arr4_Outer"
+               OpName %val_2 "val"
+               OpName %arr_0 "arr"
+               OpName %i_0 "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_a_inner_3_a_2_m "load_a_inner_3_a_2_m"
+               OpName %f "f"
+               OpDecorate %a_block_std140 Block
+               OpMemberDecorate %a_block_std140 0 Offset 0
+               OpMemberDecorate %Outer_std140 0 Offset 0
+               OpMemberDecorate %Inner_std140 0 Offset 0
+               OpMemberDecorate %Inner_std140 1 Offset 8
+               OpMemberDecorate %Inner_std140 2 Offset 16
+               OpMemberDecorate %Inner_std140 3 Offset 24
+               OpDecorate %_arr_Inner_std140_uint_4 ArrayStride 64
+               OpDecorate %_arr_Outer_std140_uint_4 ArrayStride 256
+               OpDecorate %a NonWritable
+               OpDecorate %a DescriptorSet 0
+               OpDecorate %a Binding 0
+               OpMemberDecorate %Inner 0 Offset 0
+               OpMemberDecorate %Inner 0 ColMajor
+               OpMemberDecorate %Inner 0 MatrixStride 8
+               OpDecorate %_arr_Inner_uint_4 ArrayStride 64
+               OpMemberDecorate %Outer 0 Offset 0
+               OpDecorate %_arr_Outer_uint_4 ArrayStride 256
+      %float = OpTypeFloat 32
+    %v2float = OpTypeVector %float 2
+%Inner_std140 = OpTypeStruct %v2float %v2float %v2float %v2float
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_Inner_std140_uint_4 = OpTypeArray %Inner_std140 %uint_4
+%Outer_std140 = OpTypeStruct %_arr_Inner_std140_uint_4
+%_arr_Outer_std140_uint_4 = OpTypeArray %Outer_std140 %uint_4
+%a_block_std140 = OpTypeStruct %_arr_Outer_std140_uint_4
+%_ptr_Uniform_a_block_std140 = OpTypePointer Uniform %a_block_std140
+          %a = OpVariable %_ptr_Uniform_a_block_std140 Uniform
+%mat4v2float = OpTypeMatrix %v2float 4
+      %Inner = OpTypeStruct %mat4v2float
+         %12 = OpTypeFunction %Inner %Inner_std140
+%_arr_Inner_uint_4 = OpTypeArray %Inner %uint_4
+         %24 = OpTypeFunction %_arr_Inner_uint_4 %_arr_Inner_std140_uint_4
+%_ptr_Function__arr_Inner_uint_4 = OpTypePointer Function %_arr_Inner_uint_4
+         %31 = OpConstantNull %_arr_Inner_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %34 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_Inner_std140_uint_4 = OpTypePointer Function %_arr_Inner_std140_uint_4
+         %47 = OpConstantNull %_arr_Inner_std140_uint_4
+%_ptr_Function_Inner = OpTypePointer Function %Inner
+%_ptr_Function_Inner_std140 = OpTypePointer Function %Inner_std140
+     %uint_1 = OpConstant %uint 1
+      %Outer = OpTypeStruct %_arr_Inner_uint_4
+         %60 = OpTypeFunction %Outer %Outer_std140
+%_arr_Outer_uint_4 = OpTypeArray %Outer %uint_4
+         %68 = OpTypeFunction %_arr_Outer_uint_4 %_arr_Outer_std140_uint_4
+%_ptr_Function__arr_Outer_uint_4 = OpTypePointer Function %_arr_Outer_uint_4
+         %75 = OpConstantNull %_arr_Outer_uint_4
+%_ptr_Function__arr_Outer_std140_uint_4 = OpTypePointer Function %_arr_Outer_std140_uint_4
+         %88 = OpConstantNull %_arr_Outer_std140_uint_4
+%_ptr_Function_Outer = OpTypePointer Function %Outer
+%_ptr_Function_Outer_std140 = OpTypePointer Function %Outer_std140
+        %100 = OpTypeFunction %mat4v2float
+     %uint_0 = OpConstant %uint 0
+     %uint_3 = OpConstant %uint 3
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_Inner_std140 = OpTypePointer Uniform %Inner_std140
+%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
+       %void = OpTypeVoid
+        %123 = OpTypeFunction %void
+%_ptr_Uniform__arr_Outer_std140_uint_4 = OpTypePointer Uniform %_arr_Outer_std140_uint_4
+%_ptr_Uniform_Outer_std140 = OpTypePointer Uniform %Outer_std140
+%_ptr_Uniform__arr_Inner_std140_uint_4 = OpTypePointer Uniform %_arr_Inner_std140_uint_4
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+ %conv_Inner = OpFunction %Inner None %12
+        %val = OpFunctionParameter %Inner_std140
+         %17 = OpLabel
+         %18 = OpCompositeExtract %v2float %val 0
+         %19 = OpCompositeExtract %v2float %val 1
+         %20 = OpCompositeExtract %v2float %val 2
+         %21 = OpCompositeExtract %v2float %val 3
+         %22 = OpCompositeConstruct %mat4v2float %18 %19 %20 %21
+         %23 = OpCompositeConstruct %Inner %22
+               OpReturnValue %23
+               OpFunctionEnd
+%conv_arr4_Inner = OpFunction %_arr_Inner_uint_4 None %24
+      %val_0 = OpFunctionParameter %_arr_Inner_std140_uint_4
+         %28 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_Inner_uint_4 Function %31
+          %i = OpVariable %_ptr_Function_uint Function %34
+%var_for_index_1 = OpVariable %_ptr_Function__arr_Inner_std140_uint_4 Function %47
+               OpBranch %35
+         %35 = OpLabel
+               OpLoopMerge %36 %37 None
+               OpBranch %38
+         %38 = OpLabel
+         %40 = OpLoad %uint %i
+         %41 = OpULessThan %bool %40 %uint_4
+         %39 = OpLogicalNot %bool %41
+               OpSelectionMerge %43 None
+               OpBranchConditional %39 %44 %43
+         %44 = OpLabel
+               OpBranch %36
+         %43 = OpLabel
+               OpStore %var_for_index_1 %val_0
+         %48 = OpLoad %uint %i
+         %50 = OpAccessChain %_ptr_Function_Inner %arr %48
+         %52 = OpLoad %uint %i
+         %54 = OpAccessChain %_ptr_Function_Inner_std140 %var_for_index_1 %52
+         %55 = OpLoad %Inner_std140 %54
+         %51 = OpFunctionCall %Inner %conv_Inner %55
+               OpStore %50 %51
+               OpBranch %37
+         %37 = OpLabel
+         %56 = OpLoad %uint %i
+         %58 = OpIAdd %uint %56 %uint_1
+               OpStore %i %58
+               OpBranch %35
+         %36 = OpLabel
+         %59 = OpLoad %_arr_Inner_uint_4 %arr
+               OpReturnValue %59
+               OpFunctionEnd
+ %conv_Outer = OpFunction %Outer None %60
+      %val_1 = OpFunctionParameter %Outer_std140
+         %64 = OpLabel
+         %66 = OpCompositeExtract %_arr_Inner_std140_uint_4 %val_1 0
+         %65 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr4_Inner %66
+         %67 = OpCompositeConstruct %Outer %65
+               OpReturnValue %67
+               OpFunctionEnd
+%conv_arr4_Outer = OpFunction %_arr_Outer_uint_4 None %68
+      %val_2 = OpFunctionParameter %_arr_Outer_std140_uint_4
+         %72 = OpLabel
+      %arr_0 = OpVariable %_ptr_Function__arr_Outer_uint_4 Function %75
+        %i_0 = OpVariable %_ptr_Function_uint Function %34
+%var_for_index = OpVariable %_ptr_Function__arr_Outer_std140_uint_4 Function %88
+               OpBranch %77
+         %77 = OpLabel
+               OpLoopMerge %78 %79 None
+               OpBranch %80
+         %80 = OpLabel
+         %82 = OpLoad %uint %i_0
+         %83 = OpULessThan %bool %82 %uint_4
+         %81 = OpLogicalNot %bool %83
+               OpSelectionMerge %84 None
+               OpBranchConditional %81 %85 %84
+         %85 = OpLabel
+               OpBranch %78
+         %84 = OpLabel
+               OpStore %var_for_index %val_2
+         %89 = OpLoad %uint %i_0
+         %91 = OpAccessChain %_ptr_Function_Outer %arr_0 %89
+         %93 = OpLoad %uint %i_0
+         %95 = OpAccessChain %_ptr_Function_Outer_std140 %var_for_index %93
+         %96 = OpLoad %Outer_std140 %95
+         %92 = OpFunctionCall %Outer %conv_Outer %96
+               OpStore %91 %92
+               OpBranch %79
+         %79 = OpLabel
+         %97 = OpLoad %uint %i_0
+         %98 = OpIAdd %uint %97 %uint_1
+               OpStore %i_0 %98
+               OpBranch %77
+         %78 = OpLabel
+         %99 = OpLoad %_arr_Outer_uint_4 %arr_0
+               OpReturnValue %99
+               OpFunctionEnd
+%load_a_inner_3_a_2_m = OpFunction %mat4v2float None %100
+        %102 = OpLabel
+        %108 = OpAccessChain %_ptr_Uniform_Inner_std140 %a %uint_0 %uint_3 %uint_0 %uint_2
+        %111 = OpAccessChain %_ptr_Uniform_v2float %108 %uint_0
+        %112 = OpLoad %v2float %111
+        %114 = OpAccessChain %_ptr_Uniform_v2float %108 %uint_1
+        %115 = OpLoad %v2float %114
+        %117 = OpAccessChain %_ptr_Uniform_v2float %108 %uint_2
+        %118 = OpLoad %v2float %117
+        %120 = OpAccessChain %_ptr_Uniform_v2float %108 %uint_3
+        %121 = OpLoad %v2float %120
+        %122 = OpCompositeConstruct %mat4v2float %112 %115 %118 %121
+               OpReturnValue %122
+               OpFunctionEnd
+          %f = OpFunction %void None %123
+        %126 = OpLabel
+        %129 = OpAccessChain %_ptr_Uniform__arr_Outer_std140_uint_4 %a %uint_0
+        %130 = OpLoad %_arr_Outer_std140_uint_4 %129
+        %127 = OpFunctionCall %_arr_Outer_uint_4 %conv_arr4_Outer %130
+        %133 = OpAccessChain %_ptr_Uniform_Outer_std140 %a %uint_0 %uint_3
+        %134 = OpLoad %Outer_std140 %133
+        %131 = OpFunctionCall %Outer %conv_Outer %134
+        %137 = OpAccessChain %_ptr_Uniform__arr_Inner_std140_uint_4 %a %uint_0 %uint_3 %uint_0
+        %138 = OpLoad %_arr_Inner_std140_uint_4 %137
+        %135 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr4_Inner %138
+        %140 = OpAccessChain %_ptr_Uniform_Inner_std140 %a %uint_0 %uint_3 %uint_0 %uint_2
+        %141 = OpLoad %Inner_std140 %140
+        %139 = OpFunctionCall %Inner %conv_Inner %141
+        %142 = OpFunctionCall %mat4v2float %load_a_inner_3_a_2_m
+        %143 = OpAccessChain %_ptr_Uniform_v2float %a %uint_0 %uint_3 %uint_0 %uint_2 %uint_1
+        %144 = OpLoad %v2float %143
+        %146 = OpAccessChain %_ptr_Uniform_float %a %uint_0 %uint_3 %uint_0 %uint_2 %uint_1 %34
+        %147 = OpLoad %float %146
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f32/static_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/static_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..894d1f8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/static_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,27 @@
+struct Inner {
+  @size(64)
+  m : mat4x2<f32>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &(a);
+  let p_a_3 = &((*(p_a))[3]);
+  let p_a_3_a = &((*(p_a_3)).a);
+  let p_a_3_a_2 = &((*(p_a_3_a))[2]);
+  let p_a_3_a_2_m = &((*(p_a_3_a_2)).m);
+  let p_a_3_a_2_m_1 = &((*(p_a_3_a_2_m))[1]);
+  let l_a : array<Outer, 4> = *(p_a);
+  let l_a_3 : Outer = *(p_a_3);
+  let l_a_3_a : array<Inner, 4> = *(p_a_3_a);
+  let l_a_3_a_2 : Inner = *(p_a_3_a_2);
+  let l_a_3_a_2_m : mat4x2<f32> = *(p_a_3_a_2_m);
+  let l_a_3_a_2_m_1 : vec2<f32> = *(p_a_3_a_2_m_1);
+  let l_a_3_a_2_m_1_0 : f32 = (*(p_a_3_a_2_m_1))[0];
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_builtin.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_builtin.wgsl
new file mode 100644
index 0000000..d3fa958
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_builtin.wgsl

@@ -0,0 +1,15 @@
+struct S {
+  before : i32,
+  m : mat4x2<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    let t = transpose(u[2].m);
+    let l = length(u[0].m[1].yx);
+    let a = abs(u[0].m[1].yx.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_builtin.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_builtin.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..2d53757
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_builtin.wgsl.expected.dxc.hlsl

@@ -0,0 +1,23 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+
+float4x2 tint_symbol(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_3 / 4];
+  return float4x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)), asfloat(((scalar_offset_3 & 2) ? ubo_load_3.zw : ubo_load_3.xy)));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float2x4 t = transpose(tint_symbol(u, 264u));
+  const float l = length(asfloat(u[1].xy).yx);
+  const float a = abs(asfloat(u[1].xy).yx.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_builtin.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_builtin.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..2d53757
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_builtin.wgsl.expected.fxc.hlsl

@@ -0,0 +1,23 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+
+float4x2 tint_symbol(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_3 / 4];
+  return float4x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)), asfloat(((scalar_offset_3 & 2) ? ubo_load_3.zw : ubo_load_3.xy)));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float2x4 t = transpose(tint_symbol(u, 264u));
+  const float l = length(asfloat(u[1].xy).yx);
+  const float a = abs(asfloat(u[1].xy).yx.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_builtin.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_builtin.wgsl.expected.glsl
new file mode 100644
index 0000000..e469dd4
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_builtin.wgsl.expected.glsl

@@ -0,0 +1,80 @@
+#version 310 es
+
+struct S {
+  int before;
+  uint pad;
+  mat4x2 m;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  int after;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+};
+
+struct S_std140 {
+  int before;
+  uint pad;
+  vec2 m_0;
+  vec2 m_1;
+  vec2 m_2;
+  vec2 m_3;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  int after;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  S_std140 inner[4];
+} u;
+
+mat4x2 load_u_inner_2_m() {
+  return mat4x2(u.inner[2u].m_0, u.inner[2u].m_1, u.inner[2u].m_2, u.inner[2u].m_3);
+}
+
+void f() {
+  mat2x4 t = transpose(load_u_inner_2_m());
+  float l = length(u.inner[0u].m_1.yx);
+  float a = abs(u.inner[0u].m_1.yx[0u]);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_builtin.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_builtin.wgsl.expected.msl
new file mode 100644
index 0000000..9684c69
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_builtin.wgsl.expected.msl

@@ -0,0 +1,32 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
+  /* 0x0008 */ float4x2 m;
+  /* 0x0028 */ tint_array<int8_t, 24> tint_pad_1;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_2;
+};
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol [[buffer(0)]]) {
+  float2x4 const t = transpose((*(tint_symbol))[2].m);
+  float const l = length(float2((*(tint_symbol))[0].m[1]).yx);
+  float const a = fabs(float2((*(tint_symbol))[0].m[1]).yx[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_builtin.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_builtin.wgsl.expected.spvasm
new file mode 100644
index 0000000..5df4a4b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_builtin.wgsl.expected.spvasm

@@ -0,0 +1,86 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 55
+; Schema: 0
+               OpCapability Shader
+         %45 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "m_2"
+               OpMemberName %S_std140 4 "m_3"
+               OpMemberName %S_std140 5 "after"
+               OpName %u "u"
+               OpName %load_u_inner_2_m "load_u_inner_2_m"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 8
+               OpMemberDecorate %S_std140 2 Offset 16
+               OpMemberDecorate %S_std140 3 Offset 24
+               OpMemberDecorate %S_std140 4 Offset 32
+               OpMemberDecorate %S_std140 5 Offset 64
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+        %int = OpTypeInt 32 1
+      %float = OpTypeFloat 32
+    %v2float = OpTypeVector %float 2
+   %S_std140 = OpTypeStruct %int %v2float %v2float %v2float %v2float %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+%mat4v2float = OpTypeMatrix %v2float 4
+         %11 = OpTypeFunction %mat4v2float
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+     %uint_1 = OpConstant %uint 1
+%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
+     %uint_3 = OpConstant %uint 3
+       %void = OpTypeVoid
+         %36 = OpTypeFunction %void
+    %v4float = OpTypeVector %float 4
+%mat2v4float = OpTypeMatrix %v4float 2
+         %46 = OpConstantNull %uint
+%load_u_inner_2_m = OpFunction %mat4v2float None %11
+         %14 = OpLabel
+         %19 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %23 = OpAccessChain %_ptr_Uniform_v2float %19 %uint_1
+         %24 = OpLoad %v2float %23
+         %26 = OpAccessChain %_ptr_Uniform_v2float %19 %uint_2
+         %27 = OpLoad %v2float %26
+         %30 = OpAccessChain %_ptr_Uniform_v2float %19 %uint_3
+         %31 = OpLoad %v2float %30
+         %33 = OpAccessChain %_ptr_Uniform_v2float %19 %uint_4
+         %34 = OpLoad %v2float %33
+         %35 = OpCompositeConstruct %mat4v2float %24 %27 %31 %34
+               OpReturnValue %35
+               OpFunctionEnd
+          %f = OpFunction %void None %36
+         %39 = OpLabel
+         %43 = OpFunctionCall %mat4v2float %load_u_inner_2_m
+         %40 = OpTranspose %mat2v4float %43
+         %47 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %46 %uint_2
+         %48 = OpLoad %v2float %47
+         %49 = OpVectorShuffle %v2float %48 %48 1 0
+         %44 = OpExtInst %float %45 Length %49
+         %51 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %46 %uint_2
+         %52 = OpLoad %v2float %51
+         %53 = OpVectorShuffle %v2float %52 %52 1 0
+         %54 = OpCompositeExtract %float %53 0
+         %50 = OpExtInst %float %45 FAbs %54
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_builtin.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_builtin.wgsl.expected.wgsl
new file mode 100644
index 0000000..24e8b4a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_builtin.wgsl.expected.wgsl

@@ -0,0 +1,15 @@
+struct S {
+  before : i32,
+  m : mat4x2<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let t = transpose(u[2].m);
+  let l = length(u[0].m[1].yx);
+  let a = abs(u[0].m[1].yx.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_fn.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_fn.wgsl
new file mode 100644
index 0000000..1364229
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_fn.wgsl

@@ -0,0 +1,23 @@
+struct S {
+  before : i32,
+  m : mat4x2<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+fn a(a : array<S, 4>) {}
+fn b(s : S) {}
+fn c(m : mat4x2<f32>) {}
+fn d(v : vec2<f32>) {}
+fn e(f : f32) {}
+
+@compute @workgroup_size(1)
+fn f() {
+    a(u);
+    b(u[2]);
+    c(u[2].m);
+    d(u[0].m[1].yx);
+    e(u[0].m[1].yx.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_fn.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_fn.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..393469d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_fn.wgsl.expected.dxc.hlsl

@@ -0,0 +1,64 @@
+struct S {
+  int before;
+  float4x2 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+
+void a(S a_1[4]) {
+}
+
+void b(S s) {
+}
+
+void c(float4x2 m) {
+}
+
+void d(float2 v) {
+}
+
+void e(float f_1) {
+}
+
+float4x2 tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_3 / 4];
+  return float4x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)), asfloat(((scalar_offset_3 & 2) ? ubo_load_3.zw : ubo_load_3.xy)));
+}
+
+S tint_symbol_1(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_4 = ((offset + 0u)) / 4;
+  const uint scalar_offset_5 = ((offset + 64u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4]), tint_symbol_3(buffer, (offset + 8u)), asint(buffer[scalar_offset_5 / 4][scalar_offset_5 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(tint_symbol_1(u, 256u));
+  c(tint_symbol_3(u, 264u));
+  d(asfloat(u[1].xy).yx);
+  e(asfloat(u[1].xy).yx.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_fn.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_fn.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..393469d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_fn.wgsl.expected.fxc.hlsl

@@ -0,0 +1,64 @@
+struct S {
+  int before;
+  float4x2 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+
+void a(S a_1[4]) {
+}
+
+void b(S s) {
+}
+
+void c(float4x2 m) {
+}
+
+void d(float2 v) {
+}
+
+void e(float f_1) {
+}
+
+float4x2 tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_3 / 4];
+  return float4x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)), asfloat(((scalar_offset_3 & 2) ? ubo_load_3.zw : ubo_load_3.xy)));
+}
+
+S tint_symbol_1(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_4 = ((offset + 0u)) / 4;
+  const uint scalar_offset_5 = ((offset + 64u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4]), tint_symbol_3(buffer, (offset + 8u)), asint(buffer[scalar_offset_5 / 4][scalar_offset_5 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(tint_symbol_1(u, 256u));
+  c(tint_symbol_3(u, 264u));
+  d(asfloat(u[1].xy).yx);
+  e(asfloat(u[1].xy).yx.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_fn.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_fn.wgsl.expected.glsl
new file mode 100644
index 0000000..0fa73a0
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_fn.wgsl.expected.glsl

@@ -0,0 +1,111 @@
+#version 310 es
+
+struct S {
+  int before;
+  uint pad;
+  mat4x2 m;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  int after;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+};
+
+struct S_std140 {
+  int before;
+  uint pad;
+  vec2 m_0;
+  vec2 m_1;
+  vec2 m_2;
+  vec2 m_3;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  int after;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  S_std140 inner[4];
+} u;
+
+void a(S a_1[4]) {
+}
+
+void b(S s) {
+}
+
+void c(mat4x2 m) {
+}
+
+void d(vec2 v) {
+}
+
+void e(float f_1) {
+}
+
+S conv_S(S_std140 val) {
+  return S(val.before, val.pad, mat4x2(val.m_0, val.m_1, val.m_2, val.m_3), val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.after, val.pad_7, val.pad_8, val.pad_9, val.pad_10, val.pad_11, val.pad_12, val.pad_13, val.pad_14, val.pad_15, val.pad_16, val.pad_17, val.pad_18, val.pad_19, val.pad_20, val.pad_21);
+}
+
+S[4] conv_arr4_S(S_std140 val[4]) {
+  S arr[4] = S[4](S(0, 0u, mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_S(val[i]);
+    }
+  }
+  return arr;
+}
+
+mat4x2 load_u_inner_2_m() {
+  return mat4x2(u.inner[2u].m_0, u.inner[2u].m_1, u.inner[2u].m_2, u.inner[2u].m_3);
+}
+
+void f() {
+  a(conv_arr4_S(u.inner));
+  b(conv_S(u.inner[2u]));
+  c(load_u_inner_2_m());
+  d(u.inner[0u].m_1.yx);
+  e(u.inner[0u].m_1.yx[0u]);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_fn.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_fn.wgsl.expected.msl
new file mode 100644
index 0000000..e8c64c3
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_fn.wgsl.expected.msl

@@ -0,0 +1,49 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
+  /* 0x0008 */ float4x2 m;
+  /* 0x0028 */ tint_array<int8_t, 24> tint_pad_1;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_2;
+};
+
+void a(tint_array<S, 4> a_1) {
+}
+
+void b(S s) {
+}
+
+void c(float4x2 m) {
+}
+
+void d(float2 v) {
+}
+
+void e(float f_1) {
+}
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol [[buffer(0)]]) {
+  a(*(tint_symbol));
+  b((*(tint_symbol))[2]);
+  c((*(tint_symbol))[2].m);
+  d(float2((*(tint_symbol))[0].m[1]).yx);
+  e(float2((*(tint_symbol))[0].m[1]).yx[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_fn.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_fn.wgsl.expected.spvasm
new file mode 100644
index 0000000..5136314
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_fn.wgsl.expected.spvasm

@@ -0,0 +1,211 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 128
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "m_2"
+               OpMemberName %S_std140 4 "m_3"
+               OpMemberName %S_std140 5 "after"
+               OpName %u "u"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %a "a"
+               OpName %a_1 "a_1"
+               OpName %b "b"
+               OpName %s "s"
+               OpName %c "c"
+               OpName %m "m"
+               OpName %d "d"
+               OpName %v "v"
+               OpName %e "e"
+               OpName %f_1 "f_1"
+               OpName %conv_S "conv_S"
+               OpName %val "val"
+               OpName %conv_arr4_S "conv_arr4_S"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_u_inner_2_m "load_u_inner_2_m"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 8
+               OpMemberDecorate %S_std140 2 Offset 16
+               OpMemberDecorate %S_std140 3 Offset 24
+               OpMemberDecorate %S_std140 4 Offset 32
+               OpMemberDecorate %S_std140 5 Offset 64
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 8
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 8
+               OpMemberDecorate %S 2 Offset 64
+               OpDecorate %_arr_S_uint_4 ArrayStride 128
+        %int = OpTypeInt 32 1
+      %float = OpTypeFloat 32
+    %v2float = OpTypeVector %float 2
+   %S_std140 = OpTypeStruct %int %v2float %v2float %v2float %v2float %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+       %void = OpTypeVoid
+%mat4v2float = OpTypeMatrix %v2float 4
+          %S = OpTypeStruct %int %mat4v2float %int
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+         %11 = OpTypeFunction %void %_arr_S_uint_4
+         %19 = OpTypeFunction %void %S
+         %23 = OpTypeFunction %void %mat4v2float
+         %27 = OpTypeFunction %void %v2float
+         %31 = OpTypeFunction %void %float
+         %35 = OpTypeFunction %S %S_std140
+         %47 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
+%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
+         %53 = OpConstantNull %_arr_S_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %56 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
+         %69 = OpConstantNull %_arr_S_std140_uint_4
+%_ptr_Function_S = OpTypePointer Function %S
+%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
+     %uint_1 = OpConstant %uint 1
+         %82 = OpTypeFunction %mat4v2float
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
+     %uint_3 = OpConstant %uint 3
+        %105 = OpTypeFunction %void
+%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
+          %a = OpFunction %void None %11
+        %a_1 = OpFunctionParameter %_arr_S_uint_4
+         %18 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %b = OpFunction %void None %19
+          %s = OpFunctionParameter %S
+         %22 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %c = OpFunction %void None %23
+          %m = OpFunctionParameter %mat4v2float
+         %26 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %d = OpFunction %void None %27
+          %v = OpFunctionParameter %v2float
+         %30 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %e = OpFunction %void None %31
+        %f_1 = OpFunctionParameter %float
+         %34 = OpLabel
+               OpReturn
+               OpFunctionEnd
+     %conv_S = OpFunction %S None %35
+        %val = OpFunctionParameter %S_std140
+         %38 = OpLabel
+         %39 = OpCompositeExtract %int %val 0
+         %40 = OpCompositeExtract %v2float %val 1
+         %41 = OpCompositeExtract %v2float %val 2
+         %42 = OpCompositeExtract %v2float %val 3
+         %43 = OpCompositeExtract %v2float %val 4
+         %44 = OpCompositeConstruct %mat4v2float %40 %41 %42 %43
+         %45 = OpCompositeExtract %int %val 5
+         %46 = OpCompositeConstruct %S %39 %44 %45
+               OpReturnValue %46
+               OpFunctionEnd
+%conv_arr4_S = OpFunction %_arr_S_uint_4 None %47
+      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
+         %50 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %53
+          %i = OpVariable %_ptr_Function_uint Function %56
+%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %69
+               OpBranch %57
+         %57 = OpLabel
+               OpLoopMerge %58 %59 None
+               OpBranch %60
+         %60 = OpLabel
+         %62 = OpLoad %uint %i
+         %63 = OpULessThan %bool %62 %uint_4
+         %61 = OpLogicalNot %bool %63
+               OpSelectionMerge %65 None
+               OpBranchConditional %61 %66 %65
+         %66 = OpLabel
+               OpBranch %58
+         %65 = OpLabel
+               OpStore %var_for_index %val_0
+         %70 = OpLoad %uint %i
+         %72 = OpAccessChain %_ptr_Function_S %arr %70
+         %74 = OpLoad %uint %i
+         %76 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %74
+         %77 = OpLoad %S_std140 %76
+         %73 = OpFunctionCall %S %conv_S %77
+               OpStore %72 %73
+               OpBranch %59
+         %59 = OpLabel
+         %78 = OpLoad %uint %i
+         %80 = OpIAdd %uint %78 %uint_1
+               OpStore %i %80
+               OpBranch %57
+         %58 = OpLabel
+         %81 = OpLoad %_arr_S_uint_4 %arr
+               OpReturnValue %81
+               OpFunctionEnd
+%load_u_inner_2_m = OpFunction %mat4v2float None %82
+         %84 = OpLabel
+         %89 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %92 = OpAccessChain %_ptr_Uniform_v2float %89 %uint_1
+         %93 = OpLoad %v2float %92
+         %95 = OpAccessChain %_ptr_Uniform_v2float %89 %uint_2
+         %96 = OpLoad %v2float %95
+         %99 = OpAccessChain %_ptr_Uniform_v2float %89 %uint_3
+        %100 = OpLoad %v2float %99
+        %102 = OpAccessChain %_ptr_Uniform_v2float %89 %uint_4
+        %103 = OpLoad %v2float %102
+        %104 = OpCompositeConstruct %mat4v2float %93 %96 %100 %103
+               OpReturnValue %104
+               OpFunctionEnd
+          %f = OpFunction %void None %105
+        %107 = OpLabel
+        %111 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
+        %112 = OpLoad %_arr_S_std140_uint_4 %111
+        %109 = OpFunctionCall %_arr_S_uint_4 %conv_arr4_S %112
+        %108 = OpFunctionCall %void %a %109
+        %115 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+        %116 = OpLoad %S_std140 %115
+        %114 = OpFunctionCall %S %conv_S %116
+        %113 = OpFunctionCall %void %b %114
+        %118 = OpFunctionCall %mat4v2float %load_u_inner_2_m
+        %117 = OpFunctionCall %void %c %118
+        %120 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %56 %uint_2
+        %121 = OpLoad %v2float %120
+        %122 = OpVectorShuffle %v2float %121 %121 1 0
+        %119 = OpFunctionCall %void %d %122
+        %124 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %56 %uint_2
+        %125 = OpLoad %v2float %124
+        %126 = OpVectorShuffle %v2float %125 %125 1 0
+        %127 = OpCompositeExtract %float %126 0
+        %123 = OpFunctionCall %void %e %127
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_fn.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_fn.wgsl.expected.wgsl
new file mode 100644
index 0000000..9f3741e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_fn.wgsl.expected.wgsl

@@ -0,0 +1,32 @@
+struct S {
+  before : i32,
+  m : mat4x2<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+fn a(a : array<S, 4>) {
+}
+
+fn b(s : S) {
+}
+
+fn c(m : mat4x2<f32>) {
+}
+
+fn d(v : vec2<f32>) {
+}
+
+fn e(f : f32) {
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  a(u);
+  b(u[2]);
+  c(u[2].m);
+  d(u[0].m[1].yx);
+  e(u[0].m[1].yx.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_private.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_private.wgsl
new file mode 100644
index 0000000..d6ea0aa
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_private.wgsl

@@ -0,0 +1,17 @@
+struct S {
+  before : i32,
+  m : mat4x2<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+var<private> p : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    p = u;
+    p[1] = u[2];
+    p[3].m = u[2].m;
+    p[1].m[0] = u[0].m[1].yx;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_private.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_private.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..60c112b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_private.wgsl.expected.dxc.hlsl

@@ -0,0 +1,49 @@
+struct S {
+  int before;
+  float4x2 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+static S p[4] = (S[4])0;
+
+float4x2 tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_3 / 4];
+  return float4x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)), asfloat(((scalar_offset_3 & 2) ? ubo_load_3.zw : ubo_load_3.xy)));
+}
+
+S tint_symbol_1(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_4 = ((offset + 0u)) / 4;
+  const uint scalar_offset_5 = ((offset + 64u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4]), tint_symbol_3(buffer, (offset + 8u)), asint(buffer[scalar_offset_5 / 4][scalar_offset_5 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = tint_symbol_1(u, 256u);
+  p[3].m = tint_symbol_3(u, 264u);
+  p[1].m[0] = asfloat(u[1].xy).yx;
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_private.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_private.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..60c112b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_private.wgsl.expected.fxc.hlsl

@@ -0,0 +1,49 @@
+struct S {
+  int before;
+  float4x2 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+static S p[4] = (S[4])0;
+
+float4x2 tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_3 / 4];
+  return float4x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)), asfloat(((scalar_offset_3 & 2) ? ubo_load_3.zw : ubo_load_3.xy)));
+}
+
+S tint_symbol_1(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_4 = ((offset + 0u)) / 4;
+  const uint scalar_offset_5 = ((offset + 64u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4]), tint_symbol_3(buffer, (offset + 8u)), asint(buffer[scalar_offset_5 / 4][scalar_offset_5 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = tint_symbol_1(u, 256u);
+  p[3].m = tint_symbol_3(u, 264u);
+  p[1].m[0] = asfloat(u[1].xy).yx;
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_private.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_private.wgsl.expected.glsl
new file mode 100644
index 0000000..bdcf50b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_private.wgsl.expected.glsl

@@ -0,0 +1,96 @@
+#version 310 es
+
+struct S {
+  int before;
+  uint pad;
+  mat4x2 m;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  int after;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+};
+
+struct S_std140 {
+  int before;
+  uint pad;
+  vec2 m_0;
+  vec2 m_1;
+  vec2 m_2;
+  vec2 m_3;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  int after;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  S_std140 inner[4];
+} u;
+
+S p[4] = S[4](S(0, 0u, mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+S conv_S(S_std140 val) {
+  return S(val.before, val.pad, mat4x2(val.m_0, val.m_1, val.m_2, val.m_3), val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.after, val.pad_7, val.pad_8, val.pad_9, val.pad_10, val.pad_11, val.pad_12, val.pad_13, val.pad_14, val.pad_15, val.pad_16, val.pad_17, val.pad_18, val.pad_19, val.pad_20, val.pad_21);
+}
+
+S[4] conv_arr4_S(S_std140 val[4]) {
+  S arr[4] = S[4](S(0, 0u, mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_S(val[i]);
+    }
+  }
+  return arr;
+}
+
+mat4x2 load_u_inner_2_m() {
+  return mat4x2(u.inner[2u].m_0, u.inner[2u].m_1, u.inner[2u].m_2, u.inner[2u].m_3);
+}
+
+void f() {
+  p = conv_arr4_S(u.inner);
+  p[1] = conv_S(u.inner[2u]);
+  p[3].m = load_u_inner_2_m();
+  p[1].m[0] = u.inner[0u].m_1.yx;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_private.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_private.wgsl.expected.msl
new file mode 100644
index 0000000..017641a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_private.wgsl.expected.msl

@@ -0,0 +1,34 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
+  /* 0x0008 */ float4x2 m;
+  /* 0x0028 */ tint_array<int8_t, 24> tint_pad_1;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_2;
+};
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol_1 [[buffer(0)]]) {
+  thread tint_array<S, 4> tint_symbol = {};
+  tint_symbol = *(tint_symbol_1);
+  tint_symbol[1] = (*(tint_symbol_1))[2];
+  tint_symbol[3].m = (*(tint_symbol_1))[2].m;
+  tint_symbol[1].m[0] = float2((*(tint_symbol_1))[0].m[1]).yx;
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_private.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_private.wgsl.expected.spvasm
new file mode 100644
index 0000000..9b3c6b0
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_private.wgsl.expected.spvasm

@@ -0,0 +1,178 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 110
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "m_2"
+               OpMemberName %S_std140 4 "m_3"
+               OpMemberName %S_std140 5 "after"
+               OpName %u "u"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %p "p"
+               OpName %conv_S "conv_S"
+               OpName %val "val"
+               OpName %conv_arr4_S "conv_arr4_S"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_u_inner_2_m "load_u_inner_2_m"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 8
+               OpMemberDecorate %S_std140 2 Offset 16
+               OpMemberDecorate %S_std140 3 Offset 24
+               OpMemberDecorate %S_std140 4 Offset 32
+               OpMemberDecorate %S_std140 5 Offset 64
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 8
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 8
+               OpMemberDecorate %S 2 Offset 64
+               OpDecorate %_arr_S_uint_4 ArrayStride 128
+        %int = OpTypeInt 32 1
+      %float = OpTypeFloat 32
+    %v2float = OpTypeVector %float 2
+   %S_std140 = OpTypeStruct %int %v2float %v2float %v2float %v2float %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+%mat4v2float = OpTypeMatrix %v2float 4
+          %S = OpTypeStruct %int %mat4v2float %int
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+%_ptr_Private__arr_S_uint_4 = OpTypePointer Private %_arr_S_uint_4
+         %16 = OpConstantNull %_arr_S_uint_4
+          %p = OpVariable %_ptr_Private__arr_S_uint_4 Private %16
+         %17 = OpTypeFunction %S %S_std140
+         %29 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
+%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %37 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
+         %50 = OpConstantNull %_arr_S_std140_uint_4
+%_ptr_Function_S = OpTypePointer Function %S
+%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
+     %uint_1 = OpConstant %uint 1
+         %63 = OpTypeFunction %mat4v2float
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
+     %uint_3 = OpConstant %uint 3
+       %void = OpTypeVoid
+         %86 = OpTypeFunction %void
+%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
+      %int_1 = OpConstant %int 1
+%_ptr_Private_S = OpTypePointer Private %S
+      %int_3 = OpConstant %int 3
+%_ptr_Private_mat4v2float = OpTypePointer Private %mat4v2float
+        %104 = OpConstantNull %int
+%_ptr_Private_v2float = OpTypePointer Private %v2float
+     %conv_S = OpFunction %S None %17
+        %val = OpFunctionParameter %S_std140
+         %20 = OpLabel
+         %21 = OpCompositeExtract %int %val 0
+         %22 = OpCompositeExtract %v2float %val 1
+         %23 = OpCompositeExtract %v2float %val 2
+         %24 = OpCompositeExtract %v2float %val 3
+         %25 = OpCompositeExtract %v2float %val 4
+         %26 = OpCompositeConstruct %mat4v2float %22 %23 %24 %25
+         %27 = OpCompositeExtract %int %val 5
+         %28 = OpCompositeConstruct %S %21 %26 %27
+               OpReturnValue %28
+               OpFunctionEnd
+%conv_arr4_S = OpFunction %_arr_S_uint_4 None %29
+      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
+         %32 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %16
+          %i = OpVariable %_ptr_Function_uint Function %37
+%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %50
+               OpBranch %38
+         %38 = OpLabel
+               OpLoopMerge %39 %40 None
+               OpBranch %41
+         %41 = OpLabel
+         %43 = OpLoad %uint %i
+         %44 = OpULessThan %bool %43 %uint_4
+         %42 = OpLogicalNot %bool %44
+               OpSelectionMerge %46 None
+               OpBranchConditional %42 %47 %46
+         %47 = OpLabel
+               OpBranch %39
+         %46 = OpLabel
+               OpStore %var_for_index %val_0
+         %51 = OpLoad %uint %i
+         %53 = OpAccessChain %_ptr_Function_S %arr %51
+         %55 = OpLoad %uint %i
+         %57 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %55
+         %58 = OpLoad %S_std140 %57
+         %54 = OpFunctionCall %S %conv_S %58
+               OpStore %53 %54
+               OpBranch %40
+         %40 = OpLabel
+         %59 = OpLoad %uint %i
+         %61 = OpIAdd %uint %59 %uint_1
+               OpStore %i %61
+               OpBranch %38
+         %39 = OpLabel
+         %62 = OpLoad %_arr_S_uint_4 %arr
+               OpReturnValue %62
+               OpFunctionEnd
+%load_u_inner_2_m = OpFunction %mat4v2float None %63
+         %65 = OpLabel
+         %70 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %73 = OpAccessChain %_ptr_Uniform_v2float %70 %uint_1
+         %74 = OpLoad %v2float %73
+         %76 = OpAccessChain %_ptr_Uniform_v2float %70 %uint_2
+         %77 = OpLoad %v2float %76
+         %80 = OpAccessChain %_ptr_Uniform_v2float %70 %uint_3
+         %81 = OpLoad %v2float %80
+         %83 = OpAccessChain %_ptr_Uniform_v2float %70 %uint_4
+         %84 = OpLoad %v2float %83
+         %85 = OpCompositeConstruct %mat4v2float %74 %77 %81 %84
+               OpReturnValue %85
+               OpFunctionEnd
+          %f = OpFunction %void None %86
+         %89 = OpLabel
+         %92 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
+         %93 = OpLoad %_arr_S_std140_uint_4 %92
+         %90 = OpFunctionCall %_arr_S_uint_4 %conv_arr4_S %93
+               OpStore %p %90
+         %96 = OpAccessChain %_ptr_Private_S %p %int_1
+         %98 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %99 = OpLoad %S_std140 %98
+         %97 = OpFunctionCall %S %conv_S %99
+               OpStore %96 %97
+        %102 = OpAccessChain %_ptr_Private_mat4v2float %p %int_3 %uint_1
+        %103 = OpFunctionCall %mat4v2float %load_u_inner_2_m
+               OpStore %102 %103
+        %106 = OpAccessChain %_ptr_Private_v2float %p %int_1 %uint_1 %104
+        %107 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %37 %uint_2
+        %108 = OpLoad %v2float %107
+        %109 = OpVectorShuffle %v2float %108 %108 1 0
+               OpStore %106 %109
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_private.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_private.wgsl.expected.wgsl
new file mode 100644
index 0000000..e2e9e62
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_private.wgsl.expected.wgsl

@@ -0,0 +1,18 @@
+struct S {
+  before : i32,
+  m : mat4x2<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+var<private> p : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  p = u;
+  p[1] = u[2];
+  p[3].m = u[2].m;
+  p[1].m[0] = u[0].m[1].yx;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_storage.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_storage.wgsl
new file mode 100644
index 0000000..df4ecdc
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_storage.wgsl

@@ -0,0 +1,17 @@
+struct S {
+  before : i32,
+  m : mat4x2<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    s = u;
+    s[1] = u[2];
+    s[3].m = u[2].m;
+    s[1].m[0] = u[0].m[1].yx;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_storage.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_storage.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..5dc514f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_storage.wgsl.expected.dxc.hlsl

@@ -0,0 +1,71 @@
+struct S {
+  int before;
+  float4x2 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol_3(RWByteAddressBuffer buffer, uint offset, float4x2 value) {
+  buffer.Store2((offset + 0u), asuint(value[0u]));
+  buffer.Store2((offset + 8u), asuint(value[1u]));
+  buffer.Store2((offset + 16u), asuint(value[2u]));
+  buffer.Store2((offset + 24u), asuint(value[3u]));
+}
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, S value) {
+  buffer.Store((offset + 0u), asuint(value.before));
+  tint_symbol_3(buffer, (offset + 8u), value.m);
+  buffer.Store((offset + 64u), asuint(value.after));
+}
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, S value[4]) {
+  S array[4] = value;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      tint_symbol_1(buffer, (offset + (i * 128u)), array[i]);
+    }
+  }
+}
+
+float4x2 tint_symbol_8(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_3 / 4];
+  return float4x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)), asfloat(((scalar_offset_3 & 2) ? ubo_load_3.zw : ubo_load_3.xy)));
+}
+
+S tint_symbol_6(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_4 = ((offset + 0u)) / 4;
+  const uint scalar_offset_5 = ((offset + 64u)) / 4;
+  const S tint_symbol_10 = {asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4]), tint_symbol_8(buffer, (offset + 8u)), asint(buffer[scalar_offset_5 / 4][scalar_offset_5 % 4])};
+  return tint_symbol_10;
+}
+
+typedef S tint_symbol_5_ret[4];
+tint_symbol_5_ret tint_symbol_5(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_6(buffer, (offset + (i_1 * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_5(u, 0u));
+  tint_symbol_1(s, 128u, tint_symbol_6(u, 256u));
+  tint_symbol_3(s, 392u, tint_symbol_8(u, 264u));
+  s.Store2(136u, asuint(asfloat(u[1].xy).yx));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_storage.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_storage.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..5dc514f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_storage.wgsl.expected.fxc.hlsl

@@ -0,0 +1,71 @@
+struct S {
+  int before;
+  float4x2 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol_3(RWByteAddressBuffer buffer, uint offset, float4x2 value) {
+  buffer.Store2((offset + 0u), asuint(value[0u]));
+  buffer.Store2((offset + 8u), asuint(value[1u]));
+  buffer.Store2((offset + 16u), asuint(value[2u]));
+  buffer.Store2((offset + 24u), asuint(value[3u]));
+}
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, S value) {
+  buffer.Store((offset + 0u), asuint(value.before));
+  tint_symbol_3(buffer, (offset + 8u), value.m);
+  buffer.Store((offset + 64u), asuint(value.after));
+}
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, S value[4]) {
+  S array[4] = value;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      tint_symbol_1(buffer, (offset + (i * 128u)), array[i]);
+    }
+  }
+}
+
+float4x2 tint_symbol_8(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_3 / 4];
+  return float4x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)), asfloat(((scalar_offset_3 & 2) ? ubo_load_3.zw : ubo_load_3.xy)));
+}
+
+S tint_symbol_6(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_4 = ((offset + 0u)) / 4;
+  const uint scalar_offset_5 = ((offset + 64u)) / 4;
+  const S tint_symbol_10 = {asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4]), tint_symbol_8(buffer, (offset + 8u)), asint(buffer[scalar_offset_5 / 4][scalar_offset_5 % 4])};
+  return tint_symbol_10;
+}
+
+typedef S tint_symbol_5_ret[4];
+tint_symbol_5_ret tint_symbol_5(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_6(buffer, (offset + (i_1 * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_5(u, 0u));
+  tint_symbol_1(s, 128u, tint_symbol_6(u, 256u));
+  tint_symbol_3(s, 392u, tint_symbol_8(u, 264u));
+  s.Store2(136u, asuint(asfloat(u[1].xy).yx));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_storage.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_storage.wgsl.expected.glsl
new file mode 100644
index 0000000..f0e1ba9
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_storage.wgsl.expected.glsl

@@ -0,0 +1,99 @@
+#version 310 es
+
+struct S {
+  int before;
+  uint pad;
+  mat4x2 m;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  int after;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+};
+
+struct S_std140 {
+  int before;
+  uint pad;
+  vec2 m_0;
+  vec2 m_1;
+  vec2 m_2;
+  vec2 m_3;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  int after;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  S_std140 inner[4];
+} u;
+
+layout(binding = 1, std430) buffer u_block_ssbo {
+  S inner[4];
+} s;
+
+S conv_S(S_std140 val) {
+  return S(val.before, val.pad, mat4x2(val.m_0, val.m_1, val.m_2, val.m_3), val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.after, val.pad_7, val.pad_8, val.pad_9, val.pad_10, val.pad_11, val.pad_12, val.pad_13, val.pad_14, val.pad_15, val.pad_16, val.pad_17, val.pad_18, val.pad_19, val.pad_20, val.pad_21);
+}
+
+S[4] conv_arr4_S(S_std140 val[4]) {
+  S arr[4] = S[4](S(0, 0u, mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_S(val[i]);
+    }
+  }
+  return arr;
+}
+
+mat4x2 load_u_inner_2_m() {
+  return mat4x2(u.inner[2u].m_0, u.inner[2u].m_1, u.inner[2u].m_2, u.inner[2u].m_3);
+}
+
+void f() {
+  s.inner = conv_arr4_S(u.inner);
+  s.inner[1] = conv_S(u.inner[2u]);
+  s.inner[3].m = load_u_inner_2_m();
+  s.inner[1].m[0] = u.inner[0u].m_1.yx;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_storage.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_storage.wgsl.expected.msl
new file mode 100644
index 0000000..73076ac
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_storage.wgsl.expected.msl

@@ -0,0 +1,33 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
+  /* 0x0008 */ float4x2 m;
+  /* 0x0028 */ tint_array<int8_t, 24> tint_pad_1;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_2;
+};
+
+kernel void f(device tint_array<S, 4>* tint_symbol [[buffer(1)]], const constant tint_array<S, 4>* tint_symbol_1 [[buffer(0)]]) {
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[2];
+  (*(tint_symbol))[3].m = (*(tint_symbol_1))[2].m;
+  (*(tint_symbol))[1].m[0] = float2((*(tint_symbol_1))[0].m[1]).yx;
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_storage.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_storage.wgsl.expected.spvasm
new file mode 100644
index 0000000..a640f22
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_storage.wgsl.expected.spvasm

@@ -0,0 +1,187 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 113
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "m_2"
+               OpMemberName %S_std140 4 "m_3"
+               OpMemberName %S_std140 5 "after"
+               OpName %u "u"
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %s "s"
+               OpName %conv_S "conv_S"
+               OpName %val "val"
+               OpName %conv_arr4_S "conv_arr4_S"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_u_inner_2_m "load_u_inner_2_m"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 8
+               OpMemberDecorate %S_std140 2 Offset 16
+               OpMemberDecorate %S_std140 3 Offset 24
+               OpMemberDecorate %S_std140 4 Offset 32
+               OpMemberDecorate %S_std140 5 Offset 64
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 8
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 8
+               OpMemberDecorate %S 2 Offset 64
+               OpDecorate %_arr_S_uint_4 ArrayStride 128
+               OpDecorate %s DescriptorSet 0
+               OpDecorate %s Binding 1
+        %int = OpTypeInt 32 1
+      %float = OpTypeFloat 32
+    %v2float = OpTypeVector %float 2
+   %S_std140 = OpTypeStruct %int %v2float %v2float %v2float %v2float %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+%mat4v2float = OpTypeMatrix %v2float 4
+          %S = OpTypeStruct %int %mat4v2float %int
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+    %u_block = OpTypeStruct %_arr_S_uint_4
+%_ptr_StorageBuffer_u_block = OpTypePointer StorageBuffer %u_block
+          %s = OpVariable %_ptr_StorageBuffer_u_block StorageBuffer
+         %17 = OpTypeFunction %S %S_std140
+         %29 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
+%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
+         %35 = OpConstantNull %_arr_S_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %38 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
+         %51 = OpConstantNull %_arr_S_std140_uint_4
+%_ptr_Function_S = OpTypePointer Function %S
+%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
+     %uint_1 = OpConstant %uint 1
+         %64 = OpTypeFunction %mat4v2float
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
+     %uint_3 = OpConstant %uint 3
+       %void = OpTypeVoid
+         %87 = OpTypeFunction %void
+%_ptr_StorageBuffer__arr_S_uint_4 = OpTypePointer StorageBuffer %_arr_S_uint_4
+%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
+      %int_1 = OpConstant %int 1
+%_ptr_StorageBuffer_S = OpTypePointer StorageBuffer %S
+      %int_3 = OpConstant %int 3
+%_ptr_StorageBuffer_mat4v2float = OpTypePointer StorageBuffer %mat4v2float
+        %107 = OpConstantNull %int
+%_ptr_StorageBuffer_v2float = OpTypePointer StorageBuffer %v2float
+     %conv_S = OpFunction %S None %17
+        %val = OpFunctionParameter %S_std140
+         %20 = OpLabel
+         %21 = OpCompositeExtract %int %val 0
+         %22 = OpCompositeExtract %v2float %val 1
+         %23 = OpCompositeExtract %v2float %val 2
+         %24 = OpCompositeExtract %v2float %val 3
+         %25 = OpCompositeExtract %v2float %val 4
+         %26 = OpCompositeConstruct %mat4v2float %22 %23 %24 %25
+         %27 = OpCompositeExtract %int %val 5
+         %28 = OpCompositeConstruct %S %21 %26 %27
+               OpReturnValue %28
+               OpFunctionEnd
+%conv_arr4_S = OpFunction %_arr_S_uint_4 None %29
+      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
+         %32 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %35
+          %i = OpVariable %_ptr_Function_uint Function %38
+%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %51
+               OpBranch %39
+         %39 = OpLabel
+               OpLoopMerge %40 %41 None
+               OpBranch %42
+         %42 = OpLabel
+         %44 = OpLoad %uint %i
+         %45 = OpULessThan %bool %44 %uint_4
+         %43 = OpLogicalNot %bool %45
+               OpSelectionMerge %47 None
+               OpBranchConditional %43 %48 %47
+         %48 = OpLabel
+               OpBranch %40
+         %47 = OpLabel
+               OpStore %var_for_index %val_0
+         %52 = OpLoad %uint %i
+         %54 = OpAccessChain %_ptr_Function_S %arr %52
+         %56 = OpLoad %uint %i
+         %58 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %56
+         %59 = OpLoad %S_std140 %58
+         %55 = OpFunctionCall %S %conv_S %59
+               OpStore %54 %55
+               OpBranch %41
+         %41 = OpLabel
+         %60 = OpLoad %uint %i
+         %62 = OpIAdd %uint %60 %uint_1
+               OpStore %i %62
+               OpBranch %39
+         %40 = OpLabel
+         %63 = OpLoad %_arr_S_uint_4 %arr
+               OpReturnValue %63
+               OpFunctionEnd
+%load_u_inner_2_m = OpFunction %mat4v2float None %64
+         %66 = OpLabel
+         %71 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %74 = OpAccessChain %_ptr_Uniform_v2float %71 %uint_1
+         %75 = OpLoad %v2float %74
+         %77 = OpAccessChain %_ptr_Uniform_v2float %71 %uint_2
+         %78 = OpLoad %v2float %77
+         %81 = OpAccessChain %_ptr_Uniform_v2float %71 %uint_3
+         %82 = OpLoad %v2float %81
+         %84 = OpAccessChain %_ptr_Uniform_v2float %71 %uint_4
+         %85 = OpLoad %v2float %84
+         %86 = OpCompositeConstruct %mat4v2float %75 %78 %82 %85
+               OpReturnValue %86
+               OpFunctionEnd
+          %f = OpFunction %void None %87
+         %90 = OpLabel
+         %92 = OpAccessChain %_ptr_StorageBuffer__arr_S_uint_4 %s %uint_0
+         %95 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
+         %96 = OpLoad %_arr_S_std140_uint_4 %95
+         %93 = OpFunctionCall %_arr_S_uint_4 %conv_arr4_S %96
+               OpStore %92 %93
+         %99 = OpAccessChain %_ptr_StorageBuffer_S %s %uint_0 %int_1
+        %101 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+        %102 = OpLoad %S_std140 %101
+        %100 = OpFunctionCall %S %conv_S %102
+               OpStore %99 %100
+        %105 = OpAccessChain %_ptr_StorageBuffer_mat4v2float %s %uint_0 %int_3 %uint_1
+        %106 = OpFunctionCall %mat4v2float %load_u_inner_2_m
+               OpStore %105 %106
+        %109 = OpAccessChain %_ptr_StorageBuffer_v2float %s %uint_0 %int_1 %uint_1 %107
+        %110 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %38 %uint_2
+        %111 = OpLoad %v2float %110
+        %112 = OpVectorShuffle %v2float %111 %111 1 0
+               OpStore %109 %112
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_storage.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_storage.wgsl.expected.wgsl
new file mode 100644
index 0000000..b32e1a4
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_storage.wgsl.expected.wgsl

@@ -0,0 +1,18 @@
+struct S {
+  before : i32,
+  m : mat4x2<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  s = u;
+  s[1] = u[2];
+  s[3].m = u[2].m;
+  s[1].m[0] = u[0].m[1].yx;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_workgroup.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_workgroup.wgsl
new file mode 100644
index 0000000..8e75a9b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_workgroup.wgsl

@@ -0,0 +1,17 @@
+struct S {
+  before : i32,
+  m : mat4x2<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+var<workgroup> w : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    w = u;
+    w[1] = u[2];
+    w[3].m = u[2].m;
+    w[1].m[0] = u[0].m[1].yx;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_workgroup.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_workgroup.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..94f8f11
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_workgroup.wgsl.expected.dxc.hlsl

@@ -0,0 +1,65 @@
+struct S {  // Three data members only; this struct declares no padding members.
+  int before;
+  float4x2 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];  // 32 x 16 = 512 bytes of raw words; tint_symbol_2 walks it as four elements 128 bytes apart.
+};
+groupshared S w[4];  // Written by f_inner.
+
+struct tint_symbol_1 {  // Entry-point input wrapper; f() forwards its only field to f_inner.
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+float4x2 tint_symbol_5(uint4 buffer[32], uint offset) {  // Builds a float4x2 from four float2 pieces at byte offsets offset + 0, 8, 16 and 24.
+  const uint scalar_offset = ((offset + 0u)) / 4;  // byte offset -> index of a 32-bit word
+  uint4 ubo_load = buffer[scalar_offset / 4];  // word index -> index of the uint4 that holds it
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_3 / 4];
+  return float4x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)), asfloat(((scalar_offset_3 & 2) ? ubo_load_3.zw : ubo_load_3.xy)));  // per piece: (word index & 2) picks .zw, otherwise .xy, of the loaded uint4
+}
+
+S tint_symbol_3(uint4 buffer[32], uint offset) {  // Builds one S from the bytes starting at `offset`.
+  const uint scalar_offset_4 = ((offset + 0u)) / 4;  // word index used for `before`
+  const uint scalar_offset_5 = ((offset + 64u)) / 4;  // word index used for `after`
+  const S tint_symbol_8 = {asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4]), tint_symbol_5(buffer, (offset + 8u)), asint(buffer[scalar_offset_5 / 4][scalar_offset_5 % 4])};  // `m` is loaded by tint_symbol_5 from offset + 8
+  return tint_symbol_8;
+}
+
+typedef S tint_symbol_2_ret[4];  // Return type of tint_symbol_2.
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[32], uint offset) {  // Loads four S values starting at `offset`.
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 128u)));  // consecutive elements are read 128 bytes apart
+    }
+  }
+  return arr;
+}
+
+void f_inner(uint local_invocation_index) {  // Zero-fills w, synchronises, then performs four copies from u into w.
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {  // visits w[local_invocation_index] .. w[3]
+      const uint i = idx;
+      const S tint_symbol_7 = (S)0;
+      w[i] = tint_symbol_7;
+    }
+  }
+  GroupMemoryBarrierWithGroupSync();  // sits between the zero-fill above and the copies below
+  w = tint_symbol_2(u, 0u);  // all four elements
+  w[1] = tint_symbol_3(u, 256u);  // 256 = 2 * 128: the element at index 2
+  w[3].m = tint_symbol_5(u, 264u);  // 264 = 256 + 8: the matrix of that same element
+  w[1].m[0] = asfloat(u[1].xy).yx;  // u[1].xy is bytes 16..23 of the buffer; .yx swaps the two floats
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {  // Compute entry point: unwraps the group index and calls f_inner.
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_workgroup.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_workgroup.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..94f8f11
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_workgroup.wgsl.expected.fxc.hlsl

@@ -0,0 +1,65 @@
+struct S {  // Three data members only; this struct declares no padding members.
+  int before;
+  float4x2 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];  // 32 x 16 = 512 bytes of raw words; tint_symbol_2 walks it as four elements 128 bytes apart.
+};
+groupshared S w[4];  // Written by f_inner.
+
+struct tint_symbol_1 {  // Entry-point input wrapper; f() forwards its only field to f_inner.
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+float4x2 tint_symbol_5(uint4 buffer[32], uint offset) {  // Builds a float4x2 from four float2 pieces at byte offsets offset + 0, 8, 16 and 24.
+  const uint scalar_offset = ((offset + 0u)) / 4;  // byte offset -> index of a 32-bit word
+  uint4 ubo_load = buffer[scalar_offset / 4];  // word index -> index of the uint4 that holds it
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_3 / 4];
+  return float4x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)), asfloat(((scalar_offset_3 & 2) ? ubo_load_3.zw : ubo_load_3.xy)));  // per piece: (word index & 2) picks .zw, otherwise .xy, of the loaded uint4
+}
+
+S tint_symbol_3(uint4 buffer[32], uint offset) {  // Builds one S from the bytes starting at `offset`.
+  const uint scalar_offset_4 = ((offset + 0u)) / 4;  // word index used for `before`
+  const uint scalar_offset_5 = ((offset + 64u)) / 4;  // word index used for `after`
+  const S tint_symbol_8 = {asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4]), tint_symbol_5(buffer, (offset + 8u)), asint(buffer[scalar_offset_5 / 4][scalar_offset_5 % 4])};  // `m` is loaded by tint_symbol_5 from offset + 8
+  return tint_symbol_8;
+}
+
+typedef S tint_symbol_2_ret[4];  // Return type of tint_symbol_2.
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[32], uint offset) {  // Loads four S values starting at `offset`.
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 128u)));  // consecutive elements are read 128 bytes apart
+    }
+  }
+  return arr;
+}
+
+void f_inner(uint local_invocation_index) {  // Zero-fills w, synchronises, then performs four copies from u into w.
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {  // visits w[local_invocation_index] .. w[3]
+      const uint i = idx;
+      const S tint_symbol_7 = (S)0;
+      w[i] = tint_symbol_7;
+    }
+  }
+  GroupMemoryBarrierWithGroupSync();  // sits between the zero-fill above and the copies below
+  w = tint_symbol_2(u, 0u);  // all four elements
+  w[1] = tint_symbol_3(u, 256u);  // 256 = 2 * 128: the element at index 2
+  w[3].m = tint_symbol_5(u, 264u);  // 264 = 256 + 8: the matrix of that same element
+  w[1].m[0] = asfloat(u[1].xy).yx;  // u[1].xy is bytes 16..23 of the buffer; .yx swaps the two floats
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {  // Compute entry point: unwraps the group index and calls f_inner.
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_workgroup.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_workgroup.wgsl.expected.glsl
new file mode 100644
index 0000000..05df402
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_workgroup.wgsl.expected.glsl

@@ -0,0 +1,104 @@
+#version 310 es
+
+struct S {  // Data members are before, m and after; the pad* members are only ever copied through (conv_S) or set to 0u (f).
+  int before;
+  uint pad;
+  mat4x2 m;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  int after;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+};
+
+struct S_std140 {  // Same member list as S, except the matrix is held as four vec2 members m_0..m_3.
+  int before;
+  uint pad;
+  vec2 m_0;  // conv_S passes m_0..m_3, in this order, to the mat4x2 constructor
+  vec2 m_1;
+  vec2 m_2;
+  vec2 m_3;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  int after;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {  // Uniform block read by f() through the instance name `u`.
+  S_std140 inner[4];
+} u;
+
+shared S w[4];  // Written by f().
+S conv_S(S_std140 val) {  // Converts an S_std140 to an S: m is assembled from m_0..m_3, every other member is copied as-is.
+  return S(val.before, val.pad, mat4x2(val.m_0, val.m_1, val.m_2, val.m_3), val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.after, val.pad_7, val.pad_8, val.pad_9, val.pad_10, val.pad_11, val.pad_12, val.pad_13, val.pad_14, val.pad_15, val.pad_16, val.pad_17, val.pad_18, val.pad_19, val.pad_20, val.pad_21);
+}
+
+S[4] conv_arr4_S(S_std140 val[4]) {  // Converts each of the four elements of `val` with conv_S.
+  S arr[4] = S[4](S(0, 0u, mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, mat4x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));  // all-zero initial value; every element is overwritten by the loop below
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_S(val[i]);
+    }
+  }
+  return arr;
+}
+
+mat4x2 load_u_inner_2_m() {  // Assembles a mat4x2 from members m_0..m_3 of u.inner[2u].
+  return mat4x2(u.inner[2u].m_0, u.inner[2u].m_1, u.inner[2u].m_2, u.inner[2u].m_3);
+}
+
+void f(uint local_invocation_index) {  // Zero-fills w, synchronises, then performs four copies from u into w.
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {  // visits w[local_invocation_index] .. w[3]
+      uint i = idx;
+      S tint_symbol = S(0, 0u, mat4x2(vec2(0.0f), vec2(0.0f), vec2(0.0f), vec2(0.0f)), 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u);
+      w[i] = tint_symbol;
+    }
+  }
+  barrier();  // sits between the zero-fill above and the copies below
+  w = conv_arr4_S(u.inner);  // all four elements
+  w[1] = conv_S(u.inner[2u]);  // one struct element
+  w[3].m = load_u_inner_2_m();  // the matrix of u.inner[2u]
+  w[1].m[0] = u.inner[0u].m_1.yx;  // m_1 of element 0, with its two components swapped
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {  // Shader entry point: passes gl_LocalInvocationIndex to f().
+  f(gl_LocalInvocationIndex);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_workgroup.wgsl.expected.msl
new file mode 100644
index 0000000..02501da
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_workgroup.wgsl.expected.msl

@@ -0,0 +1,48 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {  // Wrapper around `T elements[N]`; each operator[] overload returns elements[i] directly, with no range check.
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {  // The leading /* 0x.... */ comments give each member's byte offset; tint_pad* are int8_t filler arrays between data members.
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
+  /* 0x0008 */ float4x2 m;
+  /* 0x0028 */ tint_array<int8_t, 24> tint_pad_1;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_2;  // 0x44 + 60 = 0x80 (128)
+};
+
+struct tint_symbol_6 {  // Holds the array `w`; the kernel f() receives a threadgroup pointer to this struct.
+  tint_array<S, 4> w;
+};
+
+void f_inner(uint local_invocation_index, threadgroup tint_array<S, 4>* const tint_symbol_1, const constant tint_array<S, 4>* const tint_symbol_2) {  // tint_symbol_1 is the destination array, tint_symbol_2 the source array.
+  for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {  // value-initialises destination elements local_invocation_index .. 3
+    uint const i = idx;
+    S const tint_symbol = S{};
+    (*(tint_symbol_1))[i] = tint_symbol;
+  }
+  threadgroup_barrier(mem_flags::mem_threadgroup);  // sits between the initialisation above and the copies below
+  *(tint_symbol_1) = *(tint_symbol_2);  // the whole array
+  (*(tint_symbol_1))[1] = (*(tint_symbol_2))[2];  // one struct element
+  (*(tint_symbol_1))[3].m = (*(tint_symbol_2))[2].m;  // one matrix member
+  (*(tint_symbol_1))[1].m[0] = float2((*(tint_symbol_2))[0].m[1]).yx;  // one matrix column, with its two components swapped
+}
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol_5 [[buffer(0)]], threadgroup tint_symbol_6* tint_symbol_4 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {  // Kernel entry point: forwards its arguments to f_inner.
+  threadgroup tint_array<S, 4>* const tint_symbol_3 = &((*(tint_symbol_4)).w);  // pointer to the `w` member of the threadgroup struct
+  f_inner(local_invocation_index, tint_symbol_3, tint_symbol_5);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_workgroup.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_workgroup.wgsl.expected.spvasm
new file mode 100644
index 0000000..1a26894
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_workgroup.wgsl.expected.spvasm

@@ -0,0 +1,221 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 135
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f" %local_invocation_index_1 ; compute entry point, with one interface variable
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %local_invocation_index_1 "local_invocation_index_1"
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "m_2"
+               OpMemberName %S_std140 4 "m_3"
+               OpMemberName %S_std140 5 "after"
+               OpName %u "u"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %w "w"
+               OpName %conv_S "conv_S"
+               OpName %val "val"
+               OpName %conv_arr4_S "conv_arr4_S"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_u_inner_2_m "load_u_inner_2_m"
+               OpName %f_inner "f_inner"
+               OpName %local_invocation_index "local_invocation_index"
+               OpName %idx "idx"
+               OpName %f "f"
+               OpDecorate %local_invocation_index_1 BuiltIn LocalInvocationIndex
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0 ; before
+               OpMemberDecorate %S_std140 1 Offset 8 ; m_0
+               OpMemberDecorate %S_std140 2 Offset 16 ; m_1
+               OpMemberDecorate %S_std140 3 Offset 24 ; m_2
+               OpMemberDecorate %S_std140 4 Offset 32 ; m_3
+               OpMemberDecorate %S_std140 5 Offset 64 ; after
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpMemberDecorate %S 0 Offset 0 ; before
+               OpMemberDecorate %S 1 Offset 8 ; m
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 8
+               OpMemberDecorate %S 2 Offset 64 ; after
+               OpDecorate %_arr_S_uint_4 ArrayStride 128
+       %uint = OpTypeInt 32 0
+%_ptr_Input_uint = OpTypePointer Input %uint
+%local_invocation_index_1 = OpVariable %_ptr_Input_uint Input
+        %int = OpTypeInt 32 1
+      %float = OpTypeFloat 32
+    %v2float = OpTypeVector %float 2
+   %S_std140 = OpTypeStruct %int %v2float %v2float %v2float %v2float %int ; the matrix is held as four separate v2float members
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+%mat4v2float = OpTypeMatrix %v2float 4
+          %S = OpTypeStruct %int %mat4v2float %int
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+%_ptr_Workgroup__arr_S_uint_4 = OpTypePointer Workgroup %_arr_S_uint_4
+          %w = OpVariable %_ptr_Workgroup__arr_S_uint_4 Workgroup
+         %18 = OpTypeFunction %S %S_std140
+         %30 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
+%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
+         %36 = OpConstantNull %_arr_S_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %39 = OpConstantNull %uint ; doubles as the uint index 0 in f_inner's last access chain
+       %bool = OpTypeBool
+%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
+         %52 = OpConstantNull %_arr_S_std140_uint_4
+%_ptr_Function_S = OpTypePointer Function %S
+%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
+     %uint_1 = OpConstant %uint 1
+         %65 = OpTypeFunction %mat4v2float
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
+     %uint_3 = OpConstant %uint 3
+       %void = OpTypeVoid
+         %88 = OpTypeFunction %void %uint
+%_ptr_Workgroup_S = OpTypePointer Workgroup %S
+        %106 = OpConstantNull %S ; the value stored by f_inner's zero-fill loop
+   %uint_264 = OpConstant %uint 264 ; 0x108; the last operand of OpControlBarrier in f_inner
+%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
+      %int_1 = OpConstant %int 1
+      %int_3 = OpConstant %int 3
+%_ptr_Workgroup_mat4v2float = OpTypePointer Workgroup %mat4v2float
+        %124 = OpConstantNull %int ; used as the int index 0 in f_inner's last store
+%_ptr_Workgroup_v2float = OpTypePointer Workgroup %v2float
+        %130 = OpTypeFunction %void
+     %conv_S = OpFunction %S None %18 ; converts one S_std140 value into an S value
+        %val = OpFunctionParameter %S_std140
+         %21 = OpLabel
+         %22 = OpCompositeExtract %int %val 0 ; before
+         %23 = OpCompositeExtract %v2float %val 1 ; m_0
+         %24 = OpCompositeExtract %v2float %val 2 ; m_1
+         %25 = OpCompositeExtract %v2float %val 3 ; m_2
+         %26 = OpCompositeExtract %v2float %val 4 ; m_3
+         %27 = OpCompositeConstruct %mat4v2float %23 %24 %25 %26 ; the matrix, built from its four columns
+         %28 = OpCompositeExtract %int %val 5 ; after
+         %29 = OpCompositeConstruct %S %22 %27 %28
+               OpReturnValue %29
+               OpFunctionEnd
+%conv_arr4_S = OpFunction %_arr_S_uint_4 None %30 ; converts a 4-element S_std140 array into a 4-element S array
+      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
+         %33 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %36
+          %i = OpVariable %_ptr_Function_uint Function %39 ; loop counter, initialised with the null (zero) uint
+%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %52
+               OpBranch %40
+         %40 = OpLabel
+               OpLoopMerge %41 %42 None
+               OpBranch %43
+         %43 = OpLabel
+         %45 = OpLoad %uint %i
+         %46 = OpULessThan %bool %45 %uint_4
+         %44 = OpLogicalNot %bool %46 ; true once i >= 4
+               OpSelectionMerge %48 None
+               OpBranchConditional %44 %49 %48
+         %49 = OpLabel
+               OpBranch %41 ; leave the loop
+         %48 = OpLabel
+               OpStore %var_for_index %val_0 ; the parameter is copied into a variable, which the access chain below indexes with i
+         %53 = OpLoad %uint %i
+         %55 = OpAccessChain %_ptr_Function_S %arr %53
+         %57 = OpLoad %uint %i
+         %59 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %57
+         %60 = OpLoad %S_std140 %59
+         %56 = OpFunctionCall %S %conv_S %60
+               OpStore %55 %56 ; arr[i] = conv_S(val[i])
+               OpBranch %42
+         %42 = OpLabel
+         %61 = OpLoad %uint %i
+         %63 = OpIAdd %uint %61 %uint_1
+               OpStore %i %63 ; i = i + 1
+               OpBranch %40
+         %41 = OpLabel
+         %64 = OpLoad %_arr_S_uint_4 %arr
+               OpReturnValue %64
+               OpFunctionEnd
+%load_u_inner_2_m = OpFunction %mat4v2float None %65 ; builds the matrix of u.inner[2] from its four column members
+         %67 = OpLabel
+         %72 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2 ; pointer to u.inner[2]
+         %75 = OpAccessChain %_ptr_Uniform_v2float %72 %uint_1 ; member 1, m_0
+         %76 = OpLoad %v2float %75
+         %78 = OpAccessChain %_ptr_Uniform_v2float %72 %uint_2 ; member 2, m_1
+         %79 = OpLoad %v2float %78
+         %82 = OpAccessChain %_ptr_Uniform_v2float %72 %uint_3 ; member 3, m_2
+         %83 = OpLoad %v2float %82
+         %85 = OpAccessChain %_ptr_Uniform_v2float %72 %uint_4 ; member 4, m_3
+         %86 = OpLoad %v2float %85
+         %87 = OpCompositeConstruct %mat4v2float %76 %79 %83 %86
+               OpReturnValue %87
+               OpFunctionEnd
+    %f_inner = OpFunction %void None %88 ; zero-fills w, issues a control barrier, then performs four copies from u into w
+%local_invocation_index = OpFunctionParameter %uint
+         %92 = OpLabel
+        %idx = OpVariable %_ptr_Function_uint Function %39
+               OpStore %idx %local_invocation_index ; the zero-fill loop starts at the invocation index
+               OpBranch %94
+         %94 = OpLabel
+               OpLoopMerge %95 %96 None
+               OpBranch %97
+         %97 = OpLabel
+         %99 = OpLoad %uint %idx
+        %100 = OpULessThan %bool %99 %uint_4
+         %98 = OpLogicalNot %bool %100 ; true once idx >= 4
+               OpSelectionMerge %101 None
+               OpBranchConditional %98 %102 %101
+        %102 = OpLabel
+               OpBranch %95 ; leave the loop
+        %101 = OpLabel
+        %103 = OpLoad %uint %idx
+        %105 = OpAccessChain %_ptr_Workgroup_S %w %103
+               OpStore %105 %106 ; w[idx] = null S
+               OpBranch %96
+         %96 = OpLabel
+        %107 = OpLoad %uint %idx
+        %108 = OpIAdd %uint %107 %uint_1
+               OpStore %idx %108 ; idx = idx + 1
+               OpBranch %94
+         %95 = OpLabel
+               OpControlBarrier %uint_2 %uint_2 %uint_264 ; operand values are 2, 2 and 264 (0x108)
+        %113 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
+        %114 = OpLoad %_arr_S_std140_uint_4 %113
+        %111 = OpFunctionCall %_arr_S_uint_4 %conv_arr4_S %114
+               OpStore %w %111 ; w = converted copy of the whole uniform array
+        %116 = OpAccessChain %_ptr_Workgroup_S %w %int_1
+        %118 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+        %119 = OpLoad %S_std140 %118
+        %117 = OpFunctionCall %S %conv_S %119
+               OpStore %116 %117 ; w[1] = converted u.inner[2]
+        %122 = OpAccessChain %_ptr_Workgroup_mat4v2float %w %int_3 %uint_1
+        %123 = OpFunctionCall %mat4v2float %load_u_inner_2_m
+               OpStore %122 %123 ; w[3].m = matrix of u.inner[2]
+        %126 = OpAccessChain %_ptr_Workgroup_v2float %w %int_1 %uint_1 %124 ; w[1].m[0]
+        %127 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0 %39 %uint_2 ; element 0, member 2 (m_1)
+        %128 = OpLoad %v2float %127
+        %129 = OpVectorShuffle %v2float %128 %128 1 0 ; swap the two components
+               OpStore %126 %129
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %130 ; entry point: loads the invocation index and calls f_inner
+        %132 = OpLabel
+        %134 = OpLoad %uint %local_invocation_index_1
+        %133 = OpFunctionCall %void %f_inner %134
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_workgroup.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_workgroup.wgsl.expected.wgsl
new file mode 100644
index 0000000..dd47e56
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x2_f32/to_workgroup.wgsl.expected.wgsl

@@ -0,0 +1,18 @@
+struct S {  // Element type shared by the uniform array `u` and the workgroup array `w`.
+  before : i32,
+  m : mat4x2<f32>,
+  @align(64) @size(16)  // These attributes apply to the member declared on the next line (`after`).
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;  // Source of every copy in f().
+
+var<workgroup> w : array<S, 4>;  // Destination of every copy in f().
+
+@compute @workgroup_size(1)
+fn f() {  // Compute entry point: copies from `u` into `w` at four different granularities.
+  w = u;  // the whole array
+  w[1] = u[2];  // one struct element
+  w[3].m = u[2].m;  // one matrix member
+  w[1].m[0] = u[0].m[1].yx;  // one matrix column, with its two components swapped
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f16/dynamic_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/dynamic_index_via_ptr.wgsl
new file mode 100644
index 0000000..d66d3ac
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/dynamic_index_via_ptr.wgsl

@@ -0,0 +1,34 @@
+enable f16;
+
+struct Inner {  // Wraps a single f16 matrix.
+  @size(64)  // Applies to the member declared on the next line (`m`).
+  m : mat4x3<f16>,
+}
+
+struct Outer {  // Wraps an array of four Inner values.
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;  // The buffer that every pointer in f() is derived from.
+
+var<private> counter = 0;
+fn i() -> i32 { counter++; return counter; }  // Increments counter, then returns the new value: 1, 2, 3, ... on successive calls.
+
+@compute @workgroup_size(1)
+fn f() {  // Builds a chain of pointers into `a`, one level deeper each time, then loads through each of them.
+  let p_a           = &a;
+  let p_a_i         = &((*p_a)[i()]);  // first i() call
+  let p_a_i_a       = &((*p_a_i).a);
+  let p_a_i_a_i     = &((*p_a_i_a)[i()]);  // second i() call
+  let p_a_i_a_i_m   = &((*p_a_i_a_i).m);
+  let p_a_i_a_i_m_i = &((*p_a_i_a_i_m)[i()]);  // third i() call
+
+
+  let l_a             : array<Outer, 4> =  *p_a;
+  let l_a_i           : Outer           =  *p_a_i;
+  let l_a_i_a         : array<Inner, 4> =  *p_a_i_a;
+  let l_a_i_a_i       : Inner           =  *p_a_i_a_i;
+  let l_a_i_a_i_m     : mat4x3<f16>     =  *p_a_i_a_i_m;
+  let l_a_i_a_i_m_i   : vec3<f16>       =  *p_a_i_a_i_m_i;
+  let l_a_i_a_i_m_i_i : f16             = (*p_a_i_a_i_m_i)[i()];  // NOTE(review): fourth i() call, so the index is 4 when counter starts at 0, past a vec3's components 0..2; confirm this is intended.
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..96d9ebc
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,98 @@
+struct Inner {  // Wraps a single 16-bit float matrix.
+  matrix<float16_t, 4, 3> m;
+};
+struct Outer {  // Wraps an array of four Inner values.
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];  // 64 x 16 = 1024 bytes of raw words; tint_symbol_4 walks it as four Outer values 256 bytes apart.
+};
+static int counter = 0;  // Only modified by i().
+
+int i() {  // Increments counter, then returns the new value.
+  counter = (counter + 1);
+  return counter;
+}
+
+matrix<float16_t, 4, 3> tint_symbol_8(uint4 buffer[64], uint offset) {  // Builds the matrix from four 3-component vectors read at byte offsets offset + 0, 8, 16 and 24.
+  const uint scalar_offset = ((offset + 0u)) / 4;  // byte offset -> index of a 32-bit word
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];  // word index -> index of the uint4 that holds it
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);  // the two words (8 bytes) for this vector
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));  // low 16 bits of each word: components 0 and 2
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);  // high 16 bits of the first word: component 1
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  return matrix<float16_t, 4, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]), vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));  // each vector is assembled as (xz[0], y, xz[1])
+}
+
+Inner tint_symbol_7(uint4 buffer[64], uint offset) {  // Builds one Inner; its matrix is loaded by tint_symbol_8 from the same offset.
+  const Inner tint_symbol_11 = {tint_symbol_8(buffer, (offset + 0u))};
+  return tint_symbol_11;
+}
+
+typedef Inner tint_symbol_6_ret[4];  // Return type of tint_symbol_6.
+tint_symbol_6_ret tint_symbol_6(uint4 buffer[64], uint offset) {  // Loads four Inner values starting at `offset`.
+  Inner arr[4] = (Inner[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_7(buffer, (offset + (i_1 * 64u)));  // consecutive elements are read 64 bytes apart
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_5(uint4 buffer[64], uint offset) {  // Builds one Outer; its array is loaded by tint_symbol_6 from the same offset.
+  const Outer tint_symbol_12 = {tint_symbol_6(buffer, (offset + 0u))};
+  return tint_symbol_12;
+}
+
+typedef Outer tint_symbol_4_ret[4];  // Return type of tint_symbol_4.
+tint_symbol_4_ret tint_symbol_4(uint4 buffer[64], uint offset) {  // Loads four Outer values starting at `offset`.
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    for(uint i_2 = 0u; (i_2 < 4u); i_2 = (i_2 + 1u)) {
+      arr_1[i_2] = tint_symbol_5(buffer, (offset + (i_2 * 256u)));  // consecutive elements are read 256 bytes apart
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {  // Compute entry point: captures three i() results up front, then loads from `a` at successively deeper levels.
+  const int p_a_i_save = i();  // index scaled by 256 below
+  const int p_a_i_a_i_save = i();  // index scaled by 64 below
+  const int p_a_i_a_i_m_i_save = i();  // index scaled by 8 below
+  const Outer l_a[4] = tint_symbol_4(a, 0u);
+  const Outer l_a_i = tint_symbol_5(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a[4] = tint_symbol_6(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a_i = tint_symbol_7(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const matrix<float16_t, 4, 3> l_a_i_a_i_m = tint_symbol_8(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const uint scalar_offset_4 = ((((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))) + (8u * uint(p_a_i_a_i_m_i_save)))) / 4;  // word index of the selected 3-component vector
+  uint4 ubo_load_9 = a[scalar_offset_4 / 4];
+  uint2 ubo_load_8 = ((scalar_offset_4 & 2) ? ubo_load_9.zw : ubo_load_9.xy);
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));  // low 16 bits of each word: components 0 and 2
+  float16_t ubo_load_8_y = f16tof32(ubo_load_8[0] >> 16);  // high 16 bits of the first word: component 1
+  const vector<float16_t, 3> l_a_i_a_i_m_i = vector<float16_t, 3>(ubo_load_8_xz[0], ubo_load_8_y, ubo_load_8_xz[1]);
+  const int tint_symbol = p_a_i_save;
+  const int tint_symbol_1 = p_a_i_a_i_save;
+  const int tint_symbol_2 = p_a_i_a_i_m_i_save;
+  const int tint_symbol_3 = i();  // fourth i() call; index scaled by 2 below
+  const uint scalar_offset_bytes = (((((256u * uint(tint_symbol)) + (64u * uint(tint_symbol_1))) + (8u * uint(tint_symbol_2))) + (2u * uint(tint_symbol_3))));
+  const uint scalar_offset_index = scalar_offset_bytes / 4;
+  const float16_t l_a_i_a_i_m_i_i = float16_t(f16tof32(((a[scalar_offset_index / 4][scalar_offset_index % 4] >> (scalar_offset_bytes % 4 == 0 ? 0 : 16)) & 0xFFFF)));  // takes the low 16 bits of the word when the byte offset is a multiple of 4, otherwise the high 16 bits
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..ac36ddd
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,103 @@
+SKIP: FAILED
+
+struct Inner {  // Wraps a single 16-bit float matrix.
+  matrix<float16_t, 4, 3> m;
+};
+struct Outer {  // Wraps an array of four Inner values.
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];  // 64 x 16 = 1024 bytes of raw words; tint_symbol_4 walks it as four Outer values 256 bytes apart.
+};
+static int counter = 0;  // Only modified by i().
+
+int i() {  // Increments counter, then returns the new value.
+  counter = (counter + 1);
+  return counter;
+}
+
+matrix<float16_t, 4, 3> tint_symbol_8(uint4 buffer[64], uint offset) {  // Builds the matrix from four 3-component vectors read at byte offsets offset + 0, 8, 16 and 24.
+  const uint scalar_offset = ((offset + 0u)) / 4;  // byte offset -> index of a 32-bit word
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];  // word index -> index of the uint4 that holds it
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);  // the two words (8 bytes) for this vector
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));  // low 16 bits of each word: components 0 and 2
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);  // high 16 bits of the first word: component 1
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  return matrix<float16_t, 4, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]), vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));  // each vector is assembled as (xz[0], y, xz[1])
+}
+
+Inner tint_symbol_7(uint4 buffer[64], uint offset) {  // Builds one Inner; its matrix is loaded by tint_symbol_8 from the same offset.
+  const Inner tint_symbol_11 = {tint_symbol_8(buffer, (offset + 0u))};
+  return tint_symbol_11;
+}
+
+typedef Inner tint_symbol_6_ret[4];  // Return type of tint_symbol_6.
+tint_symbol_6_ret tint_symbol_6(uint4 buffer[64], uint offset) {  // Loads four Inner values starting at `offset`.
+  Inner arr[4] = (Inner[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_7(buffer, (offset + (i_1 * 64u)));  // consecutive elements are read 64 bytes apart
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_5(uint4 buffer[64], uint offset) {  // Builds one Outer; its array is loaded by tint_symbol_6 from the same offset.
+  const Outer tint_symbol_12 = {tint_symbol_6(buffer, (offset + 0u))};
+  return tint_symbol_12;
+}
+
+typedef Outer tint_symbol_4_ret[4];  // Return type of tint_symbol_4.
+tint_symbol_4_ret tint_symbol_4(uint4 buffer[64], uint offset) {  // Loads four Outer values starting at `offset`.
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    for(uint i_2 = 0u; (i_2 < 4u); i_2 = (i_2 + 1u)) {
+      arr_1[i_2] = tint_symbol_5(buffer, (offset + (i_2 * 256u)));  // consecutive elements are read 256 bytes apart
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {  // Compute entry point: captures three i() results up front, then loads from `a` at successively deeper levels.
+  const int p_a_i_save = i();  // index scaled by 256 below
+  const int p_a_i_a_i_save = i();  // index scaled by 64 below
+  const int p_a_i_a_i_m_i_save = i();  // index scaled by 8 below
+  const Outer l_a[4] = tint_symbol_4(a, 0u);
+  const Outer l_a_i = tint_symbol_5(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a[4] = tint_symbol_6(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a_i = tint_symbol_7(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const matrix<float16_t, 4, 3> l_a_i_a_i_m = tint_symbol_8(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const uint scalar_offset_4 = ((((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))) + (8u * uint(p_a_i_a_i_m_i_save)))) / 4;  // word index of the selected 3-component vector
+  uint4 ubo_load_9 = a[scalar_offset_4 / 4];
+  uint2 ubo_load_8 = ((scalar_offset_4 & 2) ? ubo_load_9.zw : ubo_load_9.xy);
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));  // low 16 bits of each word: components 0 and 2
+  float16_t ubo_load_8_y = f16tof32(ubo_load_8[0] >> 16);  // high 16 bits of the first word: component 1
+  const vector<float16_t, 3> l_a_i_a_i_m_i = vector<float16_t, 3>(ubo_load_8_xz[0], ubo_load_8_y, ubo_load_8_xz[1]);
+  const int tint_symbol = p_a_i_save;
+  const int tint_symbol_1 = p_a_i_a_i_save;
+  const int tint_symbol_2 = p_a_i_a_i_m_i_save;
+  const int tint_symbol_3 = i();  // fourth i() call; index scaled by 2 below
+  const uint scalar_offset_bytes = (((((256u * uint(tint_symbol)) + (64u * uint(tint_symbol_1))) + (8u * uint(tint_symbol_2))) + (2u * uint(tint_symbol_3))));
+  const uint scalar_offset_index = scalar_offset_bytes / 4;
+  const float16_t l_a_i_a_i_m_i_i = float16_t(f16tof32(((a[scalar_offset_index / 4][scalar_offset_index % 4] >> (scalar_offset_bytes % 4 == 0 ? 0 : 16)) & 0xFFFF)));  // takes the low 16 bits of the word when the byte offset is a multiple of 4, otherwise the high 16 bits
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x00000236CB5C19B0(2,10-18): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f16/dynamic_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/dynamic_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..3a626b7
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/dynamic_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,157 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct Inner {
+  f16mat4x3 m;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+};
+
+struct Inner_std140 {
+  f16vec3 m_0;
+  f16vec3 m_1;
+  f16vec3 m_2;
+  f16vec3 m_3;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+};
+
+struct Outer {
+  Inner a[4];
+};
+
+struct Outer_std140 {
+  Inner_std140 a[4];
+};
+
+layout(binding = 0, std140) uniform a_block_std140_ubo {
+  Outer_std140 inner[4];
+} a;
+
+int counter = 0;
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+Inner conv_Inner(Inner_std140 val) {
+  return Inner(f16mat4x3(val.m_0, val.m_1, val.m_2, val.m_3), val.pad, val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.pad_7);
+}
+
+Inner[4] conv_arr4_Inner(Inner_std140 val[4]) {
+  Inner arr[4] = Inner[4](Inner(f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_Inner(val[i]);
+    }
+  }
+  return arr;
+}
+
+Outer conv_Outer(Outer_std140 val) {
+  return Outer(conv_arr4_Inner(val.a));
+}
+
+Outer[4] conv_arr4_Outer(Outer_std140 val[4]) {
+  Outer arr[4] = Outer[4](Outer(Inner[4](Inner(f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))), Outer(Inner[4](Inner(f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))), Outer(Inner[4](Inner(f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))), Outer(Inner[4](Inner(f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4x3(0.0hf, 0.0hf, 
0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_Outer(val[i]);
+    }
+  }
+  return arr;
+}
+
+f16mat4x3 load_a_inner_p0_a_p1_m(uint p0, uint p1) {
+  uint s_save = p0;
+  uint s_save_1 = p1;
+  return f16mat4x3(a.inner[s_save].a[s_save_1].m_0, a.inner[s_save].a[s_save_1].m_1, a.inner[s_save].a[s_save_1].m_2, a.inner[s_save].a[s_save_1].m_3);
+}
+
+f16vec3 load_a_inner_p0_a_p1_m_p2(uint p0, uint p1, uint p2) {
+  switch(p2) {
+    case 0u: {
+      return a.inner[p0].a[p1].m_0;
+      break;
+    }
+    case 1u: {
+      return a.inner[p0].a[p1].m_1;
+      break;
+    }
+    case 2u: {
+      return a.inner[p0].a[p1].m_2;
+      break;
+    }
+    case 3u: {
+      return a.inner[p0].a[p1].m_3;
+      break;
+    }
+    default: {
+      return f16vec3(0.0hf);
+      break;
+    }
+  }
+}
+
+float16_t load_a_inner_p0_a_p1_m_p2_p3(uint p0, uint p1, uint p2, uint p3) {
+  switch(p2) {
+    case 0u: {
+      return a.inner[p0].a[p1].m_0[p3];
+      break;
+    }
+    case 1u: {
+      return a.inner[p0].a[p1].m_1[p3];
+      break;
+    }
+    case 2u: {
+      return a.inner[p0].a[p1].m_2[p3];
+      break;
+    }
+    case 3u: {
+      return a.inner[p0].a[p1].m_3[p3];
+      break;
+    }
+    default: {
+      return 0.0hf;
+      break;
+    }
+  }
+}
+
+void f() {
+  Outer p_a[4] = conv_arr4_Outer(a.inner);
+  int tint_symbol = i();
+  Outer p_a_i = conv_Outer(a.inner[tint_symbol]);
+  Inner p_a_i_a[4] = conv_arr4_Inner(a.inner[tint_symbol].a);
+  int tint_symbol_1 = i();
+  Inner p_a_i_a_i = conv_Inner(a.inner[tint_symbol].a[tint_symbol_1]);
+  f16mat4x3 p_a_i_a_i_m = load_a_inner_p0_a_p1_m(uint(tint_symbol), uint(tint_symbol_1));
+  int tint_symbol_2 = i();
+  f16vec3 p_a_i_a_i_m_i = load_a_inner_p0_a_p1_m_p2(uint(tint_symbol), uint(tint_symbol_1), uint(tint_symbol_2));
+  Outer l_a[4] = conv_arr4_Outer(a.inner);
+  Outer l_a_i = conv_Outer(a.inner[tint_symbol]);
+  Inner l_a_i_a[4] = conv_arr4_Inner(a.inner[tint_symbol].a);
+  Inner l_a_i_a_i = conv_Inner(a.inner[tint_symbol].a[tint_symbol_1]);
+  f16mat4x3 l_a_i_a_i_m = load_a_inner_p0_a_p1_m(uint(tint_symbol), uint(tint_symbol_1));
+  f16vec3 l_a_i_a_i_m_i = load_a_inner_p0_a_p1_m_p2(uint(tint_symbol), uint(tint_symbol_1), uint(tint_symbol_2));
+  int tint_symbol_3 = i();
+  float16_t l_a_i_a_i_m_i_i = load_a_inner_p0_a_p1_m_p2_p3(uint(tint_symbol), uint(tint_symbol_1), uint(tint_symbol_2), uint(tint_symbol_3));
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f16/dynamic_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/dynamic_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..1369d42
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/dynamic_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,49 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct Inner {
+  /* 0x0000 */ half4x3 m;
+  /* 0x0020 */ tint_array<int8_t, 32> tint_pad;
+};
+
+struct Outer {
+  /* 0x0000 */ tint_array<Inner, 4> a;
+};
+
+int i() {
+  thread int tint_symbol_4 = 0;
+  tint_symbol_4 = as_type<int>((as_type<uint>(tint_symbol_4) + as_type<uint>(1)));
+  return tint_symbol_4;
+}
+
+kernel void f(const constant tint_array<Outer, 4>* tint_symbol_5 [[buffer(0)]]) {
+  int const tint_symbol = i();
+  int const p_a_i_save = tint_symbol;
+  int const tint_symbol_1 = i();
+  int const p_a_i_a_i_save = tint_symbol_1;
+  int const tint_symbol_2 = i();
+  int const p_a_i_a_i_m_i_save = tint_symbol_2;
+  tint_array<Outer, 4> const l_a = *(tint_symbol_5);
+  Outer const l_a_i = (*(tint_symbol_5))[p_a_i_save];
+  tint_array<Inner, 4> const l_a_i_a = (*(tint_symbol_5))[p_a_i_save].a;
+  Inner const l_a_i_a_i = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save];
+  half4x3 const l_a_i_a_i_m = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save].m;
+  half3 const l_a_i_a_i_m_i = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save].m[p_a_i_a_i_m_i_save];
+  int const tint_symbol_3 = i();
+  half const l_a_i_a_i_m_i_i = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save].m[p_a_i_a_i_m_i_save][tint_symbol_3];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f16/dynamic_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/dynamic_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..0b6a55d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/dynamic_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,338 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 215
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %a_block_std140 "a_block_std140"
+               OpMemberName %a_block_std140 0 "inner"
+               OpName %Outer_std140 "Outer_std140"
+               OpMemberName %Outer_std140 0 "a"
+               OpName %Inner_std140 "Inner_std140"
+               OpMemberName %Inner_std140 0 "m_0"
+               OpMemberName %Inner_std140 1 "m_1"
+               OpMemberName %Inner_std140 2 "m_2"
+               OpMemberName %Inner_std140 3 "m_3"
+               OpName %a "a"
+               OpName %counter "counter"
+               OpName %i "i"
+               OpName %Inner "Inner"
+               OpMemberName %Inner 0 "m"
+               OpName %conv_Inner "conv_Inner"
+               OpName %val "val"
+               OpName %conv_arr4_Inner "conv_arr4_Inner"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i_0 "i"
+               OpName %var_for_index_1 "var_for_index_1"
+               OpName %Outer "Outer"
+               OpMemberName %Outer 0 "a"
+               OpName %conv_Outer "conv_Outer"
+               OpName %val_1 "val"
+               OpName %conv_arr4_Outer "conv_arr4_Outer"
+               OpName %val_2 "val"
+               OpName %arr_0 "arr"
+               OpName %i_1 "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_a_inner_p0_a_p1_m "load_a_inner_p0_a_p1_m"
+               OpName %p0 "p0"
+               OpName %p1 "p1"
+               OpName %load_a_inner_p0_a_p1_m_p2 "load_a_inner_p0_a_p1_m_p2"
+               OpName %p0_0 "p0"
+               OpName %p1_0 "p1"
+               OpName %p2 "p2"
+               OpName %load_a_inner_p0_a_p1_m_p2_p3 "load_a_inner_p0_a_p1_m_p2_p3"
+               OpName %p0_1 "p0"
+               OpName %p1_1 "p1"
+               OpName %p2_0 "p2"
+               OpName %p3 "p3"
+               OpName %f "f"
+               OpDecorate %a_block_std140 Block
+               OpMemberDecorate %a_block_std140 0 Offset 0
+               OpMemberDecorate %Outer_std140 0 Offset 0
+               OpMemberDecorate %Inner_std140 0 Offset 0
+               OpMemberDecorate %Inner_std140 1 Offset 8
+               OpMemberDecorate %Inner_std140 2 Offset 16
+               OpMemberDecorate %Inner_std140 3 Offset 24
+               OpDecorate %_arr_Inner_std140_uint_4 ArrayStride 64
+               OpDecorate %_arr_Outer_std140_uint_4 ArrayStride 256
+               OpDecorate %a NonWritable
+               OpDecorate %a DescriptorSet 0
+               OpDecorate %a Binding 0
+               OpMemberDecorate %Inner 0 Offset 0
+               OpMemberDecorate %Inner 0 ColMajor
+               OpMemberDecorate %Inner 0 MatrixStride 8
+               OpDecorate %_arr_Inner_uint_4 ArrayStride 64
+               OpMemberDecorate %Outer 0 Offset 0
+               OpDecorate %_arr_Outer_uint_4 ArrayStride 256
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+%Inner_std140 = OpTypeStruct %v3half %v3half %v3half %v3half
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_Inner_std140_uint_4 = OpTypeArray %Inner_std140 %uint_4
+%Outer_std140 = OpTypeStruct %_arr_Inner_std140_uint_4
+%_arr_Outer_std140_uint_4 = OpTypeArray %Outer_std140 %uint_4
+%a_block_std140 = OpTypeStruct %_arr_Outer_std140_uint_4
+%_ptr_Uniform_a_block_std140 = OpTypePointer Uniform %a_block_std140
+          %a = OpVariable %_ptr_Uniform_a_block_std140 Uniform
+        %int = OpTypeInt 32 1
+         %13 = OpConstantNull %int
+%_ptr_Private_int = OpTypePointer Private %int
+    %counter = OpVariable %_ptr_Private_int Private %13
+         %16 = OpTypeFunction %int
+      %int_1 = OpConstant %int 1
+ %mat4v3half = OpTypeMatrix %v3half 4
+      %Inner = OpTypeStruct %mat4v3half
+         %23 = OpTypeFunction %Inner %Inner_std140
+%_arr_Inner_uint_4 = OpTypeArray %Inner %uint_4
+         %35 = OpTypeFunction %_arr_Inner_uint_4 %_arr_Inner_std140_uint_4
+%_ptr_Function__arr_Inner_uint_4 = OpTypePointer Function %_arr_Inner_uint_4
+         %42 = OpConstantNull %_arr_Inner_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %45 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_Inner_std140_uint_4 = OpTypePointer Function %_arr_Inner_std140_uint_4
+         %58 = OpConstantNull %_arr_Inner_std140_uint_4
+%_ptr_Function_Inner = OpTypePointer Function %Inner
+%_ptr_Function_Inner_std140 = OpTypePointer Function %Inner_std140
+     %uint_1 = OpConstant %uint 1
+      %Outer = OpTypeStruct %_arr_Inner_uint_4
+         %71 = OpTypeFunction %Outer %Outer_std140
+%_arr_Outer_uint_4 = OpTypeArray %Outer %uint_4
+         %79 = OpTypeFunction %_arr_Outer_uint_4 %_arr_Outer_std140_uint_4
+%_ptr_Function__arr_Outer_uint_4 = OpTypePointer Function %_arr_Outer_uint_4
+         %86 = OpConstantNull %_arr_Outer_uint_4
+%_ptr_Function__arr_Outer_std140_uint_4 = OpTypePointer Function %_arr_Outer_std140_uint_4
+         %99 = OpConstantNull %_arr_Outer_std140_uint_4
+%_ptr_Function_Outer = OpTypePointer Function %Outer
+%_ptr_Function_Outer_std140 = OpTypePointer Function %Outer_std140
+        %111 = OpTypeFunction %mat4v3half %uint %uint
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_Inner_std140 = OpTypePointer Uniform %Inner_std140
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+     %uint_2 = OpConstant %uint 2
+     %uint_3 = OpConstant %uint 3
+        %136 = OpTypeFunction %v3half %uint %uint %uint
+        %156 = OpConstantNull %v3half
+        %157 = OpTypeFunction %half %uint %uint %uint %uint
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+        %179 = OpConstantNull %half
+       %void = OpTypeVoid
+        %180 = OpTypeFunction %void
+%_ptr_Uniform__arr_Outer_std140_uint_4 = OpTypePointer Uniform %_arr_Outer_std140_uint_4
+%_ptr_Uniform_Outer_std140 = OpTypePointer Uniform %Outer_std140
+%_ptr_Uniform__arr_Inner_std140_uint_4 = OpTypePointer Uniform %_arr_Inner_std140_uint_4
+          %i = OpFunction %int None %16
+         %18 = OpLabel
+         %19 = OpLoad %int %counter
+         %21 = OpIAdd %int %19 %int_1
+               OpStore %counter %21
+         %22 = OpLoad %int %counter
+               OpReturnValue %22
+               OpFunctionEnd
+ %conv_Inner = OpFunction %Inner None %23
+        %val = OpFunctionParameter %Inner_std140
+         %28 = OpLabel
+         %29 = OpCompositeExtract %v3half %val 0
+         %30 = OpCompositeExtract %v3half %val 1
+         %31 = OpCompositeExtract %v3half %val 2
+         %32 = OpCompositeExtract %v3half %val 3
+         %33 = OpCompositeConstruct %mat4v3half %29 %30 %31 %32
+         %34 = OpCompositeConstruct %Inner %33
+               OpReturnValue %34
+               OpFunctionEnd
+%conv_arr4_Inner = OpFunction %_arr_Inner_uint_4 None %35
+      %val_0 = OpFunctionParameter %_arr_Inner_std140_uint_4
+         %39 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_Inner_uint_4 Function %42
+        %i_0 = OpVariable %_ptr_Function_uint Function %45
+%var_for_index_1 = OpVariable %_ptr_Function__arr_Inner_std140_uint_4 Function %58
+               OpBranch %46
+         %46 = OpLabel
+               OpLoopMerge %47 %48 None
+               OpBranch %49
+         %49 = OpLabel
+         %51 = OpLoad %uint %i_0
+         %52 = OpULessThan %bool %51 %uint_4
+         %50 = OpLogicalNot %bool %52
+               OpSelectionMerge %54 None
+               OpBranchConditional %50 %55 %54
+         %55 = OpLabel
+               OpBranch %47
+         %54 = OpLabel
+               OpStore %var_for_index_1 %val_0
+         %59 = OpLoad %uint %i_0
+         %61 = OpAccessChain %_ptr_Function_Inner %arr %59
+         %63 = OpLoad %uint %i_0
+         %65 = OpAccessChain %_ptr_Function_Inner_std140 %var_for_index_1 %63
+         %66 = OpLoad %Inner_std140 %65
+         %62 = OpFunctionCall %Inner %conv_Inner %66
+               OpStore %61 %62
+               OpBranch %48
+         %48 = OpLabel
+         %67 = OpLoad %uint %i_0
+         %69 = OpIAdd %uint %67 %uint_1
+               OpStore %i_0 %69
+               OpBranch %46
+         %47 = OpLabel
+         %70 = OpLoad %_arr_Inner_uint_4 %arr
+               OpReturnValue %70
+               OpFunctionEnd
+ %conv_Outer = OpFunction %Outer None %71
+      %val_1 = OpFunctionParameter %Outer_std140
+         %75 = OpLabel
+         %77 = OpCompositeExtract %_arr_Inner_std140_uint_4 %val_1 0
+         %76 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr4_Inner %77
+         %78 = OpCompositeConstruct %Outer %76
+               OpReturnValue %78
+               OpFunctionEnd
+%conv_arr4_Outer = OpFunction %_arr_Outer_uint_4 None %79
+      %val_2 = OpFunctionParameter %_arr_Outer_std140_uint_4
+         %83 = OpLabel
+      %arr_0 = OpVariable %_ptr_Function__arr_Outer_uint_4 Function %86
+        %i_1 = OpVariable %_ptr_Function_uint Function %45
+%var_for_index = OpVariable %_ptr_Function__arr_Outer_std140_uint_4 Function %99
+               OpBranch %88
+         %88 = OpLabel
+               OpLoopMerge %89 %90 None
+               OpBranch %91
+         %91 = OpLabel
+         %93 = OpLoad %uint %i_1
+         %94 = OpULessThan %bool %93 %uint_4
+         %92 = OpLogicalNot %bool %94
+               OpSelectionMerge %95 None
+               OpBranchConditional %92 %96 %95
+         %96 = OpLabel
+               OpBranch %89
+         %95 = OpLabel
+               OpStore %var_for_index %val_2
+        %100 = OpLoad %uint %i_1
+        %102 = OpAccessChain %_ptr_Function_Outer %arr_0 %100
+        %104 = OpLoad %uint %i_1
+        %106 = OpAccessChain %_ptr_Function_Outer_std140 %var_for_index %104
+        %107 = OpLoad %Outer_std140 %106
+        %103 = OpFunctionCall %Outer %conv_Outer %107
+               OpStore %102 %103
+               OpBranch %90
+         %90 = OpLabel
+        %108 = OpLoad %uint %i_1
+        %109 = OpIAdd %uint %108 %uint_1
+               OpStore %i_1 %109
+               OpBranch %88
+         %89 = OpLabel
+        %110 = OpLoad %_arr_Outer_uint_4 %arr_0
+               OpReturnValue %110
+               OpFunctionEnd
+%load_a_inner_p0_a_p1_m = OpFunction %mat4v3half None %111
+         %p0 = OpFunctionParameter %uint
+         %p1 = OpFunctionParameter %uint
+        %115 = OpLabel
+        %119 = OpAccessChain %_ptr_Uniform_Inner_std140 %a %uint_0 %p0 %uint_0 %p1
+        %122 = OpAccessChain %_ptr_Uniform_v3half %119 %uint_0
+        %123 = OpLoad %v3half %122
+        %125 = OpAccessChain %_ptr_Uniform_v3half %119 %uint_1
+        %126 = OpLoad %v3half %125
+        %129 = OpAccessChain %_ptr_Uniform_v3half %119 %uint_2
+        %130 = OpLoad %v3half %129
+        %133 = OpAccessChain %_ptr_Uniform_v3half %119 %uint_3
+        %134 = OpLoad %v3half %133
+        %135 = OpCompositeConstruct %mat4v3half %123 %126 %130 %134
+               OpReturnValue %135
+               OpFunctionEnd
+%load_a_inner_p0_a_p1_m_p2 = OpFunction %v3half None %136
+       %p0_0 = OpFunctionParameter %uint
+       %p1_0 = OpFunctionParameter %uint
+         %p2 = OpFunctionParameter %uint
+        %141 = OpLabel
+               OpSelectionMerge %142 None
+               OpSwitch %p2 %143 0 %144 1 %145 2 %146 3 %147
+        %144 = OpLabel
+        %148 = OpAccessChain %_ptr_Uniform_v3half %a %uint_0 %p0_0 %uint_0 %p1_0 %uint_0
+        %149 = OpLoad %v3half %148
+               OpReturnValue %149
+        %145 = OpLabel
+        %150 = OpAccessChain %_ptr_Uniform_v3half %a %uint_0 %p0_0 %uint_0 %p1_0 %uint_1
+        %151 = OpLoad %v3half %150
+               OpReturnValue %151
+        %146 = OpLabel
+        %152 = OpAccessChain %_ptr_Uniform_v3half %a %uint_0 %p0_0 %uint_0 %p1_0 %uint_2
+        %153 = OpLoad %v3half %152
+               OpReturnValue %153
+        %147 = OpLabel
+        %154 = OpAccessChain %_ptr_Uniform_v3half %a %uint_0 %p0_0 %uint_0 %p1_0 %uint_3
+        %155 = OpLoad %v3half %154
+               OpReturnValue %155
+        %143 = OpLabel
+               OpReturnValue %156
+        %142 = OpLabel
+               OpReturnValue %156
+               OpFunctionEnd
+%load_a_inner_p0_a_p1_m_p2_p3 = OpFunction %half None %157
+       %p0_1 = OpFunctionParameter %uint
+       %p1_1 = OpFunctionParameter %uint
+       %p2_0 = OpFunctionParameter %uint
+         %p3 = OpFunctionParameter %uint
+        %163 = OpLabel
+               OpSelectionMerge %164 None
+               OpSwitch %p2_0 %165 0 %166 1 %167 2 %168 3 %169
+        %166 = OpLabel
+        %171 = OpAccessChain %_ptr_Uniform_half %a %uint_0 %p0_1 %uint_0 %p1_1 %uint_0 %p3
+        %172 = OpLoad %half %171
+               OpReturnValue %172
+        %167 = OpLabel
+        %173 = OpAccessChain %_ptr_Uniform_half %a %uint_0 %p0_1 %uint_0 %p1_1 %uint_1 %p3
+        %174 = OpLoad %half %173
+               OpReturnValue %174
+        %168 = OpLabel
+        %175 = OpAccessChain %_ptr_Uniform_half %a %uint_0 %p0_1 %uint_0 %p1_1 %uint_2 %p3
+        %176 = OpLoad %half %175
+               OpReturnValue %176
+        %169 = OpLabel
+        %177 = OpAccessChain %_ptr_Uniform_half %a %uint_0 %p0_1 %uint_0 %p1_1 %uint_3 %p3
+        %178 = OpLoad %half %177
+               OpReturnValue %178
+        %165 = OpLabel
+               OpReturnValue %179
+        %164 = OpLabel
+               OpReturnValue %179
+               OpFunctionEnd
+          %f = OpFunction %void None %180
+        %183 = OpLabel
+        %184 = OpFunctionCall %int %i
+        %185 = OpFunctionCall %int %i
+        %186 = OpFunctionCall %int %i
+        %189 = OpAccessChain %_ptr_Uniform__arr_Outer_std140_uint_4 %a %uint_0
+        %190 = OpLoad %_arr_Outer_std140_uint_4 %189
+        %187 = OpFunctionCall %_arr_Outer_uint_4 %conv_arr4_Outer %190
+        %193 = OpAccessChain %_ptr_Uniform_Outer_std140 %a %uint_0 %184
+        %194 = OpLoad %Outer_std140 %193
+        %191 = OpFunctionCall %Outer %conv_Outer %194
+        %197 = OpAccessChain %_ptr_Uniform__arr_Inner_std140_uint_4 %a %uint_0 %184 %uint_0
+        %198 = OpLoad %_arr_Inner_std140_uint_4 %197
+        %195 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr4_Inner %198
+        %200 = OpAccessChain %_ptr_Uniform_Inner_std140 %a %uint_0 %184 %uint_0 %185
+        %201 = OpLoad %Inner_std140 %200
+        %199 = OpFunctionCall %Inner %conv_Inner %201
+        %203 = OpBitcast %uint %184
+        %204 = OpBitcast %uint %185
+        %202 = OpFunctionCall %mat4v3half %load_a_inner_p0_a_p1_m %203 %204
+        %206 = OpBitcast %uint %184
+        %207 = OpBitcast %uint %185
+        %208 = OpBitcast %uint %186
+        %205 = OpFunctionCall %v3half %load_a_inner_p0_a_p1_m_p2 %206 %207 %208
+        %209 = OpFunctionCall %int %i
+        %211 = OpBitcast %uint %184
+        %212 = OpBitcast %uint %185
+        %213 = OpBitcast %uint %186
+        %214 = OpBitcast %uint %209
+        %210 = OpFunctionCall %half %load_a_inner_p0_a_p1_m_p2_p3 %211 %212 %213 %214
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f16/dynamic_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/dynamic_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..4ac0990
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/dynamic_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,36 @@
+enable f16;
+
+struct Inner {
+  @size(64)
+  m : mat4x3<f16>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+var<private> counter = 0;
+
+fn i() -> i32 {
+  counter++;
+  return counter;
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &(a);
+  let p_a_i = &((*(p_a))[i()]);
+  let p_a_i_a = &((*(p_a_i)).a);
+  let p_a_i_a_i = &((*(p_a_i_a))[i()]);
+  let p_a_i_a_i_m = &((*(p_a_i_a_i)).m);
+  let p_a_i_a_i_m_i = &((*(p_a_i_a_i_m))[i()]);
+  let l_a : array<Outer, 4> = *(p_a);
+  let l_a_i : Outer = *(p_a_i);
+  let l_a_i_a : array<Inner, 4> = *(p_a_i_a);
+  let l_a_i_a_i : Inner = *(p_a_i_a_i);
+  let l_a_i_a_i_m : mat4x3<f16> = *(p_a_i_a_i_m);
+  let l_a_i_a_i_m_i : vec3<f16> = *(p_a_i_a_i_m_i);
+  let l_a_i_a_i_m_i_i : f16 = (*(p_a_i_a_i_m_i))[i()];
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f16/static_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/static_index_via_ptr.wgsl
new file mode 100644
index 0000000..efd61d8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/static_index_via_ptr.wgsl

@@ -0,0 +1,31 @@
+enable f16;
+
+struct Inner {
+  @size(64)
+  m : mat4x3<f16>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &a;
+  let p_a_3 = &((*p_a)[3]);
+  let p_a_3_a = &((*p_a_3).a);
+  let p_a_3_a_2 = &((*p_a_3_a)[2]);
+  let p_a_3_a_2_m = &((*p_a_3_a_2).m);
+  let p_a_3_a_2_m_1 = &((*p_a_3_a_2_m)[1]);
+
+
+  let l_a             : array<Outer, 4> = *p_a;
+  let l_a_3           : Outer           = *p_a_3;
+  let l_a_3_a         : array<Inner, 4> = *p_a_3_a;
+  let l_a_3_a_2       : Inner           = *p_a_3_a_2;
+  let l_a_3_a_2_m     : mat4x3<f16>     = *p_a_3_a_2_m;
+  let l_a_3_a_2_m_1   : vec3<f16>       = *p_a_3_a_2_m_1;
+  let l_a_3_a_2_m_1_0 : f16             = (*p_a_3_a_2_m_1)[0];
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..7397fe3
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,81 @@
+struct Inner {
+  matrix<float16_t, 4, 3> m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];
+};
+
+matrix<float16_t, 4, 3> tint_symbol_4(uint4 buffer[64], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  return matrix<float16_t, 4, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]), vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));
+}
+
+Inner tint_symbol_3(uint4 buffer[64], uint offset) {
+  const Inner tint_symbol_7 = {tint_symbol_4(buffer, (offset + 0u))};
+  return tint_symbol_7;
+}
+
+typedef Inner tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[64], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_3(buffer, (offset + (i * 64u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_1(uint4 buffer[64], uint offset) {
+  const Outer tint_symbol_8 = {tint_symbol_2(buffer, (offset + 0u))};
+  return tint_symbol_8;
+}
+
+typedef Outer tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[64], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr_1[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 256u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const Outer l_a[4] = tint_symbol(a, 0u);
+  const Outer l_a_3 = tint_symbol_1(a, 768u);
+  const Inner l_a_3_a[4] = tint_symbol_2(a, 768u);
+  const Inner l_a_3_a_2 = tint_symbol_3(a, 896u);
+  const matrix<float16_t, 4, 3> l_a_3_a_2_m = tint_symbol_4(a, 896u);
+  uint2 ubo_load_8 = a[56].zw;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  float16_t ubo_load_8_y = f16tof32(ubo_load_8[0] >> 16);
+  const vector<float16_t, 3> l_a_3_a_2_m_1 = vector<float16_t, 3>(ubo_load_8_xz[0], ubo_load_8_y, ubo_load_8_xz[1]);
+  const float16_t l_a_3_a_2_m_1_0 = float16_t(f16tof32(((a[56].z) & 0xFFFF)));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..3cc9dd2
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,86 @@
+SKIP: FAILED
+
+struct Inner {
+  matrix<float16_t, 4, 3> m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];
+};
+
+matrix<float16_t, 4, 3> tint_symbol_4(uint4 buffer[64], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  return matrix<float16_t, 4, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]), vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));
+}
+
+Inner tint_symbol_3(uint4 buffer[64], uint offset) {
+  const Inner tint_symbol_7 = {tint_symbol_4(buffer, (offset + 0u))};
+  return tint_symbol_7;
+}
+
+typedef Inner tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[64], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_3(buffer, (offset + (i * 64u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_1(uint4 buffer[64], uint offset) {
+  const Outer tint_symbol_8 = {tint_symbol_2(buffer, (offset + 0u))};
+  return tint_symbol_8;
+}
+
+typedef Outer tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[64], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr_1[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 256u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const Outer l_a[4] = tint_symbol(a, 0u);
+  const Outer l_a_3 = tint_symbol_1(a, 768u);
+  const Inner l_a_3_a[4] = tint_symbol_2(a, 768u);
+  const Inner l_a_3_a_2 = tint_symbol_3(a, 896u);
+  const matrix<float16_t, 4, 3> l_a_3_a_2_m = tint_symbol_4(a, 896u);
+  uint2 ubo_load_8 = a[56].zw;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  float16_t ubo_load_8_y = f16tof32(ubo_load_8[0] >> 16);
+  const vector<float16_t, 3> l_a_3_a_2_m_1 = vector<float16_t, 3>(ubo_load_8_xz[0], ubo_load_8_y, ubo_load_8_xz[1]);
+  const float16_t l_a_3_a_2_m_1_0 = float16_t(f16tof32(((a[56].z) & 0xFFFF)));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001CF48DE1160(2,10-18): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f16/static_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/static_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..bb6dc3e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/static_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,95 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct Inner {
+  f16mat4x3 m;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+};
+
+struct Inner_std140 {
+  f16vec3 m_0;
+  f16vec3 m_1;
+  f16vec3 m_2;
+  f16vec3 m_3;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+};
+
+struct Outer {
+  Inner a[4];
+};
+
+struct Outer_std140 {
+  Inner_std140 a[4];
+};
+
+layout(binding = 0, std140) uniform a_block_std140_ubo {
+  Outer_std140 inner[4];
+} a;
+
+Inner conv_Inner(Inner_std140 val) {
+  return Inner(f16mat4x3(val.m_0, val.m_1, val.m_2, val.m_3), val.pad, val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.pad_7);
+}
+
+Inner[4] conv_arr4_Inner(Inner_std140 val[4]) {
+  Inner arr[4] = Inner[4](Inner(f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_Inner(val[i]);
+    }
+  }
+  return arr;
+}
+
+Outer conv_Outer(Outer_std140 val) {
+  return Outer(conv_arr4_Inner(val.a));
+}
+
+Outer[4] conv_arr4_Outer(Outer_std140 val[4]) {
+  Outer arr[4] = Outer[4](Outer(Inner[4](Inner(f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))), Outer(Inner[4](Inner(f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))), Outer(Inner[4](Inner(f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))), Outer(Inner[4](Inner(f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4x3(0.0hf, 0.0hf, 
0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_Outer(val[i]);
+    }
+  }
+  return arr;
+}
+
+f16mat4x3 load_a_inner_3_a_2_m() {
+  return f16mat4x3(a.inner[3u].a[2u].m_0, a.inner[3u].a[2u].m_1, a.inner[3u].a[2u].m_2, a.inner[3u].a[2u].m_3);
+}
+
+void f() {
+  Outer p_a[4] = conv_arr4_Outer(a.inner);
+  Outer p_a_3 = conv_Outer(a.inner[3u]);
+  Inner p_a_3_a[4] = conv_arr4_Inner(a.inner[3u].a);
+  Inner p_a_3_a_2 = conv_Inner(a.inner[3u].a[2u]);
+  f16mat4x3 p_a_3_a_2_m = load_a_inner_3_a_2_m();
+  f16vec3 p_a_3_a_2_m_1 = a.inner[3u].a[2u].m_1;
+  Outer l_a[4] = conv_arr4_Outer(a.inner);
+  Outer l_a_3 = conv_Outer(a.inner[3u]);
+  Inner l_a_3_a[4] = conv_arr4_Inner(a.inner[3u].a);
+  Inner l_a_3_a_2 = conv_Inner(a.inner[3u].a[2u]);
+  f16mat4x3 l_a_3_a_2_m = load_a_inner_3_a_2_m();
+  f16vec3 l_a_3_a_2_m_1 = a.inner[3u].a[2u].m_1;
+  float16_t l_a_3_a_2_m_1_0 = a.inner[3u].a[2u].m_1[0u];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f16/static_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/static_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..ee0c952
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/static_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,36 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct Inner {
+  /* 0x0000 */ half4x3 m;
+  /* 0x0020 */ tint_array<int8_t, 32> tint_pad;
+};
+
+struct Outer {
+  /* 0x0000 */ tint_array<Inner, 4> a;
+};
+
+kernel void f(const constant tint_array<Outer, 4>* tint_symbol [[buffer(0)]]) {
+  tint_array<Outer, 4> const l_a = *(tint_symbol);
+  Outer const l_a_3 = (*(tint_symbol))[3];
+  tint_array<Inner, 4> const l_a_3_a = (*(tint_symbol))[3].a;
+  Inner const l_a_3_a_2 = (*(tint_symbol))[3].a[2];
+  half4x3 const l_a_3_a_2_m = (*(tint_symbol))[3].a[2].m;
+  half3 const l_a_3_a_2_m_1 = (*(tint_symbol))[3].a[2].m[1];
+  half const l_a_3_a_2_m_1_0 = (*(tint_symbol))[3].a[2].m[1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f16/static_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/static_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..507f1c0
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/static_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,237 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 148
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %a_block_std140 "a_block_std140"
+               OpMemberName %a_block_std140 0 "inner"
+               OpName %Outer_std140 "Outer_std140"
+               OpMemberName %Outer_std140 0 "a"
+               OpName %Inner_std140 "Inner_std140"
+               OpMemberName %Inner_std140 0 "m_0"
+               OpMemberName %Inner_std140 1 "m_1"
+               OpMemberName %Inner_std140 2 "m_2"
+               OpMemberName %Inner_std140 3 "m_3"
+               OpName %a "a"
+               OpName %Inner "Inner"
+               OpMemberName %Inner 0 "m"
+               OpName %conv_Inner "conv_Inner"
+               OpName %val "val"
+               OpName %conv_arr4_Inner "conv_arr4_Inner"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index_1 "var_for_index_1"
+               OpName %Outer "Outer"
+               OpMemberName %Outer 0 "a"
+               OpName %conv_Outer "conv_Outer"
+               OpName %val_1 "val"
+               OpName %conv_arr4_Outer "conv_arr4_Outer"
+               OpName %val_2 "val"
+               OpName %arr_0 "arr"
+               OpName %i_0 "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_a_inner_3_a_2_m "load_a_inner_3_a_2_m"
+               OpName %f "f"
+               OpDecorate %a_block_std140 Block
+               OpMemberDecorate %a_block_std140 0 Offset 0
+               OpMemberDecorate %Outer_std140 0 Offset 0
+               OpMemberDecorate %Inner_std140 0 Offset 0
+               OpMemberDecorate %Inner_std140 1 Offset 8
+               OpMemberDecorate %Inner_std140 2 Offset 16
+               OpMemberDecorate %Inner_std140 3 Offset 24
+               OpDecorate %_arr_Inner_std140_uint_4 ArrayStride 64
+               OpDecorate %_arr_Outer_std140_uint_4 ArrayStride 256
+               OpDecorate %a NonWritable
+               OpDecorate %a DescriptorSet 0
+               OpDecorate %a Binding 0
+               OpMemberDecorate %Inner 0 Offset 0
+               OpMemberDecorate %Inner 0 ColMajor
+               OpMemberDecorate %Inner 0 MatrixStride 8
+               OpDecorate %_arr_Inner_uint_4 ArrayStride 64
+               OpMemberDecorate %Outer 0 Offset 0
+               OpDecorate %_arr_Outer_uint_4 ArrayStride 256
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+%Inner_std140 = OpTypeStruct %v3half %v3half %v3half %v3half
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_Inner_std140_uint_4 = OpTypeArray %Inner_std140 %uint_4
+%Outer_std140 = OpTypeStruct %_arr_Inner_std140_uint_4
+%_arr_Outer_std140_uint_4 = OpTypeArray %Outer_std140 %uint_4
+%a_block_std140 = OpTypeStruct %_arr_Outer_std140_uint_4
+%_ptr_Uniform_a_block_std140 = OpTypePointer Uniform %a_block_std140
+          %a = OpVariable %_ptr_Uniform_a_block_std140 Uniform
+ %mat4v3half = OpTypeMatrix %v3half 4
+      %Inner = OpTypeStruct %mat4v3half
+         %12 = OpTypeFunction %Inner %Inner_std140
+%_arr_Inner_uint_4 = OpTypeArray %Inner %uint_4
+         %24 = OpTypeFunction %_arr_Inner_uint_4 %_arr_Inner_std140_uint_4
+%_ptr_Function__arr_Inner_uint_4 = OpTypePointer Function %_arr_Inner_uint_4
+         %31 = OpConstantNull %_arr_Inner_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %34 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_Inner_std140_uint_4 = OpTypePointer Function %_arr_Inner_std140_uint_4
+         %47 = OpConstantNull %_arr_Inner_std140_uint_4
+%_ptr_Function_Inner = OpTypePointer Function %Inner
+%_ptr_Function_Inner_std140 = OpTypePointer Function %Inner_std140
+     %uint_1 = OpConstant %uint 1
+      %Outer = OpTypeStruct %_arr_Inner_uint_4
+         %60 = OpTypeFunction %Outer %Outer_std140
+%_arr_Outer_uint_4 = OpTypeArray %Outer %uint_4
+         %68 = OpTypeFunction %_arr_Outer_uint_4 %_arr_Outer_std140_uint_4
+%_ptr_Function__arr_Outer_uint_4 = OpTypePointer Function %_arr_Outer_uint_4
+         %75 = OpConstantNull %_arr_Outer_uint_4
+%_ptr_Function__arr_Outer_std140_uint_4 = OpTypePointer Function %_arr_Outer_std140_uint_4
+         %88 = OpConstantNull %_arr_Outer_std140_uint_4
+%_ptr_Function_Outer = OpTypePointer Function %Outer
+%_ptr_Function_Outer_std140 = OpTypePointer Function %Outer_std140
+        %100 = OpTypeFunction %mat4v3half
+     %uint_0 = OpConstant %uint 0
+     %uint_3 = OpConstant %uint 3
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_Inner_std140 = OpTypePointer Uniform %Inner_std140
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+       %void = OpTypeVoid
+        %123 = OpTypeFunction %void
+%_ptr_Uniform__arr_Outer_std140_uint_4 = OpTypePointer Uniform %_arr_Outer_std140_uint_4
+%_ptr_Uniform_Outer_std140 = OpTypePointer Uniform %Outer_std140
+%_ptr_Uniform__arr_Inner_std140_uint_4 = OpTypePointer Uniform %_arr_Inner_std140_uint_4
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+ %conv_Inner = OpFunction %Inner None %12
+        %val = OpFunctionParameter %Inner_std140
+         %17 = OpLabel
+         %18 = OpCompositeExtract %v3half %val 0
+         %19 = OpCompositeExtract %v3half %val 1
+         %20 = OpCompositeExtract %v3half %val 2
+         %21 = OpCompositeExtract %v3half %val 3
+         %22 = OpCompositeConstruct %mat4v3half %18 %19 %20 %21
+         %23 = OpCompositeConstruct %Inner %22
+               OpReturnValue %23
+               OpFunctionEnd
+%conv_arr4_Inner = OpFunction %_arr_Inner_uint_4 None %24
+      %val_0 = OpFunctionParameter %_arr_Inner_std140_uint_4
+         %28 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_Inner_uint_4 Function %31
+          %i = OpVariable %_ptr_Function_uint Function %34
+%var_for_index_1 = OpVariable %_ptr_Function__arr_Inner_std140_uint_4 Function %47
+               OpBranch %35
+         %35 = OpLabel
+               OpLoopMerge %36 %37 None
+               OpBranch %38
+         %38 = OpLabel
+         %40 = OpLoad %uint %i
+         %41 = OpULessThan %bool %40 %uint_4
+         %39 = OpLogicalNot %bool %41
+               OpSelectionMerge %43 None
+               OpBranchConditional %39 %44 %43
+         %44 = OpLabel
+               OpBranch %36
+         %43 = OpLabel
+               OpStore %var_for_index_1 %val_0
+         %48 = OpLoad %uint %i
+         %50 = OpAccessChain %_ptr_Function_Inner %arr %48
+         %52 = OpLoad %uint %i
+         %54 = OpAccessChain %_ptr_Function_Inner_std140 %var_for_index_1 %52
+         %55 = OpLoad %Inner_std140 %54
+         %51 = OpFunctionCall %Inner %conv_Inner %55
+               OpStore %50 %51
+               OpBranch %37
+         %37 = OpLabel
+         %56 = OpLoad %uint %i
+         %58 = OpIAdd %uint %56 %uint_1
+               OpStore %i %58
+               OpBranch %35
+         %36 = OpLabel
+         %59 = OpLoad %_arr_Inner_uint_4 %arr
+               OpReturnValue %59
+               OpFunctionEnd
+ %conv_Outer = OpFunction %Outer None %60
+      %val_1 = OpFunctionParameter %Outer_std140
+         %64 = OpLabel
+         %66 = OpCompositeExtract %_arr_Inner_std140_uint_4 %val_1 0
+         %65 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr4_Inner %66
+         %67 = OpCompositeConstruct %Outer %65
+               OpReturnValue %67
+               OpFunctionEnd
+%conv_arr4_Outer = OpFunction %_arr_Outer_uint_4 None %68
+      %val_2 = OpFunctionParameter %_arr_Outer_std140_uint_4
+         %72 = OpLabel
+      %arr_0 = OpVariable %_ptr_Function__arr_Outer_uint_4 Function %75
+        %i_0 = OpVariable %_ptr_Function_uint Function %34
+%var_for_index = OpVariable %_ptr_Function__arr_Outer_std140_uint_4 Function %88
+               OpBranch %77
+         %77 = OpLabel
+               OpLoopMerge %78 %79 None
+               OpBranch %80
+         %80 = OpLabel
+         %82 = OpLoad %uint %i_0
+         %83 = OpULessThan %bool %82 %uint_4
+         %81 = OpLogicalNot %bool %83
+               OpSelectionMerge %84 None
+               OpBranchConditional %81 %85 %84
+         %85 = OpLabel
+               OpBranch %78
+         %84 = OpLabel
+               OpStore %var_for_index %val_2
+         %89 = OpLoad %uint %i_0
+         %91 = OpAccessChain %_ptr_Function_Outer %arr_0 %89
+         %93 = OpLoad %uint %i_0
+         %95 = OpAccessChain %_ptr_Function_Outer_std140 %var_for_index %93
+         %96 = OpLoad %Outer_std140 %95
+         %92 = OpFunctionCall %Outer %conv_Outer %96
+               OpStore %91 %92
+               OpBranch %79
+         %79 = OpLabel
+         %97 = OpLoad %uint %i_0
+         %98 = OpIAdd %uint %97 %uint_1
+               OpStore %i_0 %98
+               OpBranch %77
+         %78 = OpLabel
+         %99 = OpLoad %_arr_Outer_uint_4 %arr_0
+               OpReturnValue %99
+               OpFunctionEnd
+%load_a_inner_3_a_2_m = OpFunction %mat4v3half None %100
+        %102 = OpLabel
+        %108 = OpAccessChain %_ptr_Uniform_Inner_std140 %a %uint_0 %uint_3 %uint_0 %uint_2
+        %111 = OpAccessChain %_ptr_Uniform_v3half %108 %uint_0
+        %112 = OpLoad %v3half %111
+        %114 = OpAccessChain %_ptr_Uniform_v3half %108 %uint_1
+        %115 = OpLoad %v3half %114
+        %117 = OpAccessChain %_ptr_Uniform_v3half %108 %uint_2
+        %118 = OpLoad %v3half %117
+        %120 = OpAccessChain %_ptr_Uniform_v3half %108 %uint_3
+        %121 = OpLoad %v3half %120
+        %122 = OpCompositeConstruct %mat4v3half %112 %115 %118 %121
+               OpReturnValue %122
+               OpFunctionEnd
+          %f = OpFunction %void None %123
+        %126 = OpLabel
+        %129 = OpAccessChain %_ptr_Uniform__arr_Outer_std140_uint_4 %a %uint_0
+        %130 = OpLoad %_arr_Outer_std140_uint_4 %129
+        %127 = OpFunctionCall %_arr_Outer_uint_4 %conv_arr4_Outer %130
+        %133 = OpAccessChain %_ptr_Uniform_Outer_std140 %a %uint_0 %uint_3
+        %134 = OpLoad %Outer_std140 %133
+        %131 = OpFunctionCall %Outer %conv_Outer %134
+        %137 = OpAccessChain %_ptr_Uniform__arr_Inner_std140_uint_4 %a %uint_0 %uint_3 %uint_0
+        %138 = OpLoad %_arr_Inner_std140_uint_4 %137
+        %135 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr4_Inner %138
+        %140 = OpAccessChain %_ptr_Uniform_Inner_std140 %a %uint_0 %uint_3 %uint_0 %uint_2
+        %141 = OpLoad %Inner_std140 %140
+        %139 = OpFunctionCall %Inner %conv_Inner %141
+        %142 = OpFunctionCall %mat4v3half %load_a_inner_3_a_2_m
+        %143 = OpAccessChain %_ptr_Uniform_v3half %a %uint_0 %uint_3 %uint_0 %uint_2 %uint_1
+        %144 = OpLoad %v3half %143
+        %146 = OpAccessChain %_ptr_Uniform_half %a %uint_0 %uint_3 %uint_0 %uint_2 %uint_1 %34
+        %147 = OpLoad %half %146
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f16/static_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/static_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..059ae45
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/static_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,29 @@
+enable f16;
+
+struct Inner {
+  @size(64)
+  m : mat4x3<f16>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &(a);
+  let p_a_3 = &((*(p_a))[3]);
+  let p_a_3_a = &((*(p_a_3)).a);
+  let p_a_3_a_2 = &((*(p_a_3_a))[2]);
+  let p_a_3_a_2_m = &((*(p_a_3_a_2)).m);
+  let p_a_3_a_2_m_1 = &((*(p_a_3_a_2_m))[1]);
+  let l_a : array<Outer, 4> = *(p_a);
+  let l_a_3 : Outer = *(p_a_3);
+  let l_a_3_a : array<Inner, 4> = *(p_a_3_a);
+  let l_a_3_a_2 : Inner = *(p_a_3_a_2);
+  let l_a_3_a_2_m : mat4x3<f16> = *(p_a_3_a_2_m);
+  let l_a_3_a_2_m_1 : vec3<f16> = *(p_a_3_a_2_m_1);
+  let l_a_3_a_2_m_1_0 : f16 = (*(p_a_3_a_2_m_1))[0];
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_builtin.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_builtin.wgsl
new file mode 100644
index 0000000..6bde739
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_builtin.wgsl

@@ -0,0 +1,17 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat4x3<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    let t = transpose(u[2].m);
+    let l = length(u[0].m[1].zxy);
+    let a = abs(u[0].m[1].zxy.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_builtin.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_builtin.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..3123f10
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_builtin.wgsl.expected.dxc.hlsl

@@ -0,0 +1,41 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+
+matrix<float16_t, 4, 3> tint_symbol(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  return matrix<float16_t, 4, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]), vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 3, 4> t = transpose(tint_symbol(u, 264u));
+  uint2 ubo_load_8 = u[1].xy;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  float16_t ubo_load_8_y = f16tof32(ubo_load_8[0] >> 16);
+  const float16_t l = length(vector<float16_t, 3>(ubo_load_8_xz[0], ubo_load_8_y, ubo_load_8_xz[1]).zxy);
+  uint2 ubo_load_9 = u[1].xy;
+  vector<float16_t, 2> ubo_load_9_xz = vector<float16_t, 2>(f16tof32(ubo_load_9 & 0xFFFF));
+  float16_t ubo_load_9_y = f16tof32(ubo_load_9[0] >> 16);
+  const float16_t a = abs(vector<float16_t, 3>(ubo_load_9_xz[0], ubo_load_9_y, ubo_load_9_xz[1]).zxy.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_builtin.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_builtin.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..1f18e46
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_builtin.wgsl.expected.fxc.hlsl

@@ -0,0 +1,46 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+
+matrix<float16_t, 4, 3> tint_symbol(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  return matrix<float16_t, 4, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]), vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 3, 4> t = transpose(tint_symbol(u, 264u));
+  uint2 ubo_load_8 = u[1].xy;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  float16_t ubo_load_8_y = f16tof32(ubo_load_8[0] >> 16);
+  const float16_t l = length(vector<float16_t, 3>(ubo_load_8_xz[0], ubo_load_8_y, ubo_load_8_xz[1]).zxy);
+  uint2 ubo_load_9 = u[1].xy;
+  vector<float16_t, 2> ubo_load_9_xz = vector<float16_t, 2>(f16tof32(ubo_load_9 & 0xFFFF));
+  float16_t ubo_load_9_y = f16tof32(ubo_load_9[0] >> 16);
+  const float16_t a = abs(vector<float16_t, 3>(ubo_load_9_xz[0], ubo_load_9_y, ubo_load_9_xz[1]).zxy.x);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000023590C3D7E0(5,8-16): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_builtin.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_builtin.wgsl.expected.glsl
new file mode 100644
index 0000000..e8870bc
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_builtin.wgsl.expected.glsl

@@ -0,0 +1,81 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct S {
+  int before;
+  uint pad;
+  f16mat4x3 m;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  int after;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+};
+
+struct S_std140 {
+  int before;
+  uint pad;
+  f16vec3 m_0;
+  f16vec3 m_1;
+  f16vec3 m_2;
+  f16vec3 m_3;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  int after;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  S_std140 inner[4];
+} u;
+
+f16mat4x3 load_u_inner_2_m() {
+  return f16mat4x3(u.inner[2u].m_0, u.inner[2u].m_1, u.inner[2u].m_2, u.inner[2u].m_3);
+}
+
+void f() {
+  f16mat3x4 t = transpose(load_u_inner_2_m());
+  float16_t l = length(u.inner[0u].m_1.zxy);
+  float16_t a = abs(u.inner[0u].m_1.zxy[0u]);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_builtin.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_builtin.wgsl.expected.msl
new file mode 100644
index 0000000..3a831cd
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_builtin.wgsl.expected.msl

@@ -0,0 +1,32 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
+  /* 0x0008 */ half4x3 m;
+  /* 0x0028 */ tint_array<int8_t, 24> tint_pad_1;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_2;
+};
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol [[buffer(0)]]) {
+  half3x4 const t = transpose((*(tint_symbol))[2].m);
+  half const l = length(half3((*(tint_symbol))[0].m[1]).zxy);
+  half const a = fabs(half3((*(tint_symbol))[0].m[1]).zxy[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_builtin.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_builtin.wgsl.expected.spvasm
new file mode 100644
index 0000000..a5700e9
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_builtin.wgsl.expected.spvasm

@@ -0,0 +1,90 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 55
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+         %45 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "m_2"
+               OpMemberName %S_std140 4 "m_3"
+               OpMemberName %S_std140 5 "after"
+               OpName %u "u"
+               OpName %load_u_inner_2_m "load_u_inner_2_m"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 8
+               OpMemberDecorate %S_std140 2 Offset 16
+               OpMemberDecorate %S_std140 3 Offset 24
+               OpMemberDecorate %S_std140 4 Offset 32
+               OpMemberDecorate %S_std140 5 Offset 64
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+        %int = OpTypeInt 32 1
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+   %S_std140 = OpTypeStruct %int %v3half %v3half %v3half %v3half %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat4v3half = OpTypeMatrix %v3half 4
+         %11 = OpTypeFunction %mat4v3half
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+     %uint_1 = OpConstant %uint 1
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+     %uint_3 = OpConstant %uint 3
+       %void = OpTypeVoid
+         %36 = OpTypeFunction %void
+     %v4half = OpTypeVector %half 4
+ %mat3v4half = OpTypeMatrix %v4half 3
+         %46 = OpConstantNull %uint
+%load_u_inner_2_m = OpFunction %mat4v3half None %11
+         %14 = OpLabel
+         %19 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %23 = OpAccessChain %_ptr_Uniform_v3half %19 %uint_1
+         %24 = OpLoad %v3half %23
+         %26 = OpAccessChain %_ptr_Uniform_v3half %19 %uint_2
+         %27 = OpLoad %v3half %26
+         %30 = OpAccessChain %_ptr_Uniform_v3half %19 %uint_3
+         %31 = OpLoad %v3half %30
+         %33 = OpAccessChain %_ptr_Uniform_v3half %19 %uint_4
+         %34 = OpLoad %v3half %33
+         %35 = OpCompositeConstruct %mat4v3half %24 %27 %31 %34
+               OpReturnValue %35
+               OpFunctionEnd
+          %f = OpFunction %void None %36
+         %39 = OpLabel
+         %43 = OpFunctionCall %mat4v3half %load_u_inner_2_m
+         %40 = OpTranspose %mat3v4half %43
+         %47 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0 %46 %uint_2
+         %48 = OpLoad %v3half %47
+         %49 = OpVectorShuffle %v3half %48 %48 2 0 1
+         %44 = OpExtInst %half %45 Length %49
+         %51 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0 %46 %uint_2
+         %52 = OpLoad %v3half %51
+         %53 = OpVectorShuffle %v3half %52 %52 2 0 1
+         %54 = OpCompositeExtract %half %53 0
+         %50 = OpExtInst %half %45 FAbs %54
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_builtin.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_builtin.wgsl.expected.wgsl
new file mode 100644
index 0000000..6bd1807
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_builtin.wgsl.expected.wgsl

@@ -0,0 +1,17 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat4x3<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let t = transpose(u[2].m);
+  let l = length(u[0].m[1].zxy);
+  let a = abs(u[0].m[1].zxy.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_fn.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_fn.wgsl
new file mode 100644
index 0000000..ab15ad4
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_fn.wgsl

@@ -0,0 +1,25 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat4x3<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+fn a(a : array<S, 4>) {}
+fn b(s : S) {}
+fn c(m : mat4x3<f16>) {}
+fn d(v : vec3<f16>) {}
+fn e(f : f16) {}
+
+@compute @workgroup_size(1)
+fn f() {
+    a(u);
+    b(u[2]);
+    c(u[2].m);
+    d(u[0].m[1].zxy);
+    e(u[0].m[1].zxy.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_fn.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_fn.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..ef7574b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_fn.wgsl.expected.dxc.hlsl

@@ -0,0 +1,82 @@
+struct S {
+  int before;
+  matrix<float16_t, 4, 3> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+
+void a(S a_1[4]) {
+}
+
+void b(S s) {
+}
+
+void c(matrix<float16_t, 4, 3> m) {
+}
+
+void d(vector<float16_t, 3> v) {
+}
+
+void e(float16_t f_1) {
+}
+
+matrix<float16_t, 4, 3> tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  return matrix<float16_t, 4, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]), vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));
+}
+
+S tint_symbol_1(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_4 = ((offset + 0u)) / 4;
+  const uint scalar_offset_5 = ((offset + 64u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4]), tint_symbol_3(buffer, (offset + 8u)), asint(buffer[scalar_offset_5 / 4][scalar_offset_5 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(tint_symbol_1(u, 256u));
+  c(tint_symbol_3(u, 264u));
+  uint2 ubo_load_8 = u[1].xy;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  float16_t ubo_load_8_y = f16tof32(ubo_load_8[0] >> 16);
+  d(vector<float16_t, 3>(ubo_load_8_xz[0], ubo_load_8_y, ubo_load_8_xz[1]).zxy);
+  uint2 ubo_load_9 = u[1].xy;
+  vector<float16_t, 2> ubo_load_9_xz = vector<float16_t, 2>(f16tof32(ubo_load_9 & 0xFFFF));
+  float16_t ubo_load_9_y = f16tof32(ubo_load_9[0] >> 16);
+  e(vector<float16_t, 3>(ubo_load_9_xz[0], ubo_load_9_y, ubo_load_9_xz[1]).zxy.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_fn.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_fn.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..ca1bbc5
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_fn.wgsl.expected.fxc.hlsl

@@ -0,0 +1,87 @@
+SKIP: FAILED
+
+struct S {
+  int before;
+  matrix<float16_t, 4, 3> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+
+void a(S a_1[4]) {
+}
+
+void b(S s) {
+}
+
+void c(matrix<float16_t, 4, 3> m) {
+}
+
+void d(vector<float16_t, 3> v) {
+}
+
+void e(float16_t f_1) {
+}
+
+matrix<float16_t, 4, 3> tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  return matrix<float16_t, 4, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]), vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));
+}
+
+S tint_symbol_1(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_4 = ((offset + 0u)) / 4;
+  const uint scalar_offset_5 = ((offset + 64u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4]), tint_symbol_3(buffer, (offset + 8u)), asint(buffer[scalar_offset_5 / 4][scalar_offset_5 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(tint_symbol_1(u, 256u));
+  c(tint_symbol_3(u, 264u));
+  uint2 ubo_load_8 = u[1].xy;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  float16_t ubo_load_8_y = f16tof32(ubo_load_8[0] >> 16);
+  d(vector<float16_t, 3>(ubo_load_8_xz[0], ubo_load_8_y, ubo_load_8_xz[1]).zxy);
+  uint2 ubo_load_9 = u[1].xy;
+  vector<float16_t, 2> ubo_load_9_xz = vector<float16_t, 2>(f16tof32(ubo_load_9 & 0xFFFF));
+  float16_t ubo_load_9_y = f16tof32(ubo_load_9[0] >> 16);
+  e(vector<float16_t, 3>(ubo_load_9_xz[0], ubo_load_9_y, ubo_load_9_xz[1]).zxy.x);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001FF77F70B40(3,10-18): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_fn.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_fn.wgsl.expected.glsl
new file mode 100644
index 0000000..bb2bc12
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_fn.wgsl.expected.glsl

@@ -0,0 +1,112 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct S {
+  int before;
+  uint pad;
+  f16mat4x3 m;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  int after;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+};
+
+struct S_std140 {
+  int before;
+  uint pad;
+  f16vec3 m_0;
+  f16vec3 m_1;
+  f16vec3 m_2;
+  f16vec3 m_3;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  int after;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  S_std140 inner[4];
+} u;
+
+void a(S a_1[4]) {
+}
+
+void b(S s) {
+}
+
+void c(f16mat4x3 m) {
+}
+
+void d(f16vec3 v) {
+}
+
+void e(float16_t f_1) {
+}
+
+S conv_S(S_std140 val) {
+  return S(val.before, val.pad, f16mat4x3(val.m_0, val.m_1, val.m_2, val.m_3), val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.after, val.pad_7, val.pad_8, val.pad_9, val.pad_10, val.pad_11, val.pad_12, val.pad_13, val.pad_14, val.pad_15, val.pad_16, val.pad_17, val.pad_18, val.pad_19, val.pad_20, val.pad_21);
+}
+
+S[4] conv_arr4_S(S_std140 val[4]) {
+  S arr[4] = S[4](S(0, 0u, f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_S(val[i]);
+    }
+  }
+  return arr;
+}
+
+f16mat4x3 load_u_inner_2_m() {
+  return f16mat4x3(u.inner[2u].m_0, u.inner[2u].m_1, u.inner[2u].m_2, u.inner[2u].m_3);
+}
+
+void f() {
+  a(conv_arr4_S(u.inner));
+  b(conv_S(u.inner[2u]));
+  c(load_u_inner_2_m());
+  d(u.inner[0u].m_1.zxy);
+  e(u.inner[0u].m_1.zxy[0u]);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_fn.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_fn.wgsl.expected.msl
new file mode 100644
index 0000000..821e522
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_fn.wgsl.expected.msl

@@ -0,0 +1,49 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
+  /* 0x0008 */ half4x3 m;
+  /* 0x0028 */ tint_array<int8_t, 24> tint_pad_1;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_2;
+};
+
+void a(tint_array<S, 4> a_1) {
+}
+
+void b(S s) {
+}
+
+void c(half4x3 m) {
+}
+
+void d(half3 v) {
+}
+
+void e(half f_1) {
+}
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol [[buffer(0)]]) {
+  a(*(tint_symbol));
+  b((*(tint_symbol))[2]);
+  c((*(tint_symbol))[2].m);
+  d(half3((*(tint_symbol))[0].m[1]).zxy);
+  e(half3((*(tint_symbol))[0].m[1]).zxy[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_fn.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_fn.wgsl.expected.spvasm
new file mode 100644
index 0000000..610fee7
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_fn.wgsl.expected.spvasm

@@ -0,0 +1,215 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 128
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "m_2"
+               OpMemberName %S_std140 4 "m_3"
+               OpMemberName %S_std140 5 "after"
+               OpName %u "u"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %a "a"
+               OpName %a_1 "a_1"
+               OpName %b "b"
+               OpName %s "s"
+               OpName %c "c"
+               OpName %m "m"
+               OpName %d "d"
+               OpName %v "v"
+               OpName %e "e"
+               OpName %f_1 "f_1"
+               OpName %conv_S "conv_S"
+               OpName %val "val"
+               OpName %conv_arr4_S "conv_arr4_S"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_u_inner_2_m "load_u_inner_2_m"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 8
+               OpMemberDecorate %S_std140 2 Offset 16
+               OpMemberDecorate %S_std140 3 Offset 24
+               OpMemberDecorate %S_std140 4 Offset 32
+               OpMemberDecorate %S_std140 5 Offset 64
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 8
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 8
+               OpMemberDecorate %S 2 Offset 64
+               OpDecorate %_arr_S_uint_4 ArrayStride 128
+        %int = OpTypeInt 32 1
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+   %S_std140 = OpTypeStruct %int %v3half %v3half %v3half %v3half %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+       %void = OpTypeVoid
+ %mat4v3half = OpTypeMatrix %v3half 4
+          %S = OpTypeStruct %int %mat4v3half %int
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+         %11 = OpTypeFunction %void %_arr_S_uint_4
+         %19 = OpTypeFunction %void %S
+         %23 = OpTypeFunction %void %mat4v3half
+         %27 = OpTypeFunction %void %v3half
+         %31 = OpTypeFunction %void %half
+         %35 = OpTypeFunction %S %S_std140
+         %47 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
+%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
+         %53 = OpConstantNull %_arr_S_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %56 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
+         %69 = OpConstantNull %_arr_S_std140_uint_4
+%_ptr_Function_S = OpTypePointer Function %S
+%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
+     %uint_1 = OpConstant %uint 1
+         %82 = OpTypeFunction %mat4v3half
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+     %uint_3 = OpConstant %uint 3
+        %105 = OpTypeFunction %void
+%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
+          %a = OpFunction %void None %11
+        %a_1 = OpFunctionParameter %_arr_S_uint_4
+         %18 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %b = OpFunction %void None %19
+          %s = OpFunctionParameter %S
+         %22 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %c = OpFunction %void None %23
+          %m = OpFunctionParameter %mat4v3half
+         %26 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %d = OpFunction %void None %27
+          %v = OpFunctionParameter %v3half
+         %30 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %e = OpFunction %void None %31
+        %f_1 = OpFunctionParameter %half
+         %34 = OpLabel
+               OpReturn
+               OpFunctionEnd
+     %conv_S = OpFunction %S None %35
+        %val = OpFunctionParameter %S_std140
+         %38 = OpLabel
+         %39 = OpCompositeExtract %int %val 0
+         %40 = OpCompositeExtract %v3half %val 1
+         %41 = OpCompositeExtract %v3half %val 2
+         %42 = OpCompositeExtract %v3half %val 3
+         %43 = OpCompositeExtract %v3half %val 4
+         %44 = OpCompositeConstruct %mat4v3half %40 %41 %42 %43
+         %45 = OpCompositeExtract %int %val 5
+         %46 = OpCompositeConstruct %S %39 %44 %45
+               OpReturnValue %46
+               OpFunctionEnd
+%conv_arr4_S = OpFunction %_arr_S_uint_4 None %47
+      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
+         %50 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %53
+          %i = OpVariable %_ptr_Function_uint Function %56
+%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %69
+               OpBranch %57
+         %57 = OpLabel
+               OpLoopMerge %58 %59 None
+               OpBranch %60
+         %60 = OpLabel
+         %62 = OpLoad %uint %i
+         %63 = OpULessThan %bool %62 %uint_4
+         %61 = OpLogicalNot %bool %63
+               OpSelectionMerge %65 None
+               OpBranchConditional %61 %66 %65
+         %66 = OpLabel
+               OpBranch %58
+         %65 = OpLabel
+               OpStore %var_for_index %val_0
+         %70 = OpLoad %uint %i
+         %72 = OpAccessChain %_ptr_Function_S %arr %70
+         %74 = OpLoad %uint %i
+         %76 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %74
+         %77 = OpLoad %S_std140 %76
+         %73 = OpFunctionCall %S %conv_S %77
+               OpStore %72 %73
+               OpBranch %59
+         %59 = OpLabel
+         %78 = OpLoad %uint %i
+         %80 = OpIAdd %uint %78 %uint_1
+               OpStore %i %80
+               OpBranch %57
+         %58 = OpLabel
+         %81 = OpLoad %_arr_S_uint_4 %arr
+               OpReturnValue %81
+               OpFunctionEnd
+%load_u_inner_2_m = OpFunction %mat4v3half None %82
+         %84 = OpLabel
+         %89 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %92 = OpAccessChain %_ptr_Uniform_v3half %89 %uint_1
+         %93 = OpLoad %v3half %92
+         %95 = OpAccessChain %_ptr_Uniform_v3half %89 %uint_2
+         %96 = OpLoad %v3half %95
+         %99 = OpAccessChain %_ptr_Uniform_v3half %89 %uint_3
+        %100 = OpLoad %v3half %99
+        %102 = OpAccessChain %_ptr_Uniform_v3half %89 %uint_4
+        %103 = OpLoad %v3half %102
+        %104 = OpCompositeConstruct %mat4v3half %93 %96 %100 %103
+               OpReturnValue %104
+               OpFunctionEnd
+          %f = OpFunction %void None %105
+        %107 = OpLabel
+        %111 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
+        %112 = OpLoad %_arr_S_std140_uint_4 %111
+        %109 = OpFunctionCall %_arr_S_uint_4 %conv_arr4_S %112
+        %108 = OpFunctionCall %void %a %109
+        %115 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+        %116 = OpLoad %S_std140 %115
+        %114 = OpFunctionCall %S %conv_S %116
+        %113 = OpFunctionCall %void %b %114
+        %118 = OpFunctionCall %mat4v3half %load_u_inner_2_m
+        %117 = OpFunctionCall %void %c %118
+        %120 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0 %56 %uint_2
+        %121 = OpLoad %v3half %120
+        %122 = OpVectorShuffle %v3half %121 %121 2 0 1
+        %119 = OpFunctionCall %void %d %122
+        %124 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0 %56 %uint_2
+        %125 = OpLoad %v3half %124
+        %126 = OpVectorShuffle %v3half %125 %125 2 0 1
+        %127 = OpCompositeExtract %half %126 0
+        %123 = OpFunctionCall %void %e %127
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_fn.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_fn.wgsl.expected.wgsl
new file mode 100644
index 0000000..a78087f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_fn.wgsl.expected.wgsl

@@ -0,0 +1,34 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat4x3<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+fn a(a : array<S, 4>) {
+}
+
+fn b(s : S) {
+}
+
+fn c(m : mat4x3<f16>) {
+}
+
+fn d(v : vec3<f16>) {
+}
+
+fn e(f : f16) {
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  a(u);
+  b(u[2]);
+  c(u[2].m);
+  d(u[0].m[1].zxy);
+  e(u[0].m[1].zxy.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_private.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_private.wgsl
new file mode 100644
index 0000000..08f8f23
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_private.wgsl

@@ -0,0 +1,19 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat4x3<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+var<private> p : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    p = u;
+    p[1] = u[2];
+    p[3].m = u[2].m;
+    p[1].m[0] = u[0].m[1].zxy;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_private.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_private.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..d2aacee
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_private.wgsl.expected.dxc.hlsl

@@ -0,0 +1,64 @@
+struct S {
+  int before;
+  matrix<float16_t, 4, 3> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+static S p[4] = (S[4])0;
+
+matrix<float16_t, 4, 3> tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  return matrix<float16_t, 4, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]), vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));
+}
+
+S tint_symbol_1(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_4 = ((offset + 0u)) / 4;
+  const uint scalar_offset_5 = ((offset + 64u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4]), tint_symbol_3(buffer, (offset + 8u)), asint(buffer[scalar_offset_5 / 4][scalar_offset_5 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = tint_symbol_1(u, 256u);
+  p[3].m = tint_symbol_3(u, 264u);
+  uint2 ubo_load_8 = u[1].xy;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  float16_t ubo_load_8_y = f16tof32(ubo_load_8[0] >> 16);
+  p[1].m[0] = vector<float16_t, 3>(ubo_load_8_xz[0], ubo_load_8_y, ubo_load_8_xz[1]).zxy;
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_private.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_private.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..966dd5f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_private.wgsl.expected.fxc.hlsl

@@ -0,0 +1,69 @@
+SKIP: FAILED
+
+struct S {
+  int before;
+  matrix<float16_t, 4, 3> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+static S p[4] = (S[4])0;
+
+matrix<float16_t, 4, 3> tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  return matrix<float16_t, 4, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]), vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));
+}
+
+S tint_symbol_1(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_4 = ((offset + 0u)) / 4;
+  const uint scalar_offset_5 = ((offset + 64u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4]), tint_symbol_3(buffer, (offset + 8u)), asint(buffer[scalar_offset_5 / 4][scalar_offset_5 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = tint_symbol_1(u, 256u);
+  p[3].m = tint_symbol_3(u, 264u);
+  uint2 ubo_load_8 = u[1].xy;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  float16_t ubo_load_8_y = f16tof32(ubo_load_8[0] >> 16);
+  p[1].m[0] = vector<float16_t, 3>(ubo_load_8_xz[0], ubo_load_8_y, ubo_load_8_xz[1]).zxy;
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000019CC8F77C00(3,10-18): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_private.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_private.wgsl.expected.glsl
new file mode 100644
index 0000000..747a2c5
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_private.wgsl.expected.glsl

@@ -0,0 +1,97 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct S {
+  int before;
+  uint pad;
+  f16mat4x3 m;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  int after;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+};
+
+struct S_std140 {
+  int before;
+  uint pad;
+  f16vec3 m_0;
+  f16vec3 m_1;
+  f16vec3 m_2;
+  f16vec3 m_3;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  int after;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  S_std140 inner[4];
+} u;
+
+S p[4] = S[4](S(0, 0u, f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+S conv_S(S_std140 val) {
+  return S(val.before, val.pad, f16mat4x3(val.m_0, val.m_1, val.m_2, val.m_3), val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.after, val.pad_7, val.pad_8, val.pad_9, val.pad_10, val.pad_11, val.pad_12, val.pad_13, val.pad_14, val.pad_15, val.pad_16, val.pad_17, val.pad_18, val.pad_19, val.pad_20, val.pad_21);
+}
+
+S[4] conv_arr4_S(S_std140 val[4]) {
+  S arr[4] = S[4](S(0, 0u, f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_S(val[i]);
+    }
+  }
+  return arr;
+}
+
+f16mat4x3 load_u_inner_2_m() {
+  return f16mat4x3(u.inner[2u].m_0, u.inner[2u].m_1, u.inner[2u].m_2, u.inner[2u].m_3);
+}
+
+void f() {
+  p = conv_arr4_S(u.inner);
+  p[1] = conv_S(u.inner[2u]);
+  p[3].m = load_u_inner_2_m();
+  p[1].m[0] = u.inner[0u].m_1.zxy;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_private.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_private.wgsl.expected.msl
new file mode 100644
index 0000000..175ed33
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_private.wgsl.expected.msl

@@ -0,0 +1,34 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
+  /* 0x0008 */ half4x3 m;
+  /* 0x0028 */ tint_array<int8_t, 24> tint_pad_1;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_2;
+};
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol_1 [[buffer(0)]]) {
+  thread tint_array<S, 4> tint_symbol = {};
+  tint_symbol = *(tint_symbol_1);
+  tint_symbol[1] = (*(tint_symbol_1))[2];
+  tint_symbol[3].m = (*(tint_symbol_1))[2].m;
+  tint_symbol[1].m[0] = half3((*(tint_symbol_1))[0].m[1]).zxy;
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_private.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_private.wgsl.expected.spvasm
new file mode 100644
index 0000000..1a8461b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_private.wgsl.expected.spvasm

@@ -0,0 +1,182 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 110
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "m_2"
+               OpMemberName %S_std140 4 "m_3"
+               OpMemberName %S_std140 5 "after"
+               OpName %u "u"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %p "p"
+               OpName %conv_S "conv_S"
+               OpName %val "val"
+               OpName %conv_arr4_S "conv_arr4_S"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_u_inner_2_m "load_u_inner_2_m"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 8
+               OpMemberDecorate %S_std140 2 Offset 16
+               OpMemberDecorate %S_std140 3 Offset 24
+               OpMemberDecorate %S_std140 4 Offset 32
+               OpMemberDecorate %S_std140 5 Offset 64
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 8
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 8
+               OpMemberDecorate %S 2 Offset 64
+               OpDecorate %_arr_S_uint_4 ArrayStride 128
+        %int = OpTypeInt 32 1
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+   %S_std140 = OpTypeStruct %int %v3half %v3half %v3half %v3half %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat4v3half = OpTypeMatrix %v3half 4
+          %S = OpTypeStruct %int %mat4v3half %int
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+%_ptr_Private__arr_S_uint_4 = OpTypePointer Private %_arr_S_uint_4
+         %16 = OpConstantNull %_arr_S_uint_4
+          %p = OpVariable %_ptr_Private__arr_S_uint_4 Private %16
+         %17 = OpTypeFunction %S %S_std140
+         %29 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
+%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %37 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
+         %50 = OpConstantNull %_arr_S_std140_uint_4
+%_ptr_Function_S = OpTypePointer Function %S
+%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
+     %uint_1 = OpConstant %uint 1
+         %63 = OpTypeFunction %mat4v3half
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+     %uint_3 = OpConstant %uint 3
+       %void = OpTypeVoid
+         %86 = OpTypeFunction %void
+%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
+      %int_1 = OpConstant %int 1
+%_ptr_Private_S = OpTypePointer Private %S
+      %int_3 = OpConstant %int 3
+%_ptr_Private_mat4v3half = OpTypePointer Private %mat4v3half
+        %104 = OpConstantNull %int
+%_ptr_Private_v3half = OpTypePointer Private %v3half
+     %conv_S = OpFunction %S None %17
+        %val = OpFunctionParameter %S_std140
+         %20 = OpLabel
+         %21 = OpCompositeExtract %int %val 0
+         %22 = OpCompositeExtract %v3half %val 1
+         %23 = OpCompositeExtract %v3half %val 2
+         %24 = OpCompositeExtract %v3half %val 3
+         %25 = OpCompositeExtract %v3half %val 4
+         %26 = OpCompositeConstruct %mat4v3half %22 %23 %24 %25
+         %27 = OpCompositeExtract %int %val 5
+         %28 = OpCompositeConstruct %S %21 %26 %27
+               OpReturnValue %28
+               OpFunctionEnd
+%conv_arr4_S = OpFunction %_arr_S_uint_4 None %29
+      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
+         %32 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %16
+          %i = OpVariable %_ptr_Function_uint Function %37
+%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %50
+               OpBranch %38
+         %38 = OpLabel
+               OpLoopMerge %39 %40 None
+               OpBranch %41
+         %41 = OpLabel
+         %43 = OpLoad %uint %i
+         %44 = OpULessThan %bool %43 %uint_4
+         %42 = OpLogicalNot %bool %44
+               OpSelectionMerge %46 None
+               OpBranchConditional %42 %47 %46
+         %47 = OpLabel
+               OpBranch %39
+         %46 = OpLabel
+               OpStore %var_for_index %val_0
+         %51 = OpLoad %uint %i
+         %53 = OpAccessChain %_ptr_Function_S %arr %51
+         %55 = OpLoad %uint %i
+         %57 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %55
+         %58 = OpLoad %S_std140 %57
+         %54 = OpFunctionCall %S %conv_S %58
+               OpStore %53 %54
+               OpBranch %40
+         %40 = OpLabel
+         %59 = OpLoad %uint %i
+         %61 = OpIAdd %uint %59 %uint_1
+               OpStore %i %61
+               OpBranch %38
+         %39 = OpLabel
+         %62 = OpLoad %_arr_S_uint_4 %arr
+               OpReturnValue %62
+               OpFunctionEnd
+%load_u_inner_2_m = OpFunction %mat4v3half None %63
+         %65 = OpLabel
+         %70 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %73 = OpAccessChain %_ptr_Uniform_v3half %70 %uint_1
+         %74 = OpLoad %v3half %73
+         %76 = OpAccessChain %_ptr_Uniform_v3half %70 %uint_2
+         %77 = OpLoad %v3half %76
+         %80 = OpAccessChain %_ptr_Uniform_v3half %70 %uint_3
+         %81 = OpLoad %v3half %80
+         %83 = OpAccessChain %_ptr_Uniform_v3half %70 %uint_4
+         %84 = OpLoad %v3half %83
+         %85 = OpCompositeConstruct %mat4v3half %74 %77 %81 %84
+               OpReturnValue %85
+               OpFunctionEnd
+          %f = OpFunction %void None %86
+         %89 = OpLabel
+         %92 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
+         %93 = OpLoad %_arr_S_std140_uint_4 %92
+         %90 = OpFunctionCall %_arr_S_uint_4 %conv_arr4_S %93
+               OpStore %p %90
+         %96 = OpAccessChain %_ptr_Private_S %p %int_1
+         %98 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %99 = OpLoad %S_std140 %98
+         %97 = OpFunctionCall %S %conv_S %99
+               OpStore %96 %97
+        %102 = OpAccessChain %_ptr_Private_mat4v3half %p %int_3 %uint_1
+        %103 = OpFunctionCall %mat4v3half %load_u_inner_2_m
+               OpStore %102 %103
+        %106 = OpAccessChain %_ptr_Private_v3half %p %int_1 %uint_1 %104
+        %107 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0 %37 %uint_2
+        %108 = OpLoad %v3half %107
+        %109 = OpVectorShuffle %v3half %108 %108 2 0 1
+               OpStore %106 %109
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_private.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_private.wgsl.expected.wgsl
new file mode 100644
index 0000000..988ba24
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_private.wgsl.expected.wgsl

@@ -0,0 +1,20 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat4x3<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+var<private> p : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  p = u;
+  p[1] = u[2];
+  p[3].m = u[2].m;
+  p[1].m[0] = u[0].m[1].zxy;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_storage.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_storage.wgsl
new file mode 100644
index 0000000..3d34c0c
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_storage.wgsl

@@ -0,0 +1,19 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat4x3<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    s = u;
+    s[1] = u[2];
+    s[3].m = u[2].m;
+    s[1].m[0] = u[0].m[1].zxy;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_storage.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_storage.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..5b9dfa8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_storage.wgsl.expected.dxc.hlsl

@@ -0,0 +1,86 @@
+struct S {
+  int before;
+  matrix<float16_t, 4, 3> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol_3(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 4, 3> value) {
+  buffer.Store<vector<float16_t, 3> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 8u), value[1u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 16u), value[2u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 24u), value[3u]);
+}
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, S value) {
+  buffer.Store((offset + 0u), asuint(value.before));
+  tint_symbol_3(buffer, (offset + 8u), value.m);
+  buffer.Store((offset + 64u), asuint(value.after));
+}
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, S value[4]) {
+  S array[4] = value;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      tint_symbol_1(buffer, (offset + (i * 128u)), array[i]);
+    }
+  }
+}
+
+matrix<float16_t, 4, 3> tint_symbol_8(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  return matrix<float16_t, 4, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]), vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));
+}
+
+S tint_symbol_6(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_4 = ((offset + 0u)) / 4;
+  const uint scalar_offset_5 = ((offset + 64u)) / 4;
+  const S tint_symbol_10 = {asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4]), tint_symbol_8(buffer, (offset + 8u)), asint(buffer[scalar_offset_5 / 4][scalar_offset_5 % 4])};
+  return tint_symbol_10;
+}
+
+typedef S tint_symbol_5_ret[4];
+tint_symbol_5_ret tint_symbol_5(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_6(buffer, (offset + (i_1 * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_5(u, 0u));
+  tint_symbol_1(s, 128u, tint_symbol_6(u, 256u));
+  tint_symbol_3(s, 392u, tint_symbol_8(u, 264u));
+  uint2 ubo_load_8 = u[1].xy;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  float16_t ubo_load_8_y = f16tof32(ubo_load_8[0] >> 16);
+  s.Store<vector<float16_t, 3> >(136u, vector<float16_t, 3>(ubo_load_8_xz[0], ubo_load_8_y, ubo_load_8_xz[1]).zxy);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_storage.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_storage.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..c440a1c
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_storage.wgsl.expected.fxc.hlsl

@@ -0,0 +1,91 @@
+SKIP: FAILED
+
+struct S {
+  int before;
+  matrix<float16_t, 4, 3> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol_3(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 4, 3> value) {
+  buffer.Store<vector<float16_t, 3> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 8u), value[1u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 16u), value[2u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 24u), value[3u]);
+}
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, S value) {
+  buffer.Store((offset + 0u), asuint(value.before));
+  tint_symbol_3(buffer, (offset + 8u), value.m);
+  buffer.Store((offset + 64u), asuint(value.after));
+}
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, S value[4]) {
+  S array[4] = value;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      tint_symbol_1(buffer, (offset + (i * 128u)), array[i]);
+    }
+  }
+}
+
+matrix<float16_t, 4, 3> tint_symbol_8(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  return matrix<float16_t, 4, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]), vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));
+}
+
+S tint_symbol_6(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_4 = ((offset + 0u)) / 4;
+  const uint scalar_offset_5 = ((offset + 64u)) / 4;
+  const S tint_symbol_10 = {asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4]), tint_symbol_8(buffer, (offset + 8u)), asint(buffer[scalar_offset_5 / 4][scalar_offset_5 % 4])};
+  return tint_symbol_10;
+}
+
+typedef S tint_symbol_5_ret[4];
+tint_symbol_5_ret tint_symbol_5(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_6(buffer, (offset + (i_1 * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_5(u, 0u));
+  tint_symbol_1(s, 128u, tint_symbol_6(u, 256u));
+  tint_symbol_3(s, 392u, tint_symbol_8(u, 264u));
+  uint2 ubo_load_8 = u[1].xy;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  float16_t ubo_load_8_y = f16tof32(ubo_load_8[0] >> 16);
+  s.Store<vector<float16_t, 3> >(136u, vector<float16_t, 3>(ubo_load_8_xz[0], ubo_load_8_y, ubo_load_8_xz[1]).zxy);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x00000168827ABEC0(3,10-18): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_storage.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_storage.wgsl.expected.glsl
new file mode 100644
index 0000000..d07a67c
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_storage.wgsl.expected.glsl

@@ -0,0 +1,100 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct S {
+  int before;
+  uint pad;
+  f16mat4x3 m;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  int after;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+};
+
+struct S_std140 {
+  int before;
+  uint pad;
+  f16vec3 m_0;
+  f16vec3 m_1;
+  f16vec3 m_2;
+  f16vec3 m_3;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  int after;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  S_std140 inner[4];
+} u;
+
+layout(binding = 1, std430) buffer u_block_ssbo {
+  S inner[4];
+} s;
+
+S conv_S(S_std140 val) {
+  return S(val.before, val.pad, f16mat4x3(val.m_0, val.m_1, val.m_2, val.m_3), val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.after, val.pad_7, val.pad_8, val.pad_9, val.pad_10, val.pad_11, val.pad_12, val.pad_13, val.pad_14, val.pad_15, val.pad_16, val.pad_17, val.pad_18, val.pad_19, val.pad_20, val.pad_21);
+}
+
+S[4] conv_arr4_S(S_std140 val[4]) {
+  S arr[4] = S[4](S(0, 0u, f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_S(val[i]);
+    }
+  }
+  return arr;
+}
+
+f16mat4x3 load_u_inner_2_m() {
+  return f16mat4x3(u.inner[2u].m_0, u.inner[2u].m_1, u.inner[2u].m_2, u.inner[2u].m_3);
+}
+
+void f() {
+  s.inner = conv_arr4_S(u.inner);
+  s.inner[1] = conv_S(u.inner[2u]);
+  s.inner[3].m = load_u_inner_2_m();
+  s.inner[1].m[0] = u.inner[0u].m_1.zxy;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_storage.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_storage.wgsl.expected.msl
new file mode 100644
index 0000000..92723db
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_storage.wgsl.expected.msl

@@ -0,0 +1,33 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
+  /* 0x0008 */ half4x3 m;
+  /* 0x0028 */ tint_array<int8_t, 24> tint_pad_1;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_2;
+};
+
+kernel void f(device tint_array<S, 4>* tint_symbol [[buffer(1)]], const constant tint_array<S, 4>* tint_symbol_1 [[buffer(0)]]) {
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[2];
+  (*(tint_symbol))[3].m = (*(tint_symbol_1))[2].m;
+  (*(tint_symbol))[1].m[0] = half3((*(tint_symbol_1))[0].m[1]).zxy;
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_storage.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_storage.wgsl.expected.spvasm
new file mode 100644
index 0000000..8105d9e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_storage.wgsl.expected.spvasm

@@ -0,0 +1,191 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 113
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "m_2"
+               OpMemberName %S_std140 4 "m_3"
+               OpMemberName %S_std140 5 "after"
+               OpName %u "u"
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %s "s"
+               OpName %conv_S "conv_S"
+               OpName %val "val"
+               OpName %conv_arr4_S "conv_arr4_S"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_u_inner_2_m "load_u_inner_2_m"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 8
+               OpMemberDecorate %S_std140 2 Offset 16
+               OpMemberDecorate %S_std140 3 Offset 24
+               OpMemberDecorate %S_std140 4 Offset 32
+               OpMemberDecorate %S_std140 5 Offset 64
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 8
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 8
+               OpMemberDecorate %S 2 Offset 64
+               OpDecorate %_arr_S_uint_4 ArrayStride 128
+               OpDecorate %s DescriptorSet 0
+               OpDecorate %s Binding 1
+        %int = OpTypeInt 32 1
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+   %S_std140 = OpTypeStruct %int %v3half %v3half %v3half %v3half %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat4v3half = OpTypeMatrix %v3half 4
+          %S = OpTypeStruct %int %mat4v3half %int
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+    %u_block = OpTypeStruct %_arr_S_uint_4
+%_ptr_StorageBuffer_u_block = OpTypePointer StorageBuffer %u_block
+          %s = OpVariable %_ptr_StorageBuffer_u_block StorageBuffer
+         %17 = OpTypeFunction %S %S_std140
+         %29 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
+%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
+         %35 = OpConstantNull %_arr_S_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %38 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
+         %51 = OpConstantNull %_arr_S_std140_uint_4
+%_ptr_Function_S = OpTypePointer Function %S
+%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
+     %uint_1 = OpConstant %uint 1
+         %64 = OpTypeFunction %mat4v3half
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+     %uint_3 = OpConstant %uint 3
+       %void = OpTypeVoid
+         %87 = OpTypeFunction %void
+%_ptr_StorageBuffer__arr_S_uint_4 = OpTypePointer StorageBuffer %_arr_S_uint_4
+%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
+      %int_1 = OpConstant %int 1
+%_ptr_StorageBuffer_S = OpTypePointer StorageBuffer %S
+      %int_3 = OpConstant %int 3
+%_ptr_StorageBuffer_mat4v3half = OpTypePointer StorageBuffer %mat4v3half
+        %107 = OpConstantNull %int
+%_ptr_StorageBuffer_v3half = OpTypePointer StorageBuffer %v3half
+     %conv_S = OpFunction %S None %17
+        %val = OpFunctionParameter %S_std140
+         %20 = OpLabel
+         %21 = OpCompositeExtract %int %val 0
+         %22 = OpCompositeExtract %v3half %val 1
+         %23 = OpCompositeExtract %v3half %val 2
+         %24 = OpCompositeExtract %v3half %val 3
+         %25 = OpCompositeExtract %v3half %val 4
+         %26 = OpCompositeConstruct %mat4v3half %22 %23 %24 %25
+         %27 = OpCompositeExtract %int %val 5
+         %28 = OpCompositeConstruct %S %21 %26 %27
+               OpReturnValue %28
+               OpFunctionEnd
+%conv_arr4_S = OpFunction %_arr_S_uint_4 None %29
+      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
+         %32 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %35
+          %i = OpVariable %_ptr_Function_uint Function %38
+%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %51
+               OpBranch %39
+         %39 = OpLabel
+               OpLoopMerge %40 %41 None
+               OpBranch %42
+         %42 = OpLabel
+         %44 = OpLoad %uint %i
+         %45 = OpULessThan %bool %44 %uint_4
+         %43 = OpLogicalNot %bool %45
+               OpSelectionMerge %47 None
+               OpBranchConditional %43 %48 %47
+         %48 = OpLabel
+               OpBranch %40
+         %47 = OpLabel
+               OpStore %var_for_index %val_0
+         %52 = OpLoad %uint %i
+         %54 = OpAccessChain %_ptr_Function_S %arr %52
+         %56 = OpLoad %uint %i
+         %58 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %56
+         %59 = OpLoad %S_std140 %58
+         %55 = OpFunctionCall %S %conv_S %59
+               OpStore %54 %55
+               OpBranch %41
+         %41 = OpLabel
+         %60 = OpLoad %uint %i
+         %62 = OpIAdd %uint %60 %uint_1
+               OpStore %i %62
+               OpBranch %39
+         %40 = OpLabel
+         %63 = OpLoad %_arr_S_uint_4 %arr
+               OpReturnValue %63
+               OpFunctionEnd
+%load_u_inner_2_m = OpFunction %mat4v3half None %64
+         %66 = OpLabel
+         %71 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %74 = OpAccessChain %_ptr_Uniform_v3half %71 %uint_1
+         %75 = OpLoad %v3half %74
+         %77 = OpAccessChain %_ptr_Uniform_v3half %71 %uint_2
+         %78 = OpLoad %v3half %77
+         %81 = OpAccessChain %_ptr_Uniform_v3half %71 %uint_3
+         %82 = OpLoad %v3half %81
+         %84 = OpAccessChain %_ptr_Uniform_v3half %71 %uint_4
+         %85 = OpLoad %v3half %84
+         %86 = OpCompositeConstruct %mat4v3half %75 %78 %82 %85
+               OpReturnValue %86
+               OpFunctionEnd
+          %f = OpFunction %void None %87
+         %90 = OpLabel
+         %92 = OpAccessChain %_ptr_StorageBuffer__arr_S_uint_4 %s %uint_0
+         %95 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
+         %96 = OpLoad %_arr_S_std140_uint_4 %95
+         %93 = OpFunctionCall %_arr_S_uint_4 %conv_arr4_S %96
+               OpStore %92 %93
+         %99 = OpAccessChain %_ptr_StorageBuffer_S %s %uint_0 %int_1
+        %101 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+        %102 = OpLoad %S_std140 %101
+        %100 = OpFunctionCall %S %conv_S %102
+               OpStore %99 %100
+        %105 = OpAccessChain %_ptr_StorageBuffer_mat4v3half %s %uint_0 %int_3 %uint_1
+        %106 = OpFunctionCall %mat4v3half %load_u_inner_2_m
+               OpStore %105 %106
+        %109 = OpAccessChain %_ptr_StorageBuffer_v3half %s %uint_0 %int_1 %uint_1 %107
+        %110 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0 %38 %uint_2
+        %111 = OpLoad %v3half %110
+        %112 = OpVectorShuffle %v3half %111 %111 2 0 1
+               OpStore %109 %112
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_storage.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_storage.wgsl.expected.wgsl
new file mode 100644
index 0000000..0320160
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_storage.wgsl.expected.wgsl

@@ -0,0 +1,20 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat4x3<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  s = u;
+  s[1] = u[2];
+  s[3].m = u[2].m;
+  s[1].m[0] = u[0].m[1].zxy;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_workgroup.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_workgroup.wgsl
new file mode 100644
index 0000000..a4c2043
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_workgroup.wgsl

@@ -0,0 +1,19 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat4x3<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+var<workgroup> w : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    w = u;
+    w[1] = u[2];
+    w[3].m = u[2].m;
+    w[1].m[0] = u[0].m[1].zxy;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_workgroup.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_workgroup.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..c660740
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_workgroup.wgsl.expected.dxc.hlsl

@@ -0,0 +1,80 @@
+struct S {
+  int before;
+  matrix<float16_t, 4, 3> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+groupshared S w[4];
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+matrix<float16_t, 4, 3> tint_symbol_5(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  return matrix<float16_t, 4, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]), vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));
+}
+
+S tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_4 = ((offset + 0u)) / 4;
+  const uint scalar_offset_5 = ((offset + 64u)) / 4;
+  const S tint_symbol_8 = {asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4]), tint_symbol_5(buffer, (offset + 8u)), asint(buffer[scalar_offset_5 / 4][scalar_offset_5 % 4])};
+  return tint_symbol_8;
+}
+
+typedef S tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 128u)));
+    }
+  }
+  return arr;
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      const uint i = idx;
+      const S tint_symbol_7 = (S)0;
+      w[i] = tint_symbol_7;
+    }
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = tint_symbol_3(u, 256u);
+  w[3].m = tint_symbol_5(u, 264u);
+  uint2 ubo_load_8 = u[1].xy;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  float16_t ubo_load_8_y = f16tof32(ubo_load_8[0] >> 16);
+  w[1].m[0] = vector<float16_t, 3>(ubo_load_8_xz[0], ubo_load_8_y, ubo_load_8_xz[1]).zxy;
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_workgroup.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_workgroup.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..a9b0792
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_workgroup.wgsl.expected.fxc.hlsl

@@ -0,0 +1,85 @@
+SKIP: FAILED
+
+struct S {
+  int before;
+  matrix<float16_t, 4, 3> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+groupshared S w[4];
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+matrix<float16_t, 4, 3> tint_symbol_5(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  return matrix<float16_t, 4, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]), vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));
+}
+
+S tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_4 = ((offset + 0u)) / 4;
+  const uint scalar_offset_5 = ((offset + 64u)) / 4;
+  const S tint_symbol_8 = {asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4]), tint_symbol_5(buffer, (offset + 8u)), asint(buffer[scalar_offset_5 / 4][scalar_offset_5 % 4])};
+  return tint_symbol_8;
+}
+
+typedef S tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 128u)));
+    }
+  }
+  return arr;
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      const uint i = idx;
+      const S tint_symbol_7 = (S)0;
+      w[i] = tint_symbol_7;
+    }
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = tint_symbol_3(u, 256u);
+  w[3].m = tint_symbol_5(u, 264u);
+  uint2 ubo_load_8 = u[1].xy;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  float16_t ubo_load_8_y = f16tof32(ubo_load_8[0] >> 16);
+  w[1].m[0] = vector<float16_t, 3>(ubo_load_8_xz[0], ubo_load_8_y, ubo_load_8_xz[1]).zxy;
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000002136A44CFC0(3,10-18): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_workgroup.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_workgroup.wgsl.expected.glsl
new file mode 100644
index 0000000..06b3e5e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_workgroup.wgsl.expected.glsl

@@ -0,0 +1,105 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct S {
+  int before;
+  uint pad;
+  f16mat4x3 m;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  int after;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+};
+
+struct S_std140 {
+  int before;
+  uint pad;
+  f16vec3 m_0;
+  f16vec3 m_1;
+  f16vec3 m_2;
+  f16vec3 m_3;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  int after;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  S_std140 inner[4];
+} u;
+
+shared S w[4];
+S conv_S(S_std140 val) {
+  return S(val.before, val.pad, f16mat4x3(val.m_0, val.m_1, val.m_2, val.m_3), val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.after, val.pad_7, val.pad_8, val.pad_9, val.pad_10, val.pad_11, val.pad_12, val.pad_13, val.pad_14, val.pad_15, val.pad_16, val.pad_17, val.pad_18, val.pad_19, val.pad_20, val.pad_21);
+}
+
+S[4] conv_arr4_S(S_std140 val[4]) {
+  S arr[4] = S[4](S(0, 0u, f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_S(val[i]);
+    }
+  }
+  return arr;
+}
+
+f16mat4x3 load_u_inner_2_m() {
+  return f16mat4x3(u.inner[2u].m_0, u.inner[2u].m_1, u.inner[2u].m_2, u.inner[2u].m_3);
+}
+
+void f(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      uint i = idx;
+      S tint_symbol = S(0, 0u, f16mat4x3(f16vec3(0.0hf), f16vec3(0.0hf), f16vec3(0.0hf), f16vec3(0.0hf)), 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u);
+      w[i] = tint_symbol;
+    }
+  }
+  barrier();
+  w = conv_arr4_S(u.inner);
+  w[1] = conv_S(u.inner[2u]);
+  w[3].m = load_u_inner_2_m();
+  w[1].m[0] = u.inner[0u].m_1.zxy;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f(gl_LocalInvocationIndex);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_workgroup.wgsl.expected.msl
new file mode 100644
index 0000000..eedfd26
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_workgroup.wgsl.expected.msl

@@ -0,0 +1,48 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
+  /* 0x0008 */ half4x3 m;
+  /* 0x0028 */ tint_array<int8_t, 24> tint_pad_1;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_2;
+};
+
+struct tint_symbol_6 {
+  tint_array<S, 4> w;
+};
+
+void f_inner(uint local_invocation_index, threadgroup tint_array<S, 4>* const tint_symbol_1, const constant tint_array<S, 4>* const tint_symbol_2) {
+  for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+    uint const i = idx;
+    S const tint_symbol = S{};
+    (*(tint_symbol_1))[i] = tint_symbol;
+  }
+  threadgroup_barrier(mem_flags::mem_threadgroup);
+  *(tint_symbol_1) = *(tint_symbol_2);
+  (*(tint_symbol_1))[1] = (*(tint_symbol_2))[2];
+  (*(tint_symbol_1))[3].m = (*(tint_symbol_2))[2].m;
+  (*(tint_symbol_1))[1].m[0] = half3((*(tint_symbol_2))[0].m[1]).zxy;
+}
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol_5 [[buffer(0)]], threadgroup tint_symbol_6* tint_symbol_4 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
+  threadgroup tint_array<S, 4>* const tint_symbol_3 = &((*(tint_symbol_4)).w);
+  f_inner(local_invocation_index, tint_symbol_3, tint_symbol_5);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_workgroup.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_workgroup.wgsl.expected.spvasm
new file mode 100644
index 0000000..7249176
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_workgroup.wgsl.expected.spvasm

@@ -0,0 +1,225 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 135
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f" %local_invocation_index_1
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %local_invocation_index_1 "local_invocation_index_1"
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "m_2"
+               OpMemberName %S_std140 4 "m_3"
+               OpMemberName %S_std140 5 "after"
+               OpName %u "u"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %w "w"
+               OpName %conv_S "conv_S"
+               OpName %val "val"
+               OpName %conv_arr4_S "conv_arr4_S"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_u_inner_2_m "load_u_inner_2_m"
+               OpName %f_inner "f_inner"
+               OpName %local_invocation_index "local_invocation_index"
+               OpName %idx "idx"
+               OpName %f "f"
+               OpDecorate %local_invocation_index_1 BuiltIn LocalInvocationIndex
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 8
+               OpMemberDecorate %S_std140 2 Offset 16
+               OpMemberDecorate %S_std140 3 Offset 24
+               OpMemberDecorate %S_std140 4 Offset 32
+               OpMemberDecorate %S_std140 5 Offset 64
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 8
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 8
+               OpMemberDecorate %S 2 Offset 64
+               OpDecorate %_arr_S_uint_4 ArrayStride 128
+       %uint = OpTypeInt 32 0
+%_ptr_Input_uint = OpTypePointer Input %uint
+%local_invocation_index_1 = OpVariable %_ptr_Input_uint Input
+        %int = OpTypeInt 32 1
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+   %S_std140 = OpTypeStruct %int %v3half %v3half %v3half %v3half %int
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat4v3half = OpTypeMatrix %v3half 4
+          %S = OpTypeStruct %int %mat4v3half %int
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+%_ptr_Workgroup__arr_S_uint_4 = OpTypePointer Workgroup %_arr_S_uint_4
+          %w = OpVariable %_ptr_Workgroup__arr_S_uint_4 Workgroup
+         %18 = OpTypeFunction %S %S_std140
+         %30 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
+%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
+         %36 = OpConstantNull %_arr_S_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %39 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
+         %52 = OpConstantNull %_arr_S_std140_uint_4
+%_ptr_Function_S = OpTypePointer Function %S
+%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
+     %uint_1 = OpConstant %uint 1
+         %65 = OpTypeFunction %mat4v3half
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+     %uint_3 = OpConstant %uint 3
+       %void = OpTypeVoid
+         %88 = OpTypeFunction %void %uint
+%_ptr_Workgroup_S = OpTypePointer Workgroup %S
+        %106 = OpConstantNull %S
+   %uint_264 = OpConstant %uint 264
+%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
+      %int_1 = OpConstant %int 1
+      %int_3 = OpConstant %int 3
+%_ptr_Workgroup_mat4v3half = OpTypePointer Workgroup %mat4v3half
+        %124 = OpConstantNull %int
+%_ptr_Workgroup_v3half = OpTypePointer Workgroup %v3half
+        %130 = OpTypeFunction %void
+     %conv_S = OpFunction %S None %18
+        %val = OpFunctionParameter %S_std140
+         %21 = OpLabel
+         %22 = OpCompositeExtract %int %val 0
+         %23 = OpCompositeExtract %v3half %val 1
+         %24 = OpCompositeExtract %v3half %val 2
+         %25 = OpCompositeExtract %v3half %val 3
+         %26 = OpCompositeExtract %v3half %val 4
+         %27 = OpCompositeConstruct %mat4v3half %23 %24 %25 %26
+         %28 = OpCompositeExtract %int %val 5
+         %29 = OpCompositeConstruct %S %22 %27 %28
+               OpReturnValue %29
+               OpFunctionEnd
+%conv_arr4_S = OpFunction %_arr_S_uint_4 None %30
+      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
+         %33 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %36
+          %i = OpVariable %_ptr_Function_uint Function %39
+%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %52
+               OpBranch %40
+         %40 = OpLabel
+               OpLoopMerge %41 %42 None
+               OpBranch %43
+         %43 = OpLabel
+         %45 = OpLoad %uint %i
+         %46 = OpULessThan %bool %45 %uint_4
+         %44 = OpLogicalNot %bool %46
+               OpSelectionMerge %48 None
+               OpBranchConditional %44 %49 %48
+         %49 = OpLabel
+               OpBranch %41
+         %48 = OpLabel
+               OpStore %var_for_index %val_0
+         %53 = OpLoad %uint %i
+         %55 = OpAccessChain %_ptr_Function_S %arr %53
+         %57 = OpLoad %uint %i
+         %59 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %57
+         %60 = OpLoad %S_std140 %59
+         %56 = OpFunctionCall %S %conv_S %60
+               OpStore %55 %56
+               OpBranch %42
+         %42 = OpLabel
+         %61 = OpLoad %uint %i
+         %63 = OpIAdd %uint %61 %uint_1
+               OpStore %i %63
+               OpBranch %40
+         %41 = OpLabel
+         %64 = OpLoad %_arr_S_uint_4 %arr
+               OpReturnValue %64
+               OpFunctionEnd
+%load_u_inner_2_m = OpFunction %mat4v3half None %65
+         %67 = OpLabel
+         %72 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %75 = OpAccessChain %_ptr_Uniform_v3half %72 %uint_1
+         %76 = OpLoad %v3half %75
+         %78 = OpAccessChain %_ptr_Uniform_v3half %72 %uint_2
+         %79 = OpLoad %v3half %78
+         %82 = OpAccessChain %_ptr_Uniform_v3half %72 %uint_3
+         %83 = OpLoad %v3half %82
+         %85 = OpAccessChain %_ptr_Uniform_v3half %72 %uint_4
+         %86 = OpLoad %v3half %85
+         %87 = OpCompositeConstruct %mat4v3half %76 %79 %83 %86
+               OpReturnValue %87
+               OpFunctionEnd
+    %f_inner = OpFunction %void None %88
+%local_invocation_index = OpFunctionParameter %uint
+         %92 = OpLabel
+        %idx = OpVariable %_ptr_Function_uint Function %39
+               OpStore %idx %local_invocation_index
+               OpBranch %94
+         %94 = OpLabel
+               OpLoopMerge %95 %96 None
+               OpBranch %97
+         %97 = OpLabel
+         %99 = OpLoad %uint %idx
+        %100 = OpULessThan %bool %99 %uint_4
+         %98 = OpLogicalNot %bool %100
+               OpSelectionMerge %101 None
+               OpBranchConditional %98 %102 %101
+        %102 = OpLabel
+               OpBranch %95
+        %101 = OpLabel
+        %103 = OpLoad %uint %idx
+        %105 = OpAccessChain %_ptr_Workgroup_S %w %103
+               OpStore %105 %106
+               OpBranch %96
+         %96 = OpLabel
+        %107 = OpLoad %uint %idx
+        %108 = OpIAdd %uint %107 %uint_1
+               OpStore %idx %108
+               OpBranch %94
+         %95 = OpLabel
+               OpControlBarrier %uint_2 %uint_2 %uint_264
+        %113 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
+        %114 = OpLoad %_arr_S_std140_uint_4 %113
+        %111 = OpFunctionCall %_arr_S_uint_4 %conv_arr4_S %114
+               OpStore %w %111
+        %116 = OpAccessChain %_ptr_Workgroup_S %w %int_1
+        %118 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+        %119 = OpLoad %S_std140 %118
+        %117 = OpFunctionCall %S %conv_S %119
+               OpStore %116 %117
+        %122 = OpAccessChain %_ptr_Workgroup_mat4v3half %w %int_3 %uint_1
+        %123 = OpFunctionCall %mat4v3half %load_u_inner_2_m
+               OpStore %122 %123
+        %126 = OpAccessChain %_ptr_Workgroup_v3half %w %int_1 %uint_1 %124
+        %127 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0 %39 %uint_2
+        %128 = OpLoad %v3half %127
+        %129 = OpVectorShuffle %v3half %128 %128 2 0 1
+               OpStore %126 %129
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %130
+        %132 = OpLabel
+        %134 = OpLoad %uint %local_invocation_index_1
+        %133 = OpFunctionCall %void %f_inner %134
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_workgroup.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_workgroup.wgsl.expected.wgsl
new file mode 100644
index 0000000..0e99164
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_workgroup.wgsl.expected.wgsl

@@ -0,0 +1,20 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat4x3<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+var<workgroup> w : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  w = u;
+  w[1] = u[2];
+  w[3].m = u[2].m;
+  w[1].m[0] = u[0].m[1].zxy;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f32/dynamic_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/dynamic_index_via_ptr.wgsl
new file mode 100644
index 0000000..73048bf
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/dynamic_index_via_ptr.wgsl

@@ -0,0 +1,32 @@
+struct Inner {
+  @size(64)
+  m : mat4x3<f32>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+var<private> counter = 0;
+fn i() -> i32 { counter++; return counter; }
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a           = &a;
+  let p_a_i         = &((*p_a)[i()]);
+  let p_a_i_a       = &((*p_a_i).a);
+  let p_a_i_a_i     = &((*p_a_i_a)[i()]);
+  let p_a_i_a_i_m   = &((*p_a_i_a_i).m);
+  let p_a_i_a_i_m_i = &((*p_a_i_a_i_m)[i()]);
+
+
+  let l_a             : array<Outer, 4> =  *p_a;
+  let l_a_i           : Outer           =  *p_a_i;
+  let l_a_i_a         : array<Inner, 4> =  *p_a_i_a;
+  let l_a_i_a_i       : Inner           =  *p_a_i_a_i;
+  let l_a_i_a_i_m     : mat4x3<f32>     =  *p_a_i_a_i_m;
+  let l_a_i_a_i_m_i   : vec3<f32>       =  *p_a_i_a_i_m_i;
+  let l_a_i_a_i_m_i_i : f32             = (*p_a_i_a_i_m_i)[i()];
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..907314e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,77 @@
+struct Inner {
+  float4x3 m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+float4x3 tint_symbol_8(uint4 buffer[64], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
+}
+
+Inner tint_symbol_7(uint4 buffer[64], uint offset) {
+  const Inner tint_symbol_11 = {tint_symbol_8(buffer, (offset + 0u))};
+  return tint_symbol_11;
+}
+
+typedef Inner tint_symbol_6_ret[4];
+tint_symbol_6_ret tint_symbol_6(uint4 buffer[64], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_7(buffer, (offset + (i_1 * 64u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_5(uint4 buffer[64], uint offset) {
+  const Outer tint_symbol_12 = {tint_symbol_6(buffer, (offset + 0u))};
+  return tint_symbol_12;
+}
+
+typedef Outer tint_symbol_4_ret[4];
+tint_symbol_4_ret tint_symbol_4(uint4 buffer[64], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    for(uint i_2 = 0u; (i_2 < 4u); i_2 = (i_2 + 1u)) {
+      arr_1[i_2] = tint_symbol_5(buffer, (offset + (i_2 * 256u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_a_i_save = i();
+  const int p_a_i_a_i_save = i();
+  const int p_a_i_a_i_m_i_save = i();
+  const Outer l_a[4] = tint_symbol_4(a, 0u);
+  const Outer l_a_i = tint_symbol_5(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a[4] = tint_symbol_6(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a_i = tint_symbol_7(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const float4x3 l_a_i_a_i_m = tint_symbol_8(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const uint scalar_offset_4 = ((((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))) + (16u * uint(p_a_i_a_i_m_i_save)))) / 4;
+  const float3 l_a_i_a_i_m_i = asfloat(a[scalar_offset_4 / 4].xyz);
+  const int tint_symbol = p_a_i_save;
+  const int tint_symbol_1 = p_a_i_a_i_save;
+  const int tint_symbol_2 = p_a_i_a_i_m_i_save;
+  const int tint_symbol_3 = i();
+  const uint scalar_offset_5 = (((((256u * uint(tint_symbol)) + (64u * uint(tint_symbol_1))) + (16u * uint(tint_symbol_2))) + (4u * uint(tint_symbol_3)))) / 4;
+  const float l_a_i_a_i_m_i_i = asfloat(a[scalar_offset_5 / 4][scalar_offset_5 % 4]);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..907314e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,77 @@
+struct Inner {
+  float4x3 m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+float4x3 tint_symbol_8(uint4 buffer[64], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
+}
+
+Inner tint_symbol_7(uint4 buffer[64], uint offset) {
+  const Inner tint_symbol_11 = {tint_symbol_8(buffer, (offset + 0u))};
+  return tint_symbol_11;
+}
+
+typedef Inner tint_symbol_6_ret[4];
+tint_symbol_6_ret tint_symbol_6(uint4 buffer[64], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_7(buffer, (offset + (i_1 * 64u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_5(uint4 buffer[64], uint offset) {
+  const Outer tint_symbol_12 = {tint_symbol_6(buffer, (offset + 0u))};
+  return tint_symbol_12;
+}
+
+typedef Outer tint_symbol_4_ret[4];
+tint_symbol_4_ret tint_symbol_4(uint4 buffer[64], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    for(uint i_2 = 0u; (i_2 < 4u); i_2 = (i_2 + 1u)) {
+      arr_1[i_2] = tint_symbol_5(buffer, (offset + (i_2 * 256u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_a_i_save = i();
+  const int p_a_i_a_i_save = i();
+  const int p_a_i_a_i_m_i_save = i();
+  const Outer l_a[4] = tint_symbol_4(a, 0u);
+  const Outer l_a_i = tint_symbol_5(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a[4] = tint_symbol_6(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a_i = tint_symbol_7(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const float4x3 l_a_i_a_i_m = tint_symbol_8(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const uint scalar_offset_4 = ((((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))) + (16u * uint(p_a_i_a_i_m_i_save)))) / 4;
+  const float3 l_a_i_a_i_m_i = asfloat(a[scalar_offset_4 / 4].xyz);
+  const int tint_symbol = p_a_i_save;
+  const int tint_symbol_1 = p_a_i_a_i_save;
+  const int tint_symbol_2 = p_a_i_a_i_m_i_save;
+  const int tint_symbol_3 = i();
+  const uint scalar_offset_5 = (((((256u * uint(tint_symbol)) + (64u * uint(tint_symbol_1))) + (16u * uint(tint_symbol_2))) + (4u * uint(tint_symbol_3)))) / 4;
+  const float l_a_i_a_i_m_i_i = asfloat(a[scalar_offset_5 / 4][scalar_offset_5 % 4]);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f32/dynamic_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/dynamic_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..9f05f1e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/dynamic_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,42 @@
+#version 310 es
+
+struct Inner {
+  mat4x3 m;
+};
+
+struct Outer {
+  Inner a[4];
+};
+
+layout(binding = 0, std140) uniform a_block_ubo {
+  Outer inner[4];
+} a;
+
+int counter = 0;
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+void f() {
+  int tint_symbol = i();
+  int p_a_i_save = tint_symbol;
+  int tint_symbol_1 = i();
+  int p_a_i_a_i_save = tint_symbol_1;
+  int tint_symbol_2 = i();
+  int p_a_i_a_i_m_i_save = tint_symbol_2;
+  Outer l_a[4] = a.inner;
+  Outer l_a_i = a.inner[p_a_i_save];
+  Inner l_a_i_a[4] = a.inner[p_a_i_save].a;
+  Inner l_a_i_a_i = a.inner[p_a_i_save].a[p_a_i_a_i_save];
+  mat4x3 l_a_i_a_i_m = a.inner[p_a_i_save].a[p_a_i_a_i_save].m;
+  vec3 l_a_i_a_i_m_i = a.inner[p_a_i_save].a[p_a_i_a_i_save].m[p_a_i_a_i_m_i_save];
+  int tint_symbol_3 = i();
+  float l_a_i_a_i_m_i_i = a.inner[p_a_i_save].a[p_a_i_a_i_save].m[p_a_i_a_i_m_i_save][tint_symbol_3];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f32/dynamic_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/dynamic_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..ea70250
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/dynamic_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,48 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct Inner {
+  /* 0x0000 */ float4x3 m;
+};
+
+struct Outer {
+  /* 0x0000 */ tint_array<Inner, 4> a;
+};
+
+int i() {
+  thread int tint_symbol_4 = 0;
+  tint_symbol_4 = as_type<int>((as_type<uint>(tint_symbol_4) + as_type<uint>(1)));
+  return tint_symbol_4;
+}
+
+kernel void f(const constant tint_array<Outer, 4>* tint_symbol_5 [[buffer(0)]]) {
+  int const tint_symbol = i();
+  int const p_a_i_save = tint_symbol;
+  int const tint_symbol_1 = i();
+  int const p_a_i_a_i_save = tint_symbol_1;
+  int const tint_symbol_2 = i();
+  int const p_a_i_a_i_m_i_save = tint_symbol_2;
+  tint_array<Outer, 4> const l_a = *(tint_symbol_5);
+  Outer const l_a_i = (*(tint_symbol_5))[p_a_i_save];
+  tint_array<Inner, 4> const l_a_i_a = (*(tint_symbol_5))[p_a_i_save].a;
+  Inner const l_a_i_a_i = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save];
+  float4x3 const l_a_i_a_i_m = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save].m;
+  float3 const l_a_i_a_i_m_i = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save].m[p_a_i_a_i_m_i_save];
+  int const tint_symbol_3 = i();
+  float const l_a_i_a_i_m_i_i = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save].m[p_a_i_a_i_m_i_save][tint_symbol_3];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f32/dynamic_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/dynamic_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..83ed200
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/dynamic_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,88 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 54
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %a_block "a_block"
+               OpMemberName %a_block 0 "inner"
+               OpName %Outer "Outer"
+               OpMemberName %Outer 0 "a"
+               OpName %Inner "Inner"
+               OpMemberName %Inner 0 "m"
+               OpName %a "a"
+               OpName %counter "counter"
+               OpName %i "i"
+               OpName %f "f"
+               OpDecorate %a_block Block
+               OpMemberDecorate %a_block 0 Offset 0
+               OpMemberDecorate %Outer 0 Offset 0
+               OpMemberDecorate %Inner 0 Offset 0
+               OpMemberDecorate %Inner 0 ColMajor
+               OpMemberDecorate %Inner 0 MatrixStride 16
+               OpDecorate %_arr_Inner_uint_4 ArrayStride 64
+               OpDecorate %_arr_Outer_uint_4 ArrayStride 256
+               OpDecorate %a NonWritable
+               OpDecorate %a DescriptorSet 0
+               OpDecorate %a Binding 0
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat4v3float = OpTypeMatrix %v3float 4
+      %Inner = OpTypeStruct %mat4v3float
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_Inner_uint_4 = OpTypeArray %Inner %uint_4
+      %Outer = OpTypeStruct %_arr_Inner_uint_4
+%_arr_Outer_uint_4 = OpTypeArray %Outer %uint_4
+    %a_block = OpTypeStruct %_arr_Outer_uint_4
+%_ptr_Uniform_a_block = OpTypePointer Uniform %a_block
+          %a = OpVariable %_ptr_Uniform_a_block Uniform
+        %int = OpTypeInt 32 1
+         %14 = OpConstantNull %int
+%_ptr_Private_int = OpTypePointer Private %int
+    %counter = OpVariable %_ptr_Private_int Private %14
+         %17 = OpTypeFunction %int
+      %int_1 = OpConstant %int 1
+       %void = OpTypeVoid
+         %24 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_Outer_uint_4 = OpTypePointer Uniform %_arr_Outer_uint_4
+%_ptr_Uniform_Outer = OpTypePointer Uniform %Outer
+%_ptr_Uniform__arr_Inner_uint_4 = OpTypePointer Uniform %_arr_Inner_uint_4
+%_ptr_Uniform_Inner = OpTypePointer Uniform %Inner
+%_ptr_Uniform_mat4v3float = OpTypePointer Uniform %mat4v3float
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+          %i = OpFunction %int None %17
+         %19 = OpLabel
+         %20 = OpLoad %int %counter
+         %22 = OpIAdd %int %20 %int_1
+               OpStore %counter %22
+         %23 = OpLoad %int %counter
+               OpReturnValue %23
+               OpFunctionEnd
+          %f = OpFunction %void None %24
+         %27 = OpLabel
+         %28 = OpFunctionCall %int %i
+         %29 = OpFunctionCall %int %i
+         %30 = OpFunctionCall %int %i
+         %33 = OpAccessChain %_ptr_Uniform__arr_Outer_uint_4 %a %uint_0
+         %34 = OpLoad %_arr_Outer_uint_4 %33
+         %36 = OpAccessChain %_ptr_Uniform_Outer %a %uint_0 %28
+         %37 = OpLoad %Outer %36
+         %39 = OpAccessChain %_ptr_Uniform__arr_Inner_uint_4 %a %uint_0 %28 %uint_0
+         %40 = OpLoad %_arr_Inner_uint_4 %39
+         %42 = OpAccessChain %_ptr_Uniform_Inner %a %uint_0 %28 %uint_0 %29
+         %43 = OpLoad %Inner %42
+         %45 = OpAccessChain %_ptr_Uniform_mat4v3float %a %uint_0 %28 %uint_0 %29 %uint_0
+         %46 = OpLoad %mat4v3float %45
+         %48 = OpAccessChain %_ptr_Uniform_v3float %a %uint_0 %28 %uint_0 %29 %uint_0 %30
+         %49 = OpLoad %v3float %48
+         %50 = OpFunctionCall %int %i
+         %52 = OpAccessChain %_ptr_Uniform_float %a %uint_0 %28 %uint_0 %29 %uint_0 %30 %50
+         %53 = OpLoad %float %52
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f32/dynamic_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/dynamic_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..206ba53
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/dynamic_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,34 @@
+struct Inner {
+  @size(64)
+  m : mat4x3<f32>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+var<private> counter = 0;
+
+fn i() -> i32 {
+  counter++;
+  return counter;
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &(a);
+  let p_a_i = &((*(p_a))[i()]);
+  let p_a_i_a = &((*(p_a_i)).a);
+  let p_a_i_a_i = &((*(p_a_i_a))[i()]);
+  let p_a_i_a_i_m = &((*(p_a_i_a_i)).m);
+  let p_a_i_a_i_m_i = &((*(p_a_i_a_i_m))[i()]);
+  let l_a : array<Outer, 4> = *(p_a);
+  let l_a_i : Outer = *(p_a_i);
+  let l_a_i_a : array<Inner, 4> = *(p_a_i_a);
+  let l_a_i_a_i : Inner = *(p_a_i_a_i);
+  let l_a_i_a_i_m : mat4x3<f32> = *(p_a_i_a_i_m);
+  let l_a_i_a_i_m_i : vec3<f32> = *(p_a_i_a_i_m_i);
+  let l_a_i_a_i_m_i_i : f32 = (*(p_a_i_a_i_m_i))[i()];
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f32/static_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/static_index_via_ptr.wgsl
new file mode 100644
index 0000000..a985f02
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/static_index_via_ptr.wgsl

@@ -0,0 +1,29 @@
+struct Inner {
+  @size(64)
+  m : mat4x3<f32>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &a;
+  let p_a_3 = &((*p_a)[3]);
+  let p_a_3_a = &((*p_a_3).a);
+  let p_a_3_a_2 = &((*p_a_3_a)[2]);
+  let p_a_3_a_2_m = &((*p_a_3_a_2).m);
+  let p_a_3_a_2_m_1 = &((*p_a_3_a_2_m)[1]);
+
+
+  let l_a             : array<Outer, 4> = *p_a;
+  let l_a_3           : Outer           = *p_a_3;
+  let l_a_3_a         : array<Inner, 4> = *p_a_3_a;
+  let l_a_3_a_2       : Inner           = *p_a_3_a_2;
+  let l_a_3_a_2_m     : mat4x3<f32>     = *p_a_3_a_2_m;
+  let l_a_3_a_2_m_1   : vec3<f32>       = *p_a_3_a_2_m_1;
+  let l_a_3_a_2_m_1_0 : f32             = (*p_a_3_a_2_m_1)[0];
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..a678c85
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,62 @@
+struct Inner {
+  float4x3 m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];
+};
+
+float4x3 tint_symbol_4(uint4 buffer[64], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
+}
+
+Inner tint_symbol_3(uint4 buffer[64], uint offset) {
+  const Inner tint_symbol_7 = {tint_symbol_4(buffer, (offset + 0u))};
+  return tint_symbol_7;
+}
+
+typedef Inner tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[64], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_3(buffer, (offset + (i * 64u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_1(uint4 buffer[64], uint offset) {
+  const Outer tint_symbol_8 = {tint_symbol_2(buffer, (offset + 0u))};
+  return tint_symbol_8;
+}
+
+typedef Outer tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[64], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr_1[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 256u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const Outer l_a[4] = tint_symbol(a, 0u);
+  const Outer l_a_3 = tint_symbol_1(a, 768u);
+  const Inner l_a_3_a[4] = tint_symbol_2(a, 768u);
+  const Inner l_a_3_a_2 = tint_symbol_3(a, 896u);
+  const float4x3 l_a_3_a_2_m = tint_symbol_4(a, 896u);
+  const float3 l_a_3_a_2_m_1 = asfloat(a[57].xyz);
+  const float l_a_3_a_2_m_1_0 = asfloat(a[57].x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..a678c85
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,62 @@
+struct Inner {
+  float4x3 m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];
+};
+
+float4x3 tint_symbol_4(uint4 buffer[64], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
+}
+
+Inner tint_symbol_3(uint4 buffer[64], uint offset) {
+  const Inner tint_symbol_7 = {tint_symbol_4(buffer, (offset + 0u))};
+  return tint_symbol_7;
+}
+
+typedef Inner tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[64], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_3(buffer, (offset + (i * 64u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_1(uint4 buffer[64], uint offset) {
+  const Outer tint_symbol_8 = {tint_symbol_2(buffer, (offset + 0u))};
+  return tint_symbol_8;
+}
+
+typedef Outer tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[64], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr_1[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 256u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const Outer l_a[4] = tint_symbol(a, 0u);
+  const Outer l_a_3 = tint_symbol_1(a, 768u);
+  const Inner l_a_3_a[4] = tint_symbol_2(a, 768u);
+  const Inner l_a_3_a_2 = tint_symbol_3(a, 896u);
+  const float4x3 l_a_3_a_2_m = tint_symbol_4(a, 896u);
+  const float3 l_a_3_a_2_m_1 = asfloat(a[57].xyz);
+  const float l_a_3_a_2_m_1_0 = asfloat(a[57].x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f32/static_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/static_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..63ae890
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/static_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,29 @@
+#version 310 es
+
+struct Inner {
+  mat4x3 m;
+};
+
+struct Outer {
+  Inner a[4];
+};
+
+layout(binding = 0, std140) uniform a_block_ubo {
+  Outer inner[4];
+} a;
+
+void f() {
+  Outer l_a[4] = a.inner;
+  Outer l_a_3 = a.inner[3];
+  Inner l_a_3_a[4] = a.inner[3].a;
+  Inner l_a_3_a_2 = a.inner[3].a[2];
+  mat4x3 l_a_3_a_2_m = a.inner[3].a[2].m;
+  vec3 l_a_3_a_2_m_1 = a.inner[3].a[2].m[1];
+  float l_a_3_a_2_m_1_0 = a.inner[3].a[2].m[1][0];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f32/static_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/static_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..a7fb219
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/static_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,35 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct Inner {
+  /* 0x0000 */ float4x3 m;
+};
+
+struct Outer {
+  /* 0x0000 */ tint_array<Inner, 4> a;
+};
+
+kernel void f(const constant tint_array<Outer, 4>* tint_symbol [[buffer(0)]]) {
+  tint_array<Outer, 4> const l_a = *(tint_symbol);
+  Outer const l_a_3 = (*(tint_symbol))[3];
+  tint_array<Inner, 4> const l_a_3_a = (*(tint_symbol))[3].a;
+  Inner const l_a_3_a_2 = (*(tint_symbol))[3].a[2];
+  float4x3 const l_a_3_a_2_m = (*(tint_symbol))[3].a[2].m;
+  float3 const l_a_3_a_2_m_1 = (*(tint_symbol))[3].a[2].m[1];
+  float const l_a_3_a_2_m_1_0 = (*(tint_symbol))[3].a[2].m[1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f32/static_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/static_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..310cfbc
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/static_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,73 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 44
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %a_block "a_block"
+               OpMemberName %a_block 0 "inner"
+               OpName %Outer "Outer"
+               OpMemberName %Outer 0 "a"
+               OpName %Inner "Inner"
+               OpMemberName %Inner 0 "m"
+               OpName %a "a"
+               OpName %f "f"
+               OpDecorate %a_block Block
+               OpMemberDecorate %a_block 0 Offset 0
+               OpMemberDecorate %Outer 0 Offset 0
+               OpMemberDecorate %Inner 0 Offset 0
+               OpMemberDecorate %Inner 0 ColMajor
+               OpMemberDecorate %Inner 0 MatrixStride 16
+               OpDecorate %_arr_Inner_uint_4 ArrayStride 64
+               OpDecorate %_arr_Outer_uint_4 ArrayStride 256
+               OpDecorate %a NonWritable
+               OpDecorate %a DescriptorSet 0
+               OpDecorate %a Binding 0
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat4v3float = OpTypeMatrix %v3float 4
+      %Inner = OpTypeStruct %mat4v3float
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_Inner_uint_4 = OpTypeArray %Inner %uint_4
+      %Outer = OpTypeStruct %_arr_Inner_uint_4
+%_arr_Outer_uint_4 = OpTypeArray %Outer %uint_4
+    %a_block = OpTypeStruct %_arr_Outer_uint_4
+%_ptr_Uniform_a_block = OpTypePointer Uniform %a_block
+          %a = OpVariable %_ptr_Uniform_a_block Uniform
+       %void = OpTypeVoid
+         %13 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_Outer_uint_4 = OpTypePointer Uniform %_arr_Outer_uint_4
+        %int = OpTypeInt 32 1
+      %int_3 = OpConstant %int 3
+%_ptr_Uniform_Outer = OpTypePointer Uniform %Outer
+%_ptr_Uniform__arr_Inner_uint_4 = OpTypePointer Uniform %_arr_Inner_uint_4
+      %int_2 = OpConstant %int 2
+%_ptr_Uniform_Inner = OpTypePointer Uniform %Inner
+%_ptr_Uniform_mat4v3float = OpTypePointer Uniform %mat4v3float
+      %int_1 = OpConstant %int 1
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+         %40 = OpConstantNull %int
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+          %f = OpFunction %void None %13
+         %16 = OpLabel
+         %19 = OpAccessChain %_ptr_Uniform__arr_Outer_uint_4 %a %uint_0
+         %20 = OpLoad %_arr_Outer_uint_4 %19
+         %24 = OpAccessChain %_ptr_Uniform_Outer %a %uint_0 %int_3
+         %25 = OpLoad %Outer %24
+         %27 = OpAccessChain %_ptr_Uniform__arr_Inner_uint_4 %a %uint_0 %int_3 %uint_0
+         %28 = OpLoad %_arr_Inner_uint_4 %27
+         %31 = OpAccessChain %_ptr_Uniform_Inner %a %uint_0 %int_3 %uint_0 %int_2
+         %32 = OpLoad %Inner %31
+         %34 = OpAccessChain %_ptr_Uniform_mat4v3float %a %uint_0 %int_3 %uint_0 %int_2 %uint_0
+         %35 = OpLoad %mat4v3float %34
+         %38 = OpAccessChain %_ptr_Uniform_v3float %a %uint_0 %int_3 %uint_0 %int_2 %uint_0 %int_1
+         %39 = OpLoad %v3float %38
+         %42 = OpAccessChain %_ptr_Uniform_float %a %uint_0 %int_3 %uint_0 %int_2 %uint_0 %int_1 %40
+         %43 = OpLoad %float %42
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f32/static_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/static_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..90b476f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/static_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,27 @@
+struct Inner {
+  @size(64)
+  m : mat4x3<f32>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &(a);
+  let p_a_3 = &((*(p_a))[3]);
+  let p_a_3_a = &((*(p_a_3)).a);
+  let p_a_3_a_2 = &((*(p_a_3_a))[2]);
+  let p_a_3_a_2_m = &((*(p_a_3_a_2)).m);
+  let p_a_3_a_2_m_1 = &((*(p_a_3_a_2_m))[1]);
+  let l_a : array<Outer, 4> = *(p_a);
+  let l_a_3 : Outer = *(p_a_3);
+  let l_a_3_a : array<Inner, 4> = *(p_a_3_a);
+  let l_a_3_a_2 : Inner = *(p_a_3_a_2);
+  let l_a_3_a_2_m : mat4x3<f32> = *(p_a_3_a_2_m);
+  let l_a_3_a_2_m_1 : vec3<f32> = *(p_a_3_a_2_m_1);
+  let l_a_3_a_2_m_1_0 : f32 = (*(p_a_3_a_2_m_1))[0];
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_builtin.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_builtin.wgsl
new file mode 100644
index 0000000..127d8ef
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_builtin.wgsl

@@ -0,0 +1,15 @@
+struct S {
+  before : i32,
+  m : mat4x3<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    let t = transpose(u[2].m);
+    let l = length(u[0].m[1].zxy);
+    let a = abs(u[0].m[1].zxy.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_builtin.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_builtin.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..37cf503
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_builtin.wgsl.expected.dxc.hlsl

@@ -0,0 +1,19 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[48];
+};
+
+float4x3 tint_symbol(uint4 buffer[48], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float3x4 t = transpose(tint_symbol(u, 400u));
+  const float l = length(asfloat(u[2].xyz).zxy);
+  const float a = abs(asfloat(u[2].xyz).zxy.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_builtin.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_builtin.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..37cf503
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_builtin.wgsl.expected.fxc.hlsl

@@ -0,0 +1,19 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[48];
+};
+
+float4x3 tint_symbol(uint4 buffer[48], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float3x4 t = transpose(tint_symbol(u, 400u));
+  const float l = length(asfloat(u[2].xyz).zxy);
+  const float a = abs(asfloat(u[2].xyz).zxy.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_builtin.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_builtin.wgsl.expected.glsl
new file mode 100644
index 0000000..4e77e60
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_builtin.wgsl.expected.glsl

@@ -0,0 +1,53 @@
+#version 310 es
+
+struct S {
+  int before;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  mat4x3 m;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  int after;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+  uint pad_26;
+  uint pad_27;
+  uint pad_28;
+  uint pad_29;
+};
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  S inner[4];
+} u;
+
+void f() {
+  mat3x4 t = transpose(u.inner[2].m);
+  float l = length(u.inner[0].m[1].zxy);
+  float a = abs(u.inner[0].m[1].zxy.x);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_builtin.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_builtin.wgsl.expected.msl
new file mode 100644
index 0000000..90364fc
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_builtin.wgsl.expected.msl

@@ -0,0 +1,32 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 12> tint_pad;
+  /* 0x0010 */ float4x3 m;
+  /* 0x0050 */ tint_array<int8_t, 48> tint_pad_1;
+  /* 0x0080 */ int after;
+  /* 0x0084 */ tint_array<int8_t, 60> tint_pad_2;
+};
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol [[buffer(0)]]) {
+  float3x4 const t = transpose((*(tint_symbol))[2].m);
+  float const l = length(float3((*(tint_symbol))[0].m[1]).zxy);
+  float const a = fabs(float3((*(tint_symbol))[0].m[1]).zxy[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_builtin.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_builtin.wgsl.expected.spvasm
new file mode 100644
index 0000000..1ce238f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_builtin.wgsl.expected.spvasm

@@ -0,0 +1,67 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 38
+; Schema: 0
+               OpCapability Shader
+         %26 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %u "u"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 16
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 16
+               OpMemberDecorate %S 2 Offset 128
+               OpDecorate %_arr_S_uint_4 ArrayStride 192
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+        %int = OpTypeInt 32 1
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat4v3float = OpTypeMatrix %v3float 4
+          %S = OpTypeStruct %int %mat4v3float %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+    %u_block = OpTypeStruct %_arr_S_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+       %void = OpTypeVoid
+         %12 = OpTypeFunction %void
+    %v4float = OpTypeVector %float 4
+%mat3v4float = OpTypeMatrix %v4float 3
+     %uint_0 = OpConstant %uint 0
+      %int_2 = OpConstant %int 2
+     %uint_1 = OpConstant %uint 1
+%_ptr_Uniform_mat4v3float = OpTypePointer Uniform %mat4v3float
+         %27 = OpConstantNull %int
+      %int_1 = OpConstant %int 1
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+          %f = OpFunction %void None %12
+         %15 = OpLabel
+         %23 = OpAccessChain %_ptr_Uniform_mat4v3float %u %uint_0 %int_2 %uint_1
+         %24 = OpLoad %mat4v3float %23
+         %16 = OpTranspose %mat3v4float %24
+         %30 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %27 %uint_1 %int_1
+         %31 = OpLoad %v3float %30
+         %32 = OpVectorShuffle %v3float %31 %31 2 0 1
+         %25 = OpExtInst %float %26 Length %32
+         %34 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %27 %uint_1 %int_1
+         %35 = OpLoad %v3float %34
+         %36 = OpVectorShuffle %v3float %35 %35 2 0 1
+         %37 = OpCompositeExtract %float %36 0
+         %33 = OpExtInst %float %26 FAbs %37
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_builtin.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_builtin.wgsl.expected.wgsl
new file mode 100644
index 0000000..59ed8c5
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_builtin.wgsl.expected.wgsl

@@ -0,0 +1,15 @@
+struct S {
+  before : i32,
+  m : mat4x3<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let t = transpose(u[2].m);
+  let l = length(u[0].m[1].zxy);
+  let a = abs(u[0].m[1].zxy.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_fn.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_fn.wgsl
new file mode 100644
index 0000000..37a0425
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_fn.wgsl

@@ -0,0 +1,23 @@
+struct S {
+  before : i32,
+  m : mat4x3<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+fn a(a : array<S, 4>) {}
+fn b(s : S) {}
+fn c(m : mat4x3<f32>) {}
+fn d(v : vec3<f32>) {}
+fn e(f : f32) {}
+
+@compute @workgroup_size(1)
+fn f() {
+    a(u);
+    b(u[2]);
+    c(u[2].m);
+    d(u[0].m[1].zxy);
+    e(u[0].m[1].zxy.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_fn.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_fn.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..2fadcff
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_fn.wgsl.expected.dxc.hlsl

@@ -0,0 +1,60 @@
+struct S {
+  int before;
+  float4x3 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[48];
+};
+
+void a(S a_1[4]) {
+}
+
+void b(S s) {
+}
+
+void c(float4x3 m) {
+}
+
+void d(float3 v) {
+}
+
+void e(float f_1) {
+}
+
+float4x3 tint_symbol_3(uint4 buffer[48], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
+}
+
+S tint_symbol_1(uint4 buffer[48], uint offset) {
+  const uint scalar_offset_4 = ((offset + 0u)) / 4;
+  const uint scalar_offset_5 = ((offset + 128u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4]), tint_symbol_3(buffer, (offset + 16u)), asint(buffer[scalar_offset_5 / 4][scalar_offset_5 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[48], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 192u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(tint_symbol_1(u, 384u));
+  c(tint_symbol_3(u, 400u));
+  d(asfloat(u[2].xyz).zxy);
+  e(asfloat(u[2].xyz).zxy.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_fn.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_fn.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..2fadcff
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_fn.wgsl.expected.fxc.hlsl

@@ -0,0 +1,60 @@
+struct S {
+  int before;
+  float4x3 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[48];
+};
+
+void a(S a_1[4]) {
+}
+
+void b(S s) {
+}
+
+void c(float4x3 m) {
+}
+
+void d(float3 v) {
+}
+
+void e(float f_1) {
+}
+
+float4x3 tint_symbol_3(uint4 buffer[48], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
+}
+
+S tint_symbol_1(uint4 buffer[48], uint offset) {
+  const uint scalar_offset_4 = ((offset + 0u)) / 4;
+  const uint scalar_offset_5 = ((offset + 128u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4]), tint_symbol_3(buffer, (offset + 16u)), asint(buffer[scalar_offset_5 / 4][scalar_offset_5 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[48], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 192u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(tint_symbol_1(u, 384u));
+  c(tint_symbol_3(u, 400u));
+  d(asfloat(u[2].xyz).zxy);
+  e(asfloat(u[2].xyz).zxy.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_fn.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_fn.wgsl.expected.glsl
new file mode 100644
index 0000000..e2289e9
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_fn.wgsl.expected.glsl

@@ -0,0 +1,70 @@
+#version 310 es
+
+struct S {
+  int before;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  mat4x3 m;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  int after;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+  uint pad_26;
+  uint pad_27;
+  uint pad_28;
+  uint pad_29;
+};
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  S inner[4];
+} u;
+
+void a(S a_1[4]) {
+}
+
+void b(S s) {
+}
+
+void c(mat4x3 m) {
+}
+
+void d(vec3 v) {
+}
+
+void e(float f_1) {
+}
+
+void f() {
+  a(u.inner);
+  b(u.inner[2]);
+  c(u.inner[2].m);
+  d(u.inner[0].m[1].zxy);
+  e(u.inner[0].m[1].zxy.x);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_fn.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_fn.wgsl.expected.msl
new file mode 100644
index 0000000..2406d51
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_fn.wgsl.expected.msl

@@ -0,0 +1,49 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 12> tint_pad;
+  /* 0x0010 */ float4x3 m;
+  /* 0x0050 */ tint_array<int8_t, 48> tint_pad_1;
+  /* 0x0080 */ int after;
+  /* 0x0084 */ tint_array<int8_t, 60> tint_pad_2;
+};
+
+void a(tint_array<S, 4> a_1) {
+}
+
+void b(S s) {
+}
+
+void c(float4x3 m) {
+}
+
+void d(float3 v) {
+}
+
+void e(float f_1) {
+}
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol [[buffer(0)]]) {
+  a(*(tint_symbol));
+  b((*(tint_symbol))[2]);
+  c((*(tint_symbol))[2].m);
+  d(float3((*(tint_symbol))[0].m[1]).zxy);
+  e(float3((*(tint_symbol))[0].m[1]).zxy[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_fn.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_fn.wgsl.expected.spvasm
new file mode 100644
index 0000000..1679e28
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_fn.wgsl.expected.spvasm

@@ -0,0 +1,112 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 63
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %u "u"
+               OpName %a "a"
+               OpName %a_1 "a_1"
+               OpName %b "b"
+               OpName %s "s"
+               OpName %c "c"
+               OpName %m "m"
+               OpName %d "d"
+               OpName %v "v"
+               OpName %e "e"
+               OpName %f_1 "f_1"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 16
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 16
+               OpMemberDecorate %S 2 Offset 128
+               OpDecorate %_arr_S_uint_4 ArrayStride 192
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+        %int = OpTypeInt 32 1
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat4v3float = OpTypeMatrix %v3float 4
+          %S = OpTypeStruct %int %mat4v3float %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+    %u_block = OpTypeStruct %_arr_S_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+       %void = OpTypeVoid
+         %12 = OpTypeFunction %void %_arr_S_uint_4
+         %17 = OpTypeFunction %void %S
+         %21 = OpTypeFunction %void %mat4v3float
+         %25 = OpTypeFunction %void %v3float
+         %29 = OpTypeFunction %void %float
+         %33 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_S_uint_4 = OpTypePointer Uniform %_arr_S_uint_4
+      %int_2 = OpConstant %int 2
+%_ptr_Uniform_S = OpTypePointer Uniform %S
+     %uint_1 = OpConstant %uint 1
+%_ptr_Uniform_mat4v3float = OpTypePointer Uniform %mat4v3float
+         %52 = OpConstantNull %int
+      %int_1 = OpConstant %int 1
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+          %a = OpFunction %void None %12
+        %a_1 = OpFunctionParameter %_arr_S_uint_4
+         %16 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %b = OpFunction %void None %17
+          %s = OpFunctionParameter %S
+         %20 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %c = OpFunction %void None %21
+          %m = OpFunctionParameter %mat4v3float
+         %24 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %d = OpFunction %void None %25
+          %v = OpFunctionParameter %v3float
+         %28 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %e = OpFunction %void None %29
+        %f_1 = OpFunctionParameter %float
+         %32 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %33
+         %35 = OpLabel
+         %39 = OpAccessChain %_ptr_Uniform__arr_S_uint_4 %u %uint_0
+         %40 = OpLoad %_arr_S_uint_4 %39
+         %36 = OpFunctionCall %void %a %40
+         %44 = OpAccessChain %_ptr_Uniform_S %u %uint_0 %int_2
+         %45 = OpLoad %S %44
+         %41 = OpFunctionCall %void %b %45
+         %49 = OpAccessChain %_ptr_Uniform_mat4v3float %u %uint_0 %int_2 %uint_1
+         %50 = OpLoad %mat4v3float %49
+         %46 = OpFunctionCall %void %c %50
+         %55 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %52 %uint_1 %int_1
+         %56 = OpLoad %v3float %55
+         %57 = OpVectorShuffle %v3float %56 %56 2 0 1
+         %51 = OpFunctionCall %void %d %57
+         %59 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %52 %uint_1 %int_1
+         %60 = OpLoad %v3float %59
+         %61 = OpVectorShuffle %v3float %60 %60 2 0 1
+         %62 = OpCompositeExtract %float %61 0
+         %58 = OpFunctionCall %void %e %62
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_fn.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_fn.wgsl.expected.wgsl
new file mode 100644
index 0000000..934c242
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_fn.wgsl.expected.wgsl

@@ -0,0 +1,32 @@
+struct S {
+  before : i32,
+  m : mat4x3<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+fn a(a : array<S, 4>) {
+}
+
+fn b(s : S) {
+}
+
+fn c(m : mat4x3<f32>) {
+}
+
+fn d(v : vec3<f32>) {
+}
+
+fn e(f : f32) {
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  a(u);
+  b(u[2]);
+  c(u[2].m);
+  d(u[0].m[1].zxy);
+  e(u[0].m[1].zxy.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_private.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_private.wgsl
new file mode 100644
index 0000000..591c9ae
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_private.wgsl

@@ -0,0 +1,17 @@
+struct S {
+  before : i32,
+  m : mat4x3<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+var<private> p : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    p = u;
+    p[1] = u[2];
+    p[3].m = u[2].m;
+    p[1].m[0] = u[0].m[1].zxy;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_private.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_private.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..5426bb4
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_private.wgsl.expected.dxc.hlsl

@@ -0,0 +1,45 @@
+struct S {
+  int before;
+  float4x3 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[48];
+};
+static S p[4] = (S[4])0;
+
+float4x3 tint_symbol_3(uint4 buffer[48], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
+}
+
+S tint_symbol_1(uint4 buffer[48], uint offset) {
+  const uint scalar_offset_4 = ((offset + 0u)) / 4;
+  const uint scalar_offset_5 = ((offset + 128u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4]), tint_symbol_3(buffer, (offset + 16u)), asint(buffer[scalar_offset_5 / 4][scalar_offset_5 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[48], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 192u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = tint_symbol_1(u, 384u);
+  p[3].m = tint_symbol_3(u, 400u);
+  p[1].m[0] = asfloat(u[2].xyz).zxy;
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_private.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_private.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..5426bb4
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_private.wgsl.expected.fxc.hlsl

@@ -0,0 +1,45 @@
+struct S {
+  int before;
+  float4x3 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[48];
+};
+static S p[4] = (S[4])0;
+
+float4x3 tint_symbol_3(uint4 buffer[48], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
+}
+
+S tint_symbol_1(uint4 buffer[48], uint offset) {
+  const uint scalar_offset_4 = ((offset + 0u)) / 4;
+  const uint scalar_offset_5 = ((offset + 128u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4]), tint_symbol_3(buffer, (offset + 16u)), asint(buffer[scalar_offset_5 / 4][scalar_offset_5 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[48], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 192u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = tint_symbol_1(u, 384u);
+  p[3].m = tint_symbol_3(u, 400u);
+  p[1].m[0] = asfloat(u[2].xyz).zxy;
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_private.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_private.wgsl.expected.glsl
new file mode 100644
index 0000000..80b3c35
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_private.wgsl.expected.glsl

@@ -0,0 +1,55 @@
+#version 310 es
+
+struct S {
+  int before;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  mat4x3 m;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  int after;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+  uint pad_26;
+  uint pad_27;
+  uint pad_28;
+  uint pad_29;
+};
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  S inner[4];
+} u;
+
+S p[4] = S[4](S(0, 0u, 0u, 0u, mat4x3(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, 0u, 0u, mat4x3(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, 0u, 0u, mat4x3(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, 0u, 0u, mat4x3(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+void f() {
+  p = u.inner;
+  p[1] = u.inner[2];
+  p[3].m = u.inner[2].m;
+  p[1].m[0] = u.inner[0].m[1].zxy;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_private.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_private.wgsl.expected.msl
new file mode 100644
index 0000000..2392000
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_private.wgsl.expected.msl

@@ -0,0 +1,34 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 12> tint_pad;
+  /* 0x0010 */ float4x3 m;
+  /* 0x0050 */ tint_array<int8_t, 48> tint_pad_1;
+  /* 0x0080 */ int after;
+  /* 0x0084 */ tint_array<int8_t, 60> tint_pad_2;
+};
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol_1 [[buffer(0)]]) {
+  thread tint_array<S, 4> tint_symbol = {};
+  tint_symbol = *(tint_symbol_1);
+  tint_symbol[1] = (*(tint_symbol_1))[2];
+  tint_symbol[3].m = (*(tint_symbol_1))[2].m;
+  tint_symbol[1].m[0] = float3((*(tint_symbol_1))[0].m[1]).zxy;
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_private.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_private.wgsl.expected.spvasm
new file mode 100644
index 0000000..7152135
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_private.wgsl.expected.spvasm

@@ -0,0 +1,78 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 44
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %u "u"
+               OpName %p "p"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 16
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 16
+               OpMemberDecorate %S 2 Offset 128
+               OpDecorate %_arr_S_uint_4 ArrayStride 192
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+        %int = OpTypeInt 32 1
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat4v3float = OpTypeMatrix %v3float 4
+          %S = OpTypeStruct %int %mat4v3float %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+    %u_block = OpTypeStruct %_arr_S_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%_ptr_Private__arr_S_uint_4 = OpTypePointer Private %_arr_S_uint_4
+         %14 = OpConstantNull %_arr_S_uint_4
+          %p = OpVariable %_ptr_Private__arr_S_uint_4 Private %14
+       %void = OpTypeVoid
+         %15 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_S_uint_4 = OpTypePointer Uniform %_arr_S_uint_4
+      %int_1 = OpConstant %int 1
+%_ptr_Private_S = OpTypePointer Private %S
+      %int_2 = OpConstant %int 2
+%_ptr_Uniform_S = OpTypePointer Uniform %S
+      %int_3 = OpConstant %int 3
+     %uint_1 = OpConstant %uint 1
+%_ptr_Private_mat4v3float = OpTypePointer Private %mat4v3float
+%_ptr_Uniform_mat4v3float = OpTypePointer Uniform %mat4v3float
+         %37 = OpConstantNull %int
+%_ptr_Private_v3float = OpTypePointer Private %v3float
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+          %f = OpFunction %void None %15
+         %18 = OpLabel
+         %21 = OpAccessChain %_ptr_Uniform__arr_S_uint_4 %u %uint_0
+         %22 = OpLoad %_arr_S_uint_4 %21
+               OpStore %p %22
+         %25 = OpAccessChain %_ptr_Private_S %p %int_1
+         %28 = OpAccessChain %_ptr_Uniform_S %u %uint_0 %int_2
+         %29 = OpLoad %S %28
+               OpStore %25 %29
+         %33 = OpAccessChain %_ptr_Private_mat4v3float %p %int_3 %uint_1
+         %35 = OpAccessChain %_ptr_Uniform_mat4v3float %u %uint_0 %int_2 %uint_1
+         %36 = OpLoad %mat4v3float %35
+               OpStore %33 %36
+         %39 = OpAccessChain %_ptr_Private_v3float %p %int_1 %uint_1 %37
+         %41 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %37 %uint_1 %int_1
+         %42 = OpLoad %v3float %41
+         %43 = OpVectorShuffle %v3float %42 %42 2 0 1
+               OpStore %39 %43
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_private.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_private.wgsl.expected.wgsl
new file mode 100644
index 0000000..fc15a1c
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_private.wgsl.expected.wgsl

@@ -0,0 +1,18 @@
+struct S {
+  before : i32,
+  m : mat4x3<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+var<private> p : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  p = u;
+  p[1] = u[2];
+  p[3].m = u[2].m;
+  p[1].m[0] = u[0].m[1].zxy;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_storage.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_storage.wgsl
new file mode 100644
index 0000000..a46bb45
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_storage.wgsl

@@ -0,0 +1,17 @@
+struct S {
+  before : i32,
+  m : mat4x3<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    s = u;
+    s[1] = u[2];
+    s[3].m = u[2].m;
+    s[1].m[0] = u[0].m[1].zxy;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_storage.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_storage.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..f910578
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_storage.wgsl.expected.dxc.hlsl

@@ -0,0 +1,67 @@
+struct S {
+  int before;
+  float4x3 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[48];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol_3(RWByteAddressBuffer buffer, uint offset, float4x3 value) {
+  buffer.Store3((offset + 0u), asuint(value[0u]));
+  buffer.Store3((offset + 16u), asuint(value[1u]));
+  buffer.Store3((offset + 32u), asuint(value[2u]));
+  buffer.Store3((offset + 48u), asuint(value[3u]));
+}
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, S value) {
+  buffer.Store((offset + 0u), asuint(value.before));
+  tint_symbol_3(buffer, (offset + 16u), value.m);
+  buffer.Store((offset + 128u), asuint(value.after));
+}
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, S value[4]) {
+  S array[4] = value;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      tint_symbol_1(buffer, (offset + (i * 192u)), array[i]);
+    }
+  }
+}
+
+float4x3 tint_symbol_8(uint4 buffer[48], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
+}
+
+S tint_symbol_6(uint4 buffer[48], uint offset) {
+  const uint scalar_offset_4 = ((offset + 0u)) / 4;
+  const uint scalar_offset_5 = ((offset + 128u)) / 4;
+  const S tint_symbol_10 = {asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4]), tint_symbol_8(buffer, (offset + 16u)), asint(buffer[scalar_offset_5 / 4][scalar_offset_5 % 4])};
+  return tint_symbol_10;
+}
+
+typedef S tint_symbol_5_ret[4];
+tint_symbol_5_ret tint_symbol_5(uint4 buffer[48], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_6(buffer, (offset + (i_1 * 192u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_5(u, 0u));
+  tint_symbol_1(s, 192u, tint_symbol_6(u, 384u));
+  tint_symbol_3(s, 592u, tint_symbol_8(u, 400u));
+  s.Store3(208u, asuint(asfloat(u[2].xyz).zxy));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_storage.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_storage.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..f910578
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_storage.wgsl.expected.fxc.hlsl

@@ -0,0 +1,67 @@
+struct S {
+  int before;
+  float4x3 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[48];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol_3(RWByteAddressBuffer buffer, uint offset, float4x3 value) {
+  buffer.Store3((offset + 0u), asuint(value[0u]));
+  buffer.Store3((offset + 16u), asuint(value[1u]));
+  buffer.Store3((offset + 32u), asuint(value[2u]));
+  buffer.Store3((offset + 48u), asuint(value[3u]));
+}
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, S value) {
+  buffer.Store((offset + 0u), asuint(value.before));
+  tint_symbol_3(buffer, (offset + 16u), value.m);
+  buffer.Store((offset + 128u), asuint(value.after));
+}
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, S value[4]) {
+  S array[4] = value;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      tint_symbol_1(buffer, (offset + (i * 192u)), array[i]);
+    }
+  }
+}
+
+float4x3 tint_symbol_8(uint4 buffer[48], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
+}
+
+S tint_symbol_6(uint4 buffer[48], uint offset) {
+  const uint scalar_offset_4 = ((offset + 0u)) / 4;
+  const uint scalar_offset_5 = ((offset + 128u)) / 4;
+  const S tint_symbol_10 = {asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4]), tint_symbol_8(buffer, (offset + 16u)), asint(buffer[scalar_offset_5 / 4][scalar_offset_5 % 4])};
+  return tint_symbol_10;
+}
+
+typedef S tint_symbol_5_ret[4];
+tint_symbol_5_ret tint_symbol_5(uint4 buffer[48], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_6(buffer, (offset + (i_1 * 192u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_5(u, 0u));
+  tint_symbol_1(s, 192u, tint_symbol_6(u, 384u));
+  tint_symbol_3(s, 592u, tint_symbol_8(u, 400u));
+  s.Store3(208u, asuint(asfloat(u[2].xyz).zxy));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_storage.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_storage.wgsl.expected.glsl
new file mode 100644
index 0000000..d852f0c
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_storage.wgsl.expected.glsl

@@ -0,0 +1,58 @@
+#version 310 es
+
+struct S {
+  int before;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  mat4x3 m;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  int after;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+  uint pad_26;
+  uint pad_27;
+  uint pad_28;
+  uint pad_29;
+};
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  S inner[4];
+} u;
+
+layout(binding = 1, std430) buffer u_block_ssbo {
+  S inner[4];
+} s;
+
+void f() {
+  s.inner = u.inner;
+  s.inner[1] = u.inner[2];
+  s.inner[3].m = u.inner[2].m;
+  s.inner[1].m[0] = u.inner[0].m[1].zxy;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_storage.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_storage.wgsl.expected.msl
new file mode 100644
index 0000000..19fab81
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_storage.wgsl.expected.msl

@@ -0,0 +1,33 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 12> tint_pad;
+  /* 0x0010 */ float4x3 m;
+  /* 0x0050 */ tint_array<int8_t, 48> tint_pad_1;
+  /* 0x0080 */ int after;
+  /* 0x0084 */ tint_array<int8_t, 60> tint_pad_2;
+};
+
+kernel void f(device tint_array<S, 4>* tint_symbol [[buffer(1)]], const constant tint_array<S, 4>* tint_symbol_1 [[buffer(0)]]) {
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[2];
+  (*(tint_symbol))[3].m = (*(tint_symbol_1))[2].m;
+  (*(tint_symbol))[1].m[0] = float3((*(tint_symbol_1))[0].m[1]).zxy;
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_storage.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_storage.wgsl.expected.spvasm
new file mode 100644
index 0000000..54a79a4
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_storage.wgsl.expected.spvasm

@@ -0,0 +1,81 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 45
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %u "u"
+               OpName %s "s"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 16
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 16
+               OpMemberDecorate %S 2 Offset 128
+               OpDecorate %_arr_S_uint_4 ArrayStride 192
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpDecorate %s DescriptorSet 0
+               OpDecorate %s Binding 1
+        %int = OpTypeInt 32 1
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat4v3float = OpTypeMatrix %v3float 4
+          %S = OpTypeStruct %int %mat4v3float %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+    %u_block = OpTypeStruct %_arr_S_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%_ptr_StorageBuffer_u_block = OpTypePointer StorageBuffer %u_block
+          %s = OpVariable %_ptr_StorageBuffer_u_block StorageBuffer
+       %void = OpTypeVoid
+         %14 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_StorageBuffer__arr_S_uint_4 = OpTypePointer StorageBuffer %_arr_S_uint_4
+%_ptr_Uniform__arr_S_uint_4 = OpTypePointer Uniform %_arr_S_uint_4
+      %int_1 = OpConstant %int 1
+%_ptr_StorageBuffer_S = OpTypePointer StorageBuffer %S
+      %int_2 = OpConstant %int 2
+%_ptr_Uniform_S = OpTypePointer Uniform %S
+      %int_3 = OpConstant %int 3
+     %uint_1 = OpConstant %uint 1
+%_ptr_StorageBuffer_mat4v3float = OpTypePointer StorageBuffer %mat4v3float
+%_ptr_Uniform_mat4v3float = OpTypePointer Uniform %mat4v3float
+         %38 = OpConstantNull %int
+%_ptr_StorageBuffer_v3float = OpTypePointer StorageBuffer %v3float
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+          %f = OpFunction %void None %14
+         %17 = OpLabel
+         %20 = OpAccessChain %_ptr_StorageBuffer__arr_S_uint_4 %s %uint_0
+         %22 = OpAccessChain %_ptr_Uniform__arr_S_uint_4 %u %uint_0
+         %23 = OpLoad %_arr_S_uint_4 %22
+               OpStore %20 %23
+         %26 = OpAccessChain %_ptr_StorageBuffer_S %s %uint_0 %int_1
+         %29 = OpAccessChain %_ptr_Uniform_S %u %uint_0 %int_2
+         %30 = OpLoad %S %29
+               OpStore %26 %30
+         %34 = OpAccessChain %_ptr_StorageBuffer_mat4v3float %s %uint_0 %int_3 %uint_1
+         %36 = OpAccessChain %_ptr_Uniform_mat4v3float %u %uint_0 %int_2 %uint_1
+         %37 = OpLoad %mat4v3float %36
+               OpStore %34 %37
+         %40 = OpAccessChain %_ptr_StorageBuffer_v3float %s %uint_0 %int_1 %uint_1 %38
+         %42 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %38 %uint_1 %int_1
+         %43 = OpLoad %v3float %42
+         %44 = OpVectorShuffle %v3float %43 %43 2 0 1
+               OpStore %40 %44
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_storage.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_storage.wgsl.expected.wgsl
new file mode 100644
index 0000000..6219423
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_storage.wgsl.expected.wgsl

@@ -0,0 +1,18 @@
+struct S {
+  before : i32,
+  m : mat4x3<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  s = u;
+  s[1] = u[2];
+  s[3].m = u[2].m;
+  s[1].m[0] = u[0].m[1].zxy;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_workgroup.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_workgroup.wgsl
new file mode 100644
index 0000000..eea6519
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_workgroup.wgsl

@@ -0,0 +1,17 @@
+struct S {
+  before : i32,
+  m : mat4x3<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+var<workgroup> w : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    w = u;
+    w[1] = u[2];
+    w[3].m = u[2].m;
+    w[1].m[0] = u[0].m[1].zxy;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_workgroup.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_workgroup.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..11ebf40
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_workgroup.wgsl.expected.dxc.hlsl

@@ -0,0 +1,61 @@
+struct S {
+  int before;
+  float4x3 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[48];
+};
+groupshared S w[4];
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+float4x3 tint_symbol_5(uint4 buffer[48], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
+}
+
+S tint_symbol_3(uint4 buffer[48], uint offset) {
+  const uint scalar_offset_4 = ((offset + 0u)) / 4;
+  const uint scalar_offset_5 = ((offset + 128u)) / 4;
+  const S tint_symbol_8 = {asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4]), tint_symbol_5(buffer, (offset + 16u)), asint(buffer[scalar_offset_5 / 4][scalar_offset_5 % 4])};
+  return tint_symbol_8;
+}
+
+typedef S tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[48], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 192u)));
+    }
+  }
+  return arr;
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      const uint i = idx;
+      const S tint_symbol_7 = (S)0;
+      w[i] = tint_symbol_7;
+    }
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = tint_symbol_3(u, 384u);
+  w[3].m = tint_symbol_5(u, 400u);
+  w[1].m[0] = asfloat(u[2].xyz).zxy;
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_workgroup.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_workgroup.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..11ebf40
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_workgroup.wgsl.expected.fxc.hlsl

@@ -0,0 +1,61 @@
+struct S {
+  int before;
+  float4x3 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[48];
+};
+groupshared S w[4];
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+float4x3 tint_symbol_5(uint4 buffer[48], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
+}
+
+S tint_symbol_3(uint4 buffer[48], uint offset) {
+  const uint scalar_offset_4 = ((offset + 0u)) / 4;
+  const uint scalar_offset_5 = ((offset + 128u)) / 4;
+  const S tint_symbol_8 = {asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4]), tint_symbol_5(buffer, (offset + 16u)), asint(buffer[scalar_offset_5 / 4][scalar_offset_5 % 4])};
+  return tint_symbol_8;
+}
+
+typedef S tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[48], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 192u)));
+    }
+  }
+  return arr;
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      const uint i = idx;
+      const S tint_symbol_7 = (S)0;
+      w[i] = tint_symbol_7;
+    }
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = tint_symbol_3(u, 384u);
+  w[3].m = tint_symbol_5(u, 400u);
+  w[1].m[0] = asfloat(u[2].xyz).zxy;
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_workgroup.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_workgroup.wgsl.expected.glsl
new file mode 100644
index 0000000..a5f912f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_workgroup.wgsl.expected.glsl

@@ -0,0 +1,63 @@
+#version 310 es
+
+struct S {
+  int before;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  mat4x3 m;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  int after;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+  uint pad_26;
+  uint pad_27;
+  uint pad_28;
+  uint pad_29;
+};
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  S inner[4];
+} u;
+
+shared S w[4];
+void f(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      uint i = idx;
+      S tint_symbol = S(0, 0u, 0u, 0u, mat4x3(vec3(0.0f), vec3(0.0f), vec3(0.0f), vec3(0.0f)), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u);
+      w[i] = tint_symbol;
+    }
+  }
+  barrier();
+  w = u.inner;
+  w[1] = u.inner[2];
+  w[3].m = u.inner[2].m;
+  w[1].m[0] = u.inner[0].m[1].zxy;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f(gl_LocalInvocationIndex);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_workgroup.wgsl.expected.msl
new file mode 100644
index 0000000..17fa37d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_workgroup.wgsl.expected.msl

@@ -0,0 +1,48 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 12> tint_pad;
+  /* 0x0010 */ float4x3 m;
+  /* 0x0050 */ tint_array<int8_t, 48> tint_pad_1;
+  /* 0x0080 */ int after;
+  /* 0x0084 */ tint_array<int8_t, 60> tint_pad_2;
+};
+
+struct tint_symbol_6 {
+  tint_array<S, 4> w;
+};
+
+void f_inner(uint local_invocation_index, threadgroup tint_array<S, 4>* const tint_symbol_1, const constant tint_array<S, 4>* const tint_symbol_2) {
+  for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+    uint const i = idx;
+    S const tint_symbol = S{};
+    (*(tint_symbol_1))[i] = tint_symbol;
+  }
+  threadgroup_barrier(mem_flags::mem_threadgroup);
+  *(tint_symbol_1) = *(tint_symbol_2);
+  (*(tint_symbol_1))[1] = (*(tint_symbol_2))[2];
+  (*(tint_symbol_1))[3].m = (*(tint_symbol_2))[2].m;
+  (*(tint_symbol_1))[1].m[0] = float3((*(tint_symbol_2))[0].m[1]).zxy;
+}
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol_5 [[buffer(0)]], threadgroup tint_symbol_6* tint_symbol_4 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
+  threadgroup tint_array<S, 4>* const tint_symbol_3 = &((*(tint_symbol_4)).w);
+  f_inner(local_invocation_index, tint_symbol_3, tint_symbol_5);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_workgroup.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_workgroup.wgsl.expected.spvasm
new file mode 100644
index 0000000..27c9f46
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_workgroup.wgsl.expected.spvasm

@@ -0,0 +1,124 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 72
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f" %local_invocation_index_1
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %local_invocation_index_1 "local_invocation_index_1"
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %u "u"
+               OpName %w "w"
+               OpName %f_inner "f_inner"
+               OpName %local_invocation_index "local_invocation_index"
+               OpName %idx "idx"
+               OpName %f "f"
+               OpDecorate %local_invocation_index_1 BuiltIn LocalInvocationIndex
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 16
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 16
+               OpMemberDecorate %S 2 Offset 128
+               OpDecorate %_arr_S_uint_4 ArrayStride 192
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %uint = OpTypeInt 32 0
+%_ptr_Input_uint = OpTypePointer Input %uint
+%local_invocation_index_1 = OpVariable %_ptr_Input_uint Input
+        %int = OpTypeInt 32 1
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat4v3float = OpTypeMatrix %v3float 4
+          %S = OpTypeStruct %int %mat4v3float %int
+     %uint_4 = OpConstant %uint 4
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+    %u_block = OpTypeStruct %_arr_S_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%_ptr_Workgroup__arr_S_uint_4 = OpTypePointer Workgroup %_arr_S_uint_4
+          %w = OpVariable %_ptr_Workgroup__arr_S_uint_4 Workgroup
+       %void = OpTypeVoid
+         %16 = OpTypeFunction %void %uint
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %23 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Workgroup_S = OpTypePointer Workgroup %S
+         %37 = OpConstantNull %S
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+   %uint_264 = OpConstant %uint 264
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_S_uint_4 = OpTypePointer Uniform %_arr_S_uint_4
+      %int_1 = OpConstant %int 1
+      %int_2 = OpConstant %int 2
+%_ptr_Uniform_S = OpTypePointer Uniform %S
+      %int_3 = OpConstant %int 3
+%_ptr_Workgroup_mat4v3float = OpTypePointer Workgroup %mat4v3float
+%_ptr_Uniform_mat4v3float = OpTypePointer Uniform %mat4v3float
+         %60 = OpConstantNull %int
+%_ptr_Workgroup_v3float = OpTypePointer Workgroup %v3float
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+         %67 = OpTypeFunction %void
+    %f_inner = OpFunction %void None %16
+%local_invocation_index = OpFunctionParameter %uint
+         %20 = OpLabel
+        %idx = OpVariable %_ptr_Function_uint Function %23
+               OpStore %idx %local_invocation_index
+               OpBranch %24
+         %24 = OpLabel
+               OpLoopMerge %25 %26 None
+               OpBranch %27
+         %27 = OpLabel
+         %29 = OpLoad %uint %idx
+         %30 = OpULessThan %bool %29 %uint_4
+         %28 = OpLogicalNot %bool %30
+               OpSelectionMerge %32 None
+               OpBranchConditional %28 %33 %32
+         %33 = OpLabel
+               OpBranch %25
+         %32 = OpLabel
+         %34 = OpLoad %uint %idx
+         %36 = OpAccessChain %_ptr_Workgroup_S %w %34
+               OpStore %36 %37
+               OpBranch %26
+         %26 = OpLabel
+         %38 = OpLoad %uint %idx
+         %40 = OpIAdd %uint %38 %uint_1
+               OpStore %idx %40
+               OpBranch %24
+         %25 = OpLabel
+               OpControlBarrier %uint_2 %uint_2 %uint_264
+         %46 = OpAccessChain %_ptr_Uniform__arr_S_uint_4 %u %uint_0
+         %47 = OpLoad %_arr_S_uint_4 %46
+               OpStore %w %47
+         %49 = OpAccessChain %_ptr_Workgroup_S %w %int_1
+         %52 = OpAccessChain %_ptr_Uniform_S %u %uint_0 %int_2
+         %53 = OpLoad %S %52
+               OpStore %49 %53
+         %56 = OpAccessChain %_ptr_Workgroup_mat4v3float %w %int_3 %uint_1
+         %58 = OpAccessChain %_ptr_Uniform_mat4v3float %u %uint_0 %int_2 %uint_1
+         %59 = OpLoad %mat4v3float %58
+               OpStore %56 %59
+         %62 = OpAccessChain %_ptr_Workgroup_v3float %w %int_1 %uint_1 %60
+         %64 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %60 %uint_1 %int_1
+         %65 = OpLoad %v3float %64
+         %66 = OpVectorShuffle %v3float %65 %65 2 0 1
+               OpStore %62 %66
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %67
+         %69 = OpLabel
+         %71 = OpLoad %uint %local_invocation_index_1
+         %70 = OpFunctionCall %void %f_inner %71
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_workgroup.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_workgroup.wgsl.expected.wgsl
new file mode 100644
index 0000000..4b7ef2b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_workgroup.wgsl.expected.wgsl

@@ -0,0 +1,18 @@
+struct S {
+  before : i32,
+  m : mat4x3<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+var<workgroup> w : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  w = u;
+  w[1] = u[2];
+  w[3].m = u[2].m;
+  w[1].m[0] = u[0].m[1].zxy;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f16/dynamic_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/dynamic_index_via_ptr.wgsl
new file mode 100644
index 0000000..688558a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/dynamic_index_via_ptr.wgsl

@@ -0,0 +1,34 @@
+enable f16;
+
+struct Inner {
+  @size(64)
+  m : mat4x4<f16>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+var<private> counter = 0;
+fn i() -> i32 { counter++; return counter; }
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a           = &a;
+  let p_a_i         = &((*p_a)[i()]);
+  let p_a_i_a       = &((*p_a_i).a);
+  let p_a_i_a_i     = &((*p_a_i_a)[i()]);
+  let p_a_i_a_i_m   = &((*p_a_i_a_i).m);
+  let p_a_i_a_i_m_i = &((*p_a_i_a_i_m)[i()]);
+
+
+  let l_a             : array<Outer, 4> =  *p_a;
+  let l_a_i           : Outer           =  *p_a_i;
+  let l_a_i_a         : array<Inner, 4> =  *p_a_i_a;
+  let l_a_i_a_i       : Inner           =  *p_a_i_a_i;
+  let l_a_i_a_i_m     : mat4x4<f16>     =  *p_a_i_a_i_m;
+  let l_a_i_a_i_m_i   : vec4<f16>       =  *p_a_i_a_i_m_i;
+  let l_a_i_a_i_m_i_i : f16             = (*p_a_i_a_i_m_i)[i()];
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..c0fa0aa
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,98 @@
+struct Inner {
+  matrix<float16_t, 4, 4> m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+matrix<float16_t, 4, 4> tint_symbol_8(uint4 buffer[64], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  return matrix<float16_t, 4, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]), vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]));
+}
+
+Inner tint_symbol_7(uint4 buffer[64], uint offset) {
+  const Inner tint_symbol_11 = {tint_symbol_8(buffer, (offset + 0u))};
+  return tint_symbol_11;
+}
+
+typedef Inner tint_symbol_6_ret[4];
+tint_symbol_6_ret tint_symbol_6(uint4 buffer[64], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_7(buffer, (offset + (i_1 * 64u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_5(uint4 buffer[64], uint offset) {
+  const Outer tint_symbol_12 = {tint_symbol_6(buffer, (offset + 0u))};
+  return tint_symbol_12;
+}
+
+typedef Outer tint_symbol_4_ret[4];
+tint_symbol_4_ret tint_symbol_4(uint4 buffer[64], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    for(uint i_2 = 0u; (i_2 < 4u); i_2 = (i_2 + 1u)) {
+      arr_1[i_2] = tint_symbol_5(buffer, (offset + (i_2 * 256u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_a_i_save = i();
+  const int p_a_i_a_i_save = i();
+  const int p_a_i_a_i_m_i_save = i();
+  const Outer l_a[4] = tint_symbol_4(a, 0u);
+  const Outer l_a_i = tint_symbol_5(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a[4] = tint_symbol_6(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a_i = tint_symbol_7(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const matrix<float16_t, 4, 4> l_a_i_a_i_m = tint_symbol_8(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const uint scalar_offset_4 = ((((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))) + (8u * uint(p_a_i_a_i_m_i_save)))) / 4;
+  uint4 ubo_load_9 = a[scalar_offset_4 / 4];
+  uint2 ubo_load_8 = ((scalar_offset_4 & 2) ? ubo_load_9.zw : ubo_load_9.xy);
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_8_yw = vector<float16_t, 2>(f16tof32(ubo_load_8 >> 16));
+  const vector<float16_t, 4> l_a_i_a_i_m_i = vector<float16_t, 4>(ubo_load_8_xz[0], ubo_load_8_yw[0], ubo_load_8_xz[1], ubo_load_8_yw[1]);
+  const int tint_symbol = p_a_i_save;
+  const int tint_symbol_1 = p_a_i_a_i_save;
+  const int tint_symbol_2 = p_a_i_a_i_m_i_save;
+  const int tint_symbol_3 = i();
+  const uint scalar_offset_bytes = (((((256u * uint(tint_symbol)) + (64u * uint(tint_symbol_1))) + (8u * uint(tint_symbol_2))) + (2u * uint(tint_symbol_3))));
+  const uint scalar_offset_index = scalar_offset_bytes / 4;
+  const float16_t l_a_i_a_i_m_i_i = float16_t(f16tof32(((a[scalar_offset_index / 4][scalar_offset_index % 4] >> (scalar_offset_bytes % 4 == 0 ? 0 : 16)) & 0xFFFF)));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..835df5d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,103 @@
+SKIP: FAILED
+
+struct Inner {
+  matrix<float16_t, 4, 4> m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+matrix<float16_t, 4, 4> tint_symbol_8(uint4 buffer[64], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  return matrix<float16_t, 4, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]), vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]));
+}
+
+Inner tint_symbol_7(uint4 buffer[64], uint offset) {
+  const Inner tint_symbol_11 = {tint_symbol_8(buffer, (offset + 0u))};
+  return tint_symbol_11;
+}
+
+typedef Inner tint_symbol_6_ret[4];
+tint_symbol_6_ret tint_symbol_6(uint4 buffer[64], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_7(buffer, (offset + (i_1 * 64u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_5(uint4 buffer[64], uint offset) {
+  const Outer tint_symbol_12 = {tint_symbol_6(buffer, (offset + 0u))};
+  return tint_symbol_12;
+}
+
+typedef Outer tint_symbol_4_ret[4];
+tint_symbol_4_ret tint_symbol_4(uint4 buffer[64], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    for(uint i_2 = 0u; (i_2 < 4u); i_2 = (i_2 + 1u)) {
+      arr_1[i_2] = tint_symbol_5(buffer, (offset + (i_2 * 256u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_a_i_save = i();
+  const int p_a_i_a_i_save = i();
+  const int p_a_i_a_i_m_i_save = i();
+  const Outer l_a[4] = tint_symbol_4(a, 0u);
+  const Outer l_a_i = tint_symbol_5(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a[4] = tint_symbol_6(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a_i = tint_symbol_7(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const matrix<float16_t, 4, 4> l_a_i_a_i_m = tint_symbol_8(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const uint scalar_offset_4 = ((((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))) + (8u * uint(p_a_i_a_i_m_i_save)))) / 4;
+  uint4 ubo_load_9 = a[scalar_offset_4 / 4];
+  uint2 ubo_load_8 = ((scalar_offset_4 & 2) ? ubo_load_9.zw : ubo_load_9.xy);
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_8_yw = vector<float16_t, 2>(f16tof32(ubo_load_8 >> 16));
+  const vector<float16_t, 4> l_a_i_a_i_m_i = vector<float16_t, 4>(ubo_load_8_xz[0], ubo_load_8_yw[0], ubo_load_8_xz[1], ubo_load_8_yw[1]);
+  const int tint_symbol = p_a_i_save;
+  const int tint_symbol_1 = p_a_i_a_i_save;
+  const int tint_symbol_2 = p_a_i_a_i_m_i_save;
+  const int tint_symbol_3 = i();
+  const uint scalar_offset_bytes = (((((256u * uint(tint_symbol)) + (64u * uint(tint_symbol_1))) + (8u * uint(tint_symbol_2))) + (2u * uint(tint_symbol_3))));
+  const uint scalar_offset_index = scalar_offset_bytes / 4;
+  const float16_t l_a_i_a_i_m_i_i = float16_t(f16tof32(((a[scalar_offset_index / 4][scalar_offset_index % 4] >> (scalar_offset_bytes % 4 == 0 ? 0 : 16)) & 0xFFFF)));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000022FAD9FCD80(2,10-18): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f16/dynamic_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/dynamic_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..5578dfd
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/dynamic_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,157 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct Inner {
+  f16mat4 m;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+};
+
+struct Inner_std140 {
+  f16vec4 m_0;
+  f16vec4 m_1;
+  f16vec4 m_2;
+  f16vec4 m_3;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+};
+
+struct Outer {
+  Inner a[4];
+};
+
+struct Outer_std140 {
+  Inner_std140 a[4];
+};
+
+layout(binding = 0, std140) uniform a_block_std140_ubo {
+  Outer_std140 inner[4];
+} a;
+
+int counter = 0;
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+Inner conv_Inner(Inner_std140 val) {
+  return Inner(f16mat4(val.m_0, val.m_1, val.m_2, val.m_3), val.pad, val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.pad_7);
+}
+
+Inner[4] conv_arr4_Inner(Inner_std140 val[4]) {
+  Inner arr[4] = Inner[4](Inner(f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_Inner(val[i]);
+    }
+  }
+  return arr;
+}
+
+Outer conv_Outer(Outer_std140 val) {
+  return Outer(conv_arr4_Inner(val.a));
+}
+
+Outer[4] conv_arr4_Outer(Outer_std140 val[4]) {
+  Outer arr[4] = Outer[4](Outer(Inner[4](Inner(f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))), Outer(Inner[4](Inner(f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))), Outer(Inner[4](Inner(f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))), 
Outer(Inner[4](Inner(f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_Outer(val[i]);
+    }
+  }
+  return arr;
+}
+
+f16mat4 load_a_inner_p0_a_p1_m(uint p0, uint p1) {
+  uint s_save = p0;
+  uint s_save_1 = p1;
+  return f16mat4(a.inner[s_save].a[s_save_1].m_0, a.inner[s_save].a[s_save_1].m_1, a.inner[s_save].a[s_save_1].m_2, a.inner[s_save].a[s_save_1].m_3);
+}
+
+f16vec4 load_a_inner_p0_a_p1_m_p2(uint p0, uint p1, uint p2) {
+  switch(p2) {
+    case 0u: {
+      return a.inner[p0].a[p1].m_0;
+      break;
+    }
+    case 1u: {
+      return a.inner[p0].a[p1].m_1;
+      break;
+    }
+    case 2u: {
+      return a.inner[p0].a[p1].m_2;
+      break;
+    }
+    case 3u: {
+      return a.inner[p0].a[p1].m_3;
+      break;
+    }
+    default: {
+      return f16vec4(0.0hf);
+      break;
+    }
+  }
+}
+
+float16_t load_a_inner_p0_a_p1_m_p2_p3(uint p0, uint p1, uint p2, uint p3) {
+  switch(p2) {
+    case 0u: {
+      return a.inner[p0].a[p1].m_0[p3];
+      break;
+    }
+    case 1u: {
+      return a.inner[p0].a[p1].m_1[p3];
+      break;
+    }
+    case 2u: {
+      return a.inner[p0].a[p1].m_2[p3];
+      break;
+    }
+    case 3u: {
+      return a.inner[p0].a[p1].m_3[p3];
+      break;
+    }
+    default: {
+      return 0.0hf;
+      break;
+    }
+  }
+}
+
+void f() {
+  Outer p_a[4] = conv_arr4_Outer(a.inner);
+  int tint_symbol = i();
+  Outer p_a_i = conv_Outer(a.inner[tint_symbol]);
+  Inner p_a_i_a[4] = conv_arr4_Inner(a.inner[tint_symbol].a);
+  int tint_symbol_1 = i();
+  Inner p_a_i_a_i = conv_Inner(a.inner[tint_symbol].a[tint_symbol_1]);
+  f16mat4 p_a_i_a_i_m = load_a_inner_p0_a_p1_m(uint(tint_symbol), uint(tint_symbol_1));
+  int tint_symbol_2 = i();
+  f16vec4 p_a_i_a_i_m_i = load_a_inner_p0_a_p1_m_p2(uint(tint_symbol), uint(tint_symbol_1), uint(tint_symbol_2));
+  Outer l_a[4] = conv_arr4_Outer(a.inner);
+  Outer l_a_i = conv_Outer(a.inner[tint_symbol]);
+  Inner l_a_i_a[4] = conv_arr4_Inner(a.inner[tint_symbol].a);
+  Inner l_a_i_a_i = conv_Inner(a.inner[tint_symbol].a[tint_symbol_1]);
+  f16mat4 l_a_i_a_i_m = load_a_inner_p0_a_p1_m(uint(tint_symbol), uint(tint_symbol_1));
+  f16vec4 l_a_i_a_i_m_i = load_a_inner_p0_a_p1_m_p2(uint(tint_symbol), uint(tint_symbol_1), uint(tint_symbol_2));
+  int tint_symbol_3 = i();
+  float16_t l_a_i_a_i_m_i_i = load_a_inner_p0_a_p1_m_p2_p3(uint(tint_symbol), uint(tint_symbol_1), uint(tint_symbol_2), uint(tint_symbol_3));
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f16/dynamic_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/dynamic_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..31fbf6a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/dynamic_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,49 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct Inner {
+  /* 0x0000 */ half4x4 m;
+  /* 0x0020 */ tint_array<int8_t, 32> tint_pad;
+};
+
+struct Outer {
+  /* 0x0000 */ tint_array<Inner, 4> a;
+};
+
+int i() {
+  thread int tint_symbol_4 = 0;
+  tint_symbol_4 = as_type<int>((as_type<uint>(tint_symbol_4) + as_type<uint>(1)));
+  return tint_symbol_4;
+}
+
+kernel void f(const constant tint_array<Outer, 4>* tint_symbol_5 [[buffer(0)]]) {
+  int const tint_symbol = i();
+  int const p_a_i_save = tint_symbol;
+  int const tint_symbol_1 = i();
+  int const p_a_i_a_i_save = tint_symbol_1;
+  int const tint_symbol_2 = i();
+  int const p_a_i_a_i_m_i_save = tint_symbol_2;
+  tint_array<Outer, 4> const l_a = *(tint_symbol_5);
+  Outer const l_a_i = (*(tint_symbol_5))[p_a_i_save];
+  tint_array<Inner, 4> const l_a_i_a = (*(tint_symbol_5))[p_a_i_save].a;
+  Inner const l_a_i_a_i = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save];
+  half4x4 const l_a_i_a_i_m = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save].m;
+  half4 const l_a_i_a_i_m_i = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save].m[p_a_i_a_i_m_i_save];
+  int const tint_symbol_3 = i();
+  half const l_a_i_a_i_m_i_i = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save].m[p_a_i_a_i_m_i_save][tint_symbol_3];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f16/dynamic_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/dynamic_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..14536fe
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/dynamic_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,338 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 215
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %a_block_std140 "a_block_std140"
+               OpMemberName %a_block_std140 0 "inner"
+               OpName %Outer_std140 "Outer_std140"
+               OpMemberName %Outer_std140 0 "a"
+               OpName %Inner_std140 "Inner_std140"
+               OpMemberName %Inner_std140 0 "m_0"
+               OpMemberName %Inner_std140 1 "m_1"
+               OpMemberName %Inner_std140 2 "m_2"
+               OpMemberName %Inner_std140 3 "m_3"
+               OpName %a "a"
+               OpName %counter "counter"
+               OpName %i "i"
+               OpName %Inner "Inner"
+               OpMemberName %Inner 0 "m"
+               OpName %conv_Inner "conv_Inner"
+               OpName %val "val"
+               OpName %conv_arr4_Inner "conv_arr4_Inner"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i_0 "i"
+               OpName %var_for_index_1 "var_for_index_1"
+               OpName %Outer "Outer"
+               OpMemberName %Outer 0 "a"
+               OpName %conv_Outer "conv_Outer"
+               OpName %val_1 "val"
+               OpName %conv_arr4_Outer "conv_arr4_Outer"
+               OpName %val_2 "val"
+               OpName %arr_0 "arr"
+               OpName %i_1 "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_a_inner_p0_a_p1_m "load_a_inner_p0_a_p1_m"
+               OpName %p0 "p0"
+               OpName %p1 "p1"
+               OpName %load_a_inner_p0_a_p1_m_p2 "load_a_inner_p0_a_p1_m_p2"
+               OpName %p0_0 "p0"
+               OpName %p1_0 "p1"
+               OpName %p2 "p2"
+               OpName %load_a_inner_p0_a_p1_m_p2_p3 "load_a_inner_p0_a_p1_m_p2_p3"
+               OpName %p0_1 "p0"
+               OpName %p1_1 "p1"
+               OpName %p2_0 "p2"
+               OpName %p3 "p3"
+               OpName %f "f"
+               OpDecorate %a_block_std140 Block
+               OpMemberDecorate %a_block_std140 0 Offset 0
+               OpMemberDecorate %Outer_std140 0 Offset 0
+               OpMemberDecorate %Inner_std140 0 Offset 0
+               OpMemberDecorate %Inner_std140 1 Offset 8
+               OpMemberDecorate %Inner_std140 2 Offset 16
+               OpMemberDecorate %Inner_std140 3 Offset 24
+               OpDecorate %_arr_Inner_std140_uint_4 ArrayStride 64
+               OpDecorate %_arr_Outer_std140_uint_4 ArrayStride 256
+               OpDecorate %a NonWritable
+               OpDecorate %a DescriptorSet 0
+               OpDecorate %a Binding 0
+               OpMemberDecorate %Inner 0 Offset 0
+               OpMemberDecorate %Inner 0 ColMajor
+               OpMemberDecorate %Inner 0 MatrixStride 8
+               OpDecorate %_arr_Inner_uint_4 ArrayStride 64
+               OpMemberDecorate %Outer 0 Offset 0
+               OpDecorate %_arr_Outer_uint_4 ArrayStride 256
+       %half = OpTypeFloat 16
+     %v4half = OpTypeVector %half 4
+%Inner_std140 = OpTypeStruct %v4half %v4half %v4half %v4half
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_Inner_std140_uint_4 = OpTypeArray %Inner_std140 %uint_4
+%Outer_std140 = OpTypeStruct %_arr_Inner_std140_uint_4
+%_arr_Outer_std140_uint_4 = OpTypeArray %Outer_std140 %uint_4
+%a_block_std140 = OpTypeStruct %_arr_Outer_std140_uint_4
+%_ptr_Uniform_a_block_std140 = OpTypePointer Uniform %a_block_std140
+          %a = OpVariable %_ptr_Uniform_a_block_std140 Uniform
+        %int = OpTypeInt 32 1
+         %13 = OpConstantNull %int
+%_ptr_Private_int = OpTypePointer Private %int
+    %counter = OpVariable %_ptr_Private_int Private %13
+         %16 = OpTypeFunction %int
+      %int_1 = OpConstant %int 1
+ %mat4v4half = OpTypeMatrix %v4half 4
+      %Inner = OpTypeStruct %mat4v4half
+         %23 = OpTypeFunction %Inner %Inner_std140
+%_arr_Inner_uint_4 = OpTypeArray %Inner %uint_4
+         %35 = OpTypeFunction %_arr_Inner_uint_4 %_arr_Inner_std140_uint_4
+%_ptr_Function__arr_Inner_uint_4 = OpTypePointer Function %_arr_Inner_uint_4
+         %42 = OpConstantNull %_arr_Inner_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %45 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_Inner_std140_uint_4 = OpTypePointer Function %_arr_Inner_std140_uint_4
+         %58 = OpConstantNull %_arr_Inner_std140_uint_4
+%_ptr_Function_Inner = OpTypePointer Function %Inner
+%_ptr_Function_Inner_std140 = OpTypePointer Function %Inner_std140
+     %uint_1 = OpConstant %uint 1
+      %Outer = OpTypeStruct %_arr_Inner_uint_4
+         %71 = OpTypeFunction %Outer %Outer_std140
+%_arr_Outer_uint_4 = OpTypeArray %Outer %uint_4
+         %79 = OpTypeFunction %_arr_Outer_uint_4 %_arr_Outer_std140_uint_4
+%_ptr_Function__arr_Outer_uint_4 = OpTypePointer Function %_arr_Outer_uint_4
+         %86 = OpConstantNull %_arr_Outer_uint_4
+%_ptr_Function__arr_Outer_std140_uint_4 = OpTypePointer Function %_arr_Outer_std140_uint_4
+         %99 = OpConstantNull %_arr_Outer_std140_uint_4
+%_ptr_Function_Outer = OpTypePointer Function %Outer
+%_ptr_Function_Outer_std140 = OpTypePointer Function %Outer_std140
+        %111 = OpTypeFunction %mat4v4half %uint %uint
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_Inner_std140 = OpTypePointer Uniform %Inner_std140
+%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half
+     %uint_2 = OpConstant %uint 2
+     %uint_3 = OpConstant %uint 3
+        %136 = OpTypeFunction %v4half %uint %uint %uint
+        %156 = OpConstantNull %v4half
+        %157 = OpTypeFunction %half %uint %uint %uint %uint
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+        %179 = OpConstantNull %half
+       %void = OpTypeVoid
+        %180 = OpTypeFunction %void
+%_ptr_Uniform__arr_Outer_std140_uint_4 = OpTypePointer Uniform %_arr_Outer_std140_uint_4
+%_ptr_Uniform_Outer_std140 = OpTypePointer Uniform %Outer_std140
+%_ptr_Uniform__arr_Inner_std140_uint_4 = OpTypePointer Uniform %_arr_Inner_std140_uint_4
+          %i = OpFunction %int None %16
+         %18 = OpLabel
+         %19 = OpLoad %int %counter
+         %21 = OpIAdd %int %19 %int_1
+               OpStore %counter %21
+         %22 = OpLoad %int %counter
+               OpReturnValue %22
+               OpFunctionEnd
+ %conv_Inner = OpFunction %Inner None %23
+        %val = OpFunctionParameter %Inner_std140
+         %28 = OpLabel
+         %29 = OpCompositeExtract %v4half %val 0
+         %30 = OpCompositeExtract %v4half %val 1
+         %31 = OpCompositeExtract %v4half %val 2
+         %32 = OpCompositeExtract %v4half %val 3
+         %33 = OpCompositeConstruct %mat4v4half %29 %30 %31 %32
+         %34 = OpCompositeConstruct %Inner %33
+               OpReturnValue %34
+               OpFunctionEnd
+%conv_arr4_Inner = OpFunction %_arr_Inner_uint_4 None %35
+      %val_0 = OpFunctionParameter %_arr_Inner_std140_uint_4
+         %39 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_Inner_uint_4 Function %42
+        %i_0 = OpVariable %_ptr_Function_uint Function %45
+%var_for_index_1 = OpVariable %_ptr_Function__arr_Inner_std140_uint_4 Function %58
+               OpBranch %46
+         %46 = OpLabel
+               OpLoopMerge %47 %48 None
+               OpBranch %49
+         %49 = OpLabel
+         %51 = OpLoad %uint %i_0
+         %52 = OpULessThan %bool %51 %uint_4
+         %50 = OpLogicalNot %bool %52
+               OpSelectionMerge %54 None
+               OpBranchConditional %50 %55 %54
+         %55 = OpLabel
+               OpBranch %47
+         %54 = OpLabel
+               OpStore %var_for_index_1 %val_0
+         %59 = OpLoad %uint %i_0
+         %61 = OpAccessChain %_ptr_Function_Inner %arr %59
+         %63 = OpLoad %uint %i_0
+         %65 = OpAccessChain %_ptr_Function_Inner_std140 %var_for_index_1 %63
+         %66 = OpLoad %Inner_std140 %65
+         %62 = OpFunctionCall %Inner %conv_Inner %66
+               OpStore %61 %62
+               OpBranch %48
+         %48 = OpLabel
+         %67 = OpLoad %uint %i_0
+         %69 = OpIAdd %uint %67 %uint_1
+               OpStore %i_0 %69
+               OpBranch %46
+         %47 = OpLabel
+         %70 = OpLoad %_arr_Inner_uint_4 %arr
+               OpReturnValue %70
+               OpFunctionEnd
+ %conv_Outer = OpFunction %Outer None %71
+      %val_1 = OpFunctionParameter %Outer_std140
+         %75 = OpLabel
+         %77 = OpCompositeExtract %_arr_Inner_std140_uint_4 %val_1 0
+         %76 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr4_Inner %77
+         %78 = OpCompositeConstruct %Outer %76
+               OpReturnValue %78
+               OpFunctionEnd
+%conv_arr4_Outer = OpFunction %_arr_Outer_uint_4 None %79
+      %val_2 = OpFunctionParameter %_arr_Outer_std140_uint_4
+         %83 = OpLabel
+      %arr_0 = OpVariable %_ptr_Function__arr_Outer_uint_4 Function %86
+        %i_1 = OpVariable %_ptr_Function_uint Function %45
+%var_for_index = OpVariable %_ptr_Function__arr_Outer_std140_uint_4 Function %99
+               OpBranch %88
+         %88 = OpLabel
+               OpLoopMerge %89 %90 None
+               OpBranch %91
+         %91 = OpLabel
+         %93 = OpLoad %uint %i_1
+         %94 = OpULessThan %bool %93 %uint_4
+         %92 = OpLogicalNot %bool %94
+               OpSelectionMerge %95 None
+               OpBranchConditional %92 %96 %95
+         %96 = OpLabel
+               OpBranch %89
+         %95 = OpLabel
+               OpStore %var_for_index %val_2
+        %100 = OpLoad %uint %i_1
+        %102 = OpAccessChain %_ptr_Function_Outer %arr_0 %100
+        %104 = OpLoad %uint %i_1
+        %106 = OpAccessChain %_ptr_Function_Outer_std140 %var_for_index %104
+        %107 = OpLoad %Outer_std140 %106
+        %103 = OpFunctionCall %Outer %conv_Outer %107
+               OpStore %102 %103
+               OpBranch %90
+         %90 = OpLabel
+        %108 = OpLoad %uint %i_1
+        %109 = OpIAdd %uint %108 %uint_1
+               OpStore %i_1 %109
+               OpBranch %88
+         %89 = OpLabel
+        %110 = OpLoad %_arr_Outer_uint_4 %arr_0
+               OpReturnValue %110
+               OpFunctionEnd
+%load_a_inner_p0_a_p1_m = OpFunction %mat4v4half None %111
+         %p0 = OpFunctionParameter %uint
+         %p1 = OpFunctionParameter %uint
+        %115 = OpLabel
+        %119 = OpAccessChain %_ptr_Uniform_Inner_std140 %a %uint_0 %p0 %uint_0 %p1
+        %122 = OpAccessChain %_ptr_Uniform_v4half %119 %uint_0
+        %123 = OpLoad %v4half %122
+        %125 = OpAccessChain %_ptr_Uniform_v4half %119 %uint_1
+        %126 = OpLoad %v4half %125
+        %129 = OpAccessChain %_ptr_Uniform_v4half %119 %uint_2
+        %130 = OpLoad %v4half %129
+        %133 = OpAccessChain %_ptr_Uniform_v4half %119 %uint_3
+        %134 = OpLoad %v4half %133
+        %135 = OpCompositeConstruct %mat4v4half %123 %126 %130 %134
+               OpReturnValue %135
+               OpFunctionEnd
+%load_a_inner_p0_a_p1_m_p2 = OpFunction %v4half None %136
+       %p0_0 = OpFunctionParameter %uint
+       %p1_0 = OpFunctionParameter %uint
+         %p2 = OpFunctionParameter %uint
+        %141 = OpLabel
+               OpSelectionMerge %142 None
+               OpSwitch %p2 %143 0 %144 1 %145 2 %146 3 %147
+        %144 = OpLabel
+        %148 = OpAccessChain %_ptr_Uniform_v4half %a %uint_0 %p0_0 %uint_0 %p1_0 %uint_0
+        %149 = OpLoad %v4half %148
+               OpReturnValue %149
+        %145 = OpLabel
+        %150 = OpAccessChain %_ptr_Uniform_v4half %a %uint_0 %p0_0 %uint_0 %p1_0 %uint_1
+        %151 = OpLoad %v4half %150
+               OpReturnValue %151
+        %146 = OpLabel
+        %152 = OpAccessChain %_ptr_Uniform_v4half %a %uint_0 %p0_0 %uint_0 %p1_0 %uint_2
+        %153 = OpLoad %v4half %152
+               OpReturnValue %153
+        %147 = OpLabel
+        %154 = OpAccessChain %_ptr_Uniform_v4half %a %uint_0 %p0_0 %uint_0 %p1_0 %uint_3
+        %155 = OpLoad %v4half %154
+               OpReturnValue %155
+        %143 = OpLabel
+               OpReturnValue %156
+        %142 = OpLabel
+               OpReturnValue %156
+               OpFunctionEnd
+%load_a_inner_p0_a_p1_m_p2_p3 = OpFunction %half None %157
+       %p0_1 = OpFunctionParameter %uint
+       %p1_1 = OpFunctionParameter %uint
+       %p2_0 = OpFunctionParameter %uint
+         %p3 = OpFunctionParameter %uint
+        %163 = OpLabel
+               OpSelectionMerge %164 None
+               OpSwitch %p2_0 %165 0 %166 1 %167 2 %168 3 %169
+        %166 = OpLabel
+        %171 = OpAccessChain %_ptr_Uniform_half %a %uint_0 %p0_1 %uint_0 %p1_1 %uint_0 %p3
+        %172 = OpLoad %half %171
+               OpReturnValue %172
+        %167 = OpLabel
+        %173 = OpAccessChain %_ptr_Uniform_half %a %uint_0 %p0_1 %uint_0 %p1_1 %uint_1 %p3
+        %174 = OpLoad %half %173
+               OpReturnValue %174
+        %168 = OpLabel
+        %175 = OpAccessChain %_ptr_Uniform_half %a %uint_0 %p0_1 %uint_0 %p1_1 %uint_2 %p3
+        %176 = OpLoad %half %175
+               OpReturnValue %176
+        %169 = OpLabel
+        %177 = OpAccessChain %_ptr_Uniform_half %a %uint_0 %p0_1 %uint_0 %p1_1 %uint_3 %p3
+        %178 = OpLoad %half %177
+               OpReturnValue %178
+        %165 = OpLabel
+               OpReturnValue %179
+        %164 = OpLabel
+               OpReturnValue %179
+               OpFunctionEnd
+          %f = OpFunction %void None %180
+        %183 = OpLabel
+        %184 = OpFunctionCall %int %i
+        %185 = OpFunctionCall %int %i
+        %186 = OpFunctionCall %int %i
+        %189 = OpAccessChain %_ptr_Uniform__arr_Outer_std140_uint_4 %a %uint_0
+        %190 = OpLoad %_arr_Outer_std140_uint_4 %189
+        %187 = OpFunctionCall %_arr_Outer_uint_4 %conv_arr4_Outer %190
+        %193 = OpAccessChain %_ptr_Uniform_Outer_std140 %a %uint_0 %184
+        %194 = OpLoad %Outer_std140 %193
+        %191 = OpFunctionCall %Outer %conv_Outer %194
+        %197 = OpAccessChain %_ptr_Uniform__arr_Inner_std140_uint_4 %a %uint_0 %184 %uint_0
+        %198 = OpLoad %_arr_Inner_std140_uint_4 %197
+        %195 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr4_Inner %198
+        %200 = OpAccessChain %_ptr_Uniform_Inner_std140 %a %uint_0 %184 %uint_0 %185
+        %201 = OpLoad %Inner_std140 %200
+        %199 = OpFunctionCall %Inner %conv_Inner %201
+        %203 = OpBitcast %uint %184
+        %204 = OpBitcast %uint %185
+        %202 = OpFunctionCall %mat4v4half %load_a_inner_p0_a_p1_m %203 %204
+        %206 = OpBitcast %uint %184
+        %207 = OpBitcast %uint %185
+        %208 = OpBitcast %uint %186
+        %205 = OpFunctionCall %v4half %load_a_inner_p0_a_p1_m_p2 %206 %207 %208
+        %209 = OpFunctionCall %int %i
+        %211 = OpBitcast %uint %184
+        %212 = OpBitcast %uint %185
+        %213 = OpBitcast %uint %186
+        %214 = OpBitcast %uint %209
+        %210 = OpFunctionCall %half %load_a_inner_p0_a_p1_m_p2_p3 %211 %212 %213 %214
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f16/dynamic_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/dynamic_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..181af9f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/dynamic_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,36 @@
+enable f16;
+
+struct Inner {
+  @size(64)
+  m : mat4x4<f16>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+var<private> counter = 0;
+
+fn i() -> i32 {
+  counter++;
+  return counter;
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &(a);
+  let p_a_i = &((*(p_a))[i()]);
+  let p_a_i_a = &((*(p_a_i)).a);
+  let p_a_i_a_i = &((*(p_a_i_a))[i()]);
+  let p_a_i_a_i_m = &((*(p_a_i_a_i)).m);
+  let p_a_i_a_i_m_i = &((*(p_a_i_a_i_m))[i()]);
+  let l_a : array<Outer, 4> = *(p_a);
+  let l_a_i : Outer = *(p_a_i);
+  let l_a_i_a : array<Inner, 4> = *(p_a_i_a);
+  let l_a_i_a_i : Inner = *(p_a_i_a_i);
+  let l_a_i_a_i_m : mat4x4<f16> = *(p_a_i_a_i_m);
+  let l_a_i_a_i_m_i : vec4<f16> = *(p_a_i_a_i_m_i);
+  let l_a_i_a_i_m_i_i : f16 = (*(p_a_i_a_i_m_i))[i()];
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f16/static_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/static_index_via_ptr.wgsl
new file mode 100644
index 0000000..d8ec543
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/static_index_via_ptr.wgsl

@@ -0,0 +1,31 @@
+enable f16;
+
+struct Inner {
+  @size(64)
+  m : mat4x4<f16>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &a;
+  let p_a_3 = &((*p_a)[3]);
+  let p_a_3_a = &((*p_a_3).a);
+  let p_a_3_a_2 = &((*p_a_3_a)[2]);
+  let p_a_3_a_2_m = &((*p_a_3_a_2).m);
+  let p_a_3_a_2_m_1 = &((*p_a_3_a_2_m)[1]);
+
+
+  let l_a             : array<Outer, 4> = *p_a;
+  let l_a_3           : Outer           = *p_a_3;
+  let l_a_3_a         : array<Inner, 4> = *p_a_3_a;
+  let l_a_3_a_2       : Inner           = *p_a_3_a_2;
+  let l_a_3_a_2_m     : mat4x4<f16>     = *p_a_3_a_2_m;
+  let l_a_3_a_2_m_1   : vec4<f16>       = *p_a_3_a_2_m_1;
+  let l_a_3_a_2_m_1_0 : f16             = (*p_a_3_a_2_m_1)[0];
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..5af54cf
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,81 @@
+struct Inner {
+  matrix<float16_t, 4, 4> m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];
+};
+
+matrix<float16_t, 4, 4> tint_symbol_4(uint4 buffer[64], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  return matrix<float16_t, 4, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]), vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]));
+}
+
+Inner tint_symbol_3(uint4 buffer[64], uint offset) {
+  const Inner tint_symbol_7 = {tint_symbol_4(buffer, (offset + 0u))};
+  return tint_symbol_7;
+}
+
+typedef Inner tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[64], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_3(buffer, (offset + (i * 64u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_1(uint4 buffer[64], uint offset) {
+  const Outer tint_symbol_8 = {tint_symbol_2(buffer, (offset + 0u))};
+  return tint_symbol_8;
+}
+
+typedef Outer tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[64], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr_1[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 256u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const Outer l_a[4] = tint_symbol(a, 0u);
+  const Outer l_a_3 = tint_symbol_1(a, 768u);
+  const Inner l_a_3_a[4] = tint_symbol_2(a, 768u);
+  const Inner l_a_3_a_2 = tint_symbol_3(a, 896u);
+  const matrix<float16_t, 4, 4> l_a_3_a_2_m = tint_symbol_4(a, 896u);
+  uint2 ubo_load_8 = a[56].zw;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_8_yw = vector<float16_t, 2>(f16tof32(ubo_load_8 >> 16));
+  const vector<float16_t, 4> l_a_3_a_2_m_1 = vector<float16_t, 4>(ubo_load_8_xz[0], ubo_load_8_yw[0], ubo_load_8_xz[1], ubo_load_8_yw[1]);
+  const float16_t l_a_3_a_2_m_1_0 = float16_t(f16tof32(((a[56].z) & 0xFFFF)));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..cb11b50
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,86 @@
+SKIP: FAILED
+
+struct Inner {
+  matrix<float16_t, 4, 4> m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];
+};
+
+matrix<float16_t, 4, 4> tint_symbol_4(uint4 buffer[64], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  return matrix<float16_t, 4, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]), vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]));
+}
+
+Inner tint_symbol_3(uint4 buffer[64], uint offset) {
+  const Inner tint_symbol_7 = {tint_symbol_4(buffer, (offset + 0u))};
+  return tint_symbol_7;
+}
+
+typedef Inner tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[64], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_3(buffer, (offset + (i * 64u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_1(uint4 buffer[64], uint offset) {
+  const Outer tint_symbol_8 = {tint_symbol_2(buffer, (offset + 0u))};
+  return tint_symbol_8;
+}
+
+typedef Outer tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[64], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr_1[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 256u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const Outer l_a[4] = tint_symbol(a, 0u);
+  const Outer l_a_3 = tint_symbol_1(a, 768u);
+  const Inner l_a_3_a[4] = tint_symbol_2(a, 768u);
+  const Inner l_a_3_a_2 = tint_symbol_3(a, 896u);
+  const matrix<float16_t, 4, 4> l_a_3_a_2_m = tint_symbol_4(a, 896u);
+  uint2 ubo_load_8 = a[56].zw;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_8_yw = vector<float16_t, 2>(f16tof32(ubo_load_8 >> 16));
+  const vector<float16_t, 4> l_a_3_a_2_m_1 = vector<float16_t, 4>(ubo_load_8_xz[0], ubo_load_8_yw[0], ubo_load_8_xz[1], ubo_load_8_yw[1]);
+  const float16_t l_a_3_a_2_m_1_0 = float16_t(f16tof32(((a[56].z) & 0xFFFF)));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000015C09593480(2,10-18): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f16/static_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/static_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..c87dc31
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/static_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,95 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct Inner {
+  f16mat4 m;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+};
+
+struct Inner_std140 {
+  f16vec4 m_0;
+  f16vec4 m_1;
+  f16vec4 m_2;
+  f16vec4 m_3;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+};
+
+struct Outer {
+  Inner a[4];
+};
+
+struct Outer_std140 {
+  Inner_std140 a[4];
+};
+
+layout(binding = 0, std140) uniform a_block_std140_ubo {
+  Outer_std140 inner[4];
+} a;
+
+Inner conv_Inner(Inner_std140 val) {
+  return Inner(f16mat4(val.m_0, val.m_1, val.m_2, val.m_3), val.pad, val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.pad_7);
+}
+
+Inner[4] conv_arr4_Inner(Inner_std140 val[4]) {
+  Inner arr[4] = Inner[4](Inner(f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_Inner(val[i]);
+    }
+  }
+  return arr;
+}
+
+Outer conv_Outer(Outer_std140 val) {
+  return Outer(conv_arr4_Inner(val.a));
+}
+
+Outer[4] conv_arr4_Outer(Outer_std140 val[4]) {
+  Outer arr[4] = Outer[4](Outer(Inner[4](Inner(f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))), Outer(Inner[4](Inner(f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))), Outer(Inner[4](Inner(f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))), 
Outer(Inner[4](Inner(f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), Inner(f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u))));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_Outer(val[i]);
+    }
+  }
+  return arr;
+}
+
+f16mat4 load_a_inner_3_a_2_m() {
+  return f16mat4(a.inner[3u].a[2u].m_0, a.inner[3u].a[2u].m_1, a.inner[3u].a[2u].m_2, a.inner[3u].a[2u].m_3);
+}
+
+void f() {
+  Outer p_a[4] = conv_arr4_Outer(a.inner);
+  Outer p_a_3 = conv_Outer(a.inner[3u]);
+  Inner p_a_3_a[4] = conv_arr4_Inner(a.inner[3u].a);
+  Inner p_a_3_a_2 = conv_Inner(a.inner[3u].a[2u]);
+  f16mat4 p_a_3_a_2_m = load_a_inner_3_a_2_m();
+  f16vec4 p_a_3_a_2_m_1 = a.inner[3u].a[2u].m_1;
+  Outer l_a[4] = conv_arr4_Outer(a.inner);
+  Outer l_a_3 = conv_Outer(a.inner[3u]);
+  Inner l_a_3_a[4] = conv_arr4_Inner(a.inner[3u].a);
+  Inner l_a_3_a_2 = conv_Inner(a.inner[3u].a[2u]);
+  f16mat4 l_a_3_a_2_m = load_a_inner_3_a_2_m();
+  f16vec4 l_a_3_a_2_m_1 = a.inner[3u].a[2u].m_1;
+  float16_t l_a_3_a_2_m_1_0 = a.inner[3u].a[2u].m_1[0u];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f16/static_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/static_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..1f4ea61
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/static_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,36 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct Inner {
+  /* 0x0000 */ half4x4 m;
+  /* 0x0020 */ tint_array<int8_t, 32> tint_pad;
+};
+
+struct Outer {
+  /* 0x0000 */ tint_array<Inner, 4> a;
+};
+
+kernel void f(const constant tint_array<Outer, 4>* tint_symbol [[buffer(0)]]) {
+  tint_array<Outer, 4> const l_a = *(tint_symbol);
+  Outer const l_a_3 = (*(tint_symbol))[3];
+  tint_array<Inner, 4> const l_a_3_a = (*(tint_symbol))[3].a;
+  Inner const l_a_3_a_2 = (*(tint_symbol))[3].a[2];
+  half4x4 const l_a_3_a_2_m = (*(tint_symbol))[3].a[2].m;
+  half4 const l_a_3_a_2_m_1 = (*(tint_symbol))[3].a[2].m[1];
+  half const l_a_3_a_2_m_1_0 = (*(tint_symbol))[3].a[2].m[1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f16/static_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/static_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..e9f2926
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/static_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,237 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 148
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %a_block_std140 "a_block_std140"
+               OpMemberName %a_block_std140 0 "inner"
+               OpName %Outer_std140 "Outer_std140"
+               OpMemberName %Outer_std140 0 "a"
+               OpName %Inner_std140 "Inner_std140"
+               OpMemberName %Inner_std140 0 "m_0"
+               OpMemberName %Inner_std140 1 "m_1"
+               OpMemberName %Inner_std140 2 "m_2"
+               OpMemberName %Inner_std140 3 "m_3"
+               OpName %a "a"
+               OpName %Inner "Inner"
+               OpMemberName %Inner 0 "m"
+               OpName %conv_Inner "conv_Inner"
+               OpName %val "val"
+               OpName %conv_arr4_Inner "conv_arr4_Inner"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index_1 "var_for_index_1"
+               OpName %Outer "Outer"
+               OpMemberName %Outer 0 "a"
+               OpName %conv_Outer "conv_Outer"
+               OpName %val_1 "val"
+               OpName %conv_arr4_Outer "conv_arr4_Outer"
+               OpName %val_2 "val"
+               OpName %arr_0 "arr"
+               OpName %i_0 "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_a_inner_3_a_2_m "load_a_inner_3_a_2_m"
+               OpName %f "f"
+               OpDecorate %a_block_std140 Block
+               OpMemberDecorate %a_block_std140 0 Offset 0
+               OpMemberDecorate %Outer_std140 0 Offset 0
+               OpMemberDecorate %Inner_std140 0 Offset 0
+               OpMemberDecorate %Inner_std140 1 Offset 8
+               OpMemberDecorate %Inner_std140 2 Offset 16
+               OpMemberDecorate %Inner_std140 3 Offset 24
+               OpDecorate %_arr_Inner_std140_uint_4 ArrayStride 64
+               OpDecorate %_arr_Outer_std140_uint_4 ArrayStride 256
+               OpDecorate %a NonWritable
+               OpDecorate %a DescriptorSet 0
+               OpDecorate %a Binding 0
+               OpMemberDecorate %Inner 0 Offset 0
+               OpMemberDecorate %Inner 0 ColMajor
+               OpMemberDecorate %Inner 0 MatrixStride 8
+               OpDecorate %_arr_Inner_uint_4 ArrayStride 64
+               OpMemberDecorate %Outer 0 Offset 0
+               OpDecorate %_arr_Outer_uint_4 ArrayStride 256
+       %half = OpTypeFloat 16
+     %v4half = OpTypeVector %half 4
+%Inner_std140 = OpTypeStruct %v4half %v4half %v4half %v4half
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_Inner_std140_uint_4 = OpTypeArray %Inner_std140 %uint_4
+%Outer_std140 = OpTypeStruct %_arr_Inner_std140_uint_4
+%_arr_Outer_std140_uint_4 = OpTypeArray %Outer_std140 %uint_4
+%a_block_std140 = OpTypeStruct %_arr_Outer_std140_uint_4
+%_ptr_Uniform_a_block_std140 = OpTypePointer Uniform %a_block_std140
+          %a = OpVariable %_ptr_Uniform_a_block_std140 Uniform
+ %mat4v4half = OpTypeMatrix %v4half 4
+      %Inner = OpTypeStruct %mat4v4half
+         %12 = OpTypeFunction %Inner %Inner_std140
+%_arr_Inner_uint_4 = OpTypeArray %Inner %uint_4
+         %24 = OpTypeFunction %_arr_Inner_uint_4 %_arr_Inner_std140_uint_4
+%_ptr_Function__arr_Inner_uint_4 = OpTypePointer Function %_arr_Inner_uint_4
+         %31 = OpConstantNull %_arr_Inner_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %34 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_Inner_std140_uint_4 = OpTypePointer Function %_arr_Inner_std140_uint_4
+         %47 = OpConstantNull %_arr_Inner_std140_uint_4
+%_ptr_Function_Inner = OpTypePointer Function %Inner
+%_ptr_Function_Inner_std140 = OpTypePointer Function %Inner_std140
+     %uint_1 = OpConstant %uint 1
+      %Outer = OpTypeStruct %_arr_Inner_uint_4
+         %60 = OpTypeFunction %Outer %Outer_std140
+%_arr_Outer_uint_4 = OpTypeArray %Outer %uint_4
+         %68 = OpTypeFunction %_arr_Outer_uint_4 %_arr_Outer_std140_uint_4
+%_ptr_Function__arr_Outer_uint_4 = OpTypePointer Function %_arr_Outer_uint_4
+         %75 = OpConstantNull %_arr_Outer_uint_4
+%_ptr_Function__arr_Outer_std140_uint_4 = OpTypePointer Function %_arr_Outer_std140_uint_4
+         %88 = OpConstantNull %_arr_Outer_std140_uint_4
+%_ptr_Function_Outer = OpTypePointer Function %Outer
+%_ptr_Function_Outer_std140 = OpTypePointer Function %Outer_std140
+        %100 = OpTypeFunction %mat4v4half
+     %uint_0 = OpConstant %uint 0
+     %uint_3 = OpConstant %uint 3
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_Inner_std140 = OpTypePointer Uniform %Inner_std140
+%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half
+       %void = OpTypeVoid
+        %123 = OpTypeFunction %void
+%_ptr_Uniform__arr_Outer_std140_uint_4 = OpTypePointer Uniform %_arr_Outer_std140_uint_4
+%_ptr_Uniform_Outer_std140 = OpTypePointer Uniform %Outer_std140
+%_ptr_Uniform__arr_Inner_std140_uint_4 = OpTypePointer Uniform %_arr_Inner_std140_uint_4
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+ %conv_Inner = OpFunction %Inner None %12
+        %val = OpFunctionParameter %Inner_std140
+         %17 = OpLabel
+         %18 = OpCompositeExtract %v4half %val 0
+         %19 = OpCompositeExtract %v4half %val 1
+         %20 = OpCompositeExtract %v4half %val 2
+         %21 = OpCompositeExtract %v4half %val 3
+         %22 = OpCompositeConstruct %mat4v4half %18 %19 %20 %21
+         %23 = OpCompositeConstruct %Inner %22
+               OpReturnValue %23
+               OpFunctionEnd
+%conv_arr4_Inner = OpFunction %_arr_Inner_uint_4 None %24
+      %val_0 = OpFunctionParameter %_arr_Inner_std140_uint_4
+         %28 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_Inner_uint_4 Function %31
+          %i = OpVariable %_ptr_Function_uint Function %34
+%var_for_index_1 = OpVariable %_ptr_Function__arr_Inner_std140_uint_4 Function %47
+               OpBranch %35
+         %35 = OpLabel
+               OpLoopMerge %36 %37 None
+               OpBranch %38
+         %38 = OpLabel
+         %40 = OpLoad %uint %i
+         %41 = OpULessThan %bool %40 %uint_4
+         %39 = OpLogicalNot %bool %41
+               OpSelectionMerge %43 None
+               OpBranchConditional %39 %44 %43
+         %44 = OpLabel
+               OpBranch %36
+         %43 = OpLabel
+               OpStore %var_for_index_1 %val_0
+         %48 = OpLoad %uint %i
+         %50 = OpAccessChain %_ptr_Function_Inner %arr %48
+         %52 = OpLoad %uint %i
+         %54 = OpAccessChain %_ptr_Function_Inner_std140 %var_for_index_1 %52
+         %55 = OpLoad %Inner_std140 %54
+         %51 = OpFunctionCall %Inner %conv_Inner %55
+               OpStore %50 %51
+               OpBranch %37
+         %37 = OpLabel
+         %56 = OpLoad %uint %i
+         %58 = OpIAdd %uint %56 %uint_1
+               OpStore %i %58
+               OpBranch %35
+         %36 = OpLabel
+         %59 = OpLoad %_arr_Inner_uint_4 %arr
+               OpReturnValue %59
+               OpFunctionEnd
+ %conv_Outer = OpFunction %Outer None %60
+      %val_1 = OpFunctionParameter %Outer_std140
+         %64 = OpLabel
+         %66 = OpCompositeExtract %_arr_Inner_std140_uint_4 %val_1 0
+         %65 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr4_Inner %66
+         %67 = OpCompositeConstruct %Outer %65
+               OpReturnValue %67
+               OpFunctionEnd
+%conv_arr4_Outer = OpFunction %_arr_Outer_uint_4 None %68
+      %val_2 = OpFunctionParameter %_arr_Outer_std140_uint_4
+         %72 = OpLabel
+      %arr_0 = OpVariable %_ptr_Function__arr_Outer_uint_4 Function %75
+        %i_0 = OpVariable %_ptr_Function_uint Function %34
+%var_for_index = OpVariable %_ptr_Function__arr_Outer_std140_uint_4 Function %88
+               OpBranch %77
+         %77 = OpLabel
+               OpLoopMerge %78 %79 None
+               OpBranch %80
+         %80 = OpLabel
+         %82 = OpLoad %uint %i_0
+         %83 = OpULessThan %bool %82 %uint_4
+         %81 = OpLogicalNot %bool %83
+               OpSelectionMerge %84 None
+               OpBranchConditional %81 %85 %84
+         %85 = OpLabel
+               OpBranch %78
+         %84 = OpLabel
+               OpStore %var_for_index %val_2
+         %89 = OpLoad %uint %i_0
+         %91 = OpAccessChain %_ptr_Function_Outer %arr_0 %89
+         %93 = OpLoad %uint %i_0
+         %95 = OpAccessChain %_ptr_Function_Outer_std140 %var_for_index %93
+         %96 = OpLoad %Outer_std140 %95
+         %92 = OpFunctionCall %Outer %conv_Outer %96
+               OpStore %91 %92
+               OpBranch %79
+         %79 = OpLabel
+         %97 = OpLoad %uint %i_0
+         %98 = OpIAdd %uint %97 %uint_1
+               OpStore %i_0 %98
+               OpBranch %77
+         %78 = OpLabel
+         %99 = OpLoad %_arr_Outer_uint_4 %arr_0
+               OpReturnValue %99
+               OpFunctionEnd
+%load_a_inner_3_a_2_m = OpFunction %mat4v4half None %100
+        %102 = OpLabel
+        %108 = OpAccessChain %_ptr_Uniform_Inner_std140 %a %uint_0 %uint_3 %uint_0 %uint_2
+        %111 = OpAccessChain %_ptr_Uniform_v4half %108 %uint_0
+        %112 = OpLoad %v4half %111
+        %114 = OpAccessChain %_ptr_Uniform_v4half %108 %uint_1
+        %115 = OpLoad %v4half %114
+        %117 = OpAccessChain %_ptr_Uniform_v4half %108 %uint_2
+        %118 = OpLoad %v4half %117
+        %120 = OpAccessChain %_ptr_Uniform_v4half %108 %uint_3
+        %121 = OpLoad %v4half %120
+        %122 = OpCompositeConstruct %mat4v4half %112 %115 %118 %121
+               OpReturnValue %122
+               OpFunctionEnd
+          %f = OpFunction %void None %123
+        %126 = OpLabel
+        %129 = OpAccessChain %_ptr_Uniform__arr_Outer_std140_uint_4 %a %uint_0
+        %130 = OpLoad %_arr_Outer_std140_uint_4 %129
+        %127 = OpFunctionCall %_arr_Outer_uint_4 %conv_arr4_Outer %130
+        %133 = OpAccessChain %_ptr_Uniform_Outer_std140 %a %uint_0 %uint_3
+        %134 = OpLoad %Outer_std140 %133
+        %131 = OpFunctionCall %Outer %conv_Outer %134
+        %137 = OpAccessChain %_ptr_Uniform__arr_Inner_std140_uint_4 %a %uint_0 %uint_3 %uint_0
+        %138 = OpLoad %_arr_Inner_std140_uint_4 %137
+        %135 = OpFunctionCall %_arr_Inner_uint_4 %conv_arr4_Inner %138
+        %140 = OpAccessChain %_ptr_Uniform_Inner_std140 %a %uint_0 %uint_3 %uint_0 %uint_2
+        %141 = OpLoad %Inner_std140 %140
+        %139 = OpFunctionCall %Inner %conv_Inner %141
+        %142 = OpFunctionCall %mat4v4half %load_a_inner_3_a_2_m
+        %143 = OpAccessChain %_ptr_Uniform_v4half %a %uint_0 %uint_3 %uint_0 %uint_2 %uint_1
+        %144 = OpLoad %v4half %143
+        %146 = OpAccessChain %_ptr_Uniform_half %a %uint_0 %uint_3 %uint_0 %uint_2 %uint_1 %34
+        %147 = OpLoad %half %146
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f16/static_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/static_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..152ccef
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/static_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,29 @@
+enable f16;
+
+struct Inner {
+  @size(64)
+  m : mat4x4<f16>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &(a);
+  let p_a_3 = &((*(p_a))[3]);
+  let p_a_3_a = &((*(p_a_3)).a);
+  let p_a_3_a_2 = &((*(p_a_3_a))[2]);
+  let p_a_3_a_2_m = &((*(p_a_3_a_2)).m);
+  let p_a_3_a_2_m_1 = &((*(p_a_3_a_2_m))[1]);
+  let l_a : array<Outer, 4> = *(p_a);
+  let l_a_3 : Outer = *(p_a_3);
+  let l_a_3_a : array<Inner, 4> = *(p_a_3_a);
+  let l_a_3_a_2 : Inner = *(p_a_3_a_2);
+  let l_a_3_a_2_m : mat4x4<f16> = *(p_a_3_a_2_m);
+  let l_a_3_a_2_m_1 : vec4<f16> = *(p_a_3_a_2_m_1);
+  let l_a_3_a_2_m_1_0 : f16 = (*(p_a_3_a_2_m_1))[0];
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_builtin.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_builtin.wgsl
new file mode 100644
index 0000000..3157732
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_builtin.wgsl

@@ -0,0 +1,17 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat4x4<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    let t = transpose(u[2].m);
+    let l = length(u[0].m[1].ywxz);
+    let a = abs(u[0].m[1].ywxz.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_builtin.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_builtin.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..fdce8e0
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_builtin.wgsl.expected.dxc.hlsl

@@ -0,0 +1,41 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+
+matrix<float16_t, 4, 4> tint_symbol(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  return matrix<float16_t, 4, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]), vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 4, 4> t = transpose(tint_symbol(u, 264u));
+  uint2 ubo_load_8 = u[1].xy;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_8_yw = vector<float16_t, 2>(f16tof32(ubo_load_8 >> 16));
+  const float16_t l = length(vector<float16_t, 4>(ubo_load_8_xz[0], ubo_load_8_yw[0], ubo_load_8_xz[1], ubo_load_8_yw[1]).ywxz);
+  uint2 ubo_load_9 = u[1].xy;
+  vector<float16_t, 2> ubo_load_9_xz = vector<float16_t, 2>(f16tof32(ubo_load_9 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_9_yw = vector<float16_t, 2>(f16tof32(ubo_load_9 >> 16));
+  const float16_t a = abs(vector<float16_t, 4>(ubo_load_9_xz[0], ubo_load_9_yw[0], ubo_load_9_xz[1], ubo_load_9_yw[1]).ywxz.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_builtin.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_builtin.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..05b1dfe
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_builtin.wgsl.expected.fxc.hlsl

@@ -0,0 +1,46 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+
+matrix<float16_t, 4, 4> tint_symbol(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  return matrix<float16_t, 4, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]), vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 4, 4> t = transpose(tint_symbol(u, 264u));
+  uint2 ubo_load_8 = u[1].xy;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_8_yw = vector<float16_t, 2>(f16tof32(ubo_load_8 >> 16));
+  const float16_t l = length(vector<float16_t, 4>(ubo_load_8_xz[0], ubo_load_8_yw[0], ubo_load_8_xz[1], ubo_load_8_yw[1]).ywxz);
+  uint2 ubo_load_9 = u[1].xy;
+  vector<float16_t, 2> ubo_load_9_xz = vector<float16_t, 2>(f16tof32(ubo_load_9 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_9_yw = vector<float16_t, 2>(f16tof32(ubo_load_9 >> 16));
+  const float16_t a = abs(vector<float16_t, 4>(ubo_load_9_xz[0], ubo_load_9_yw[0], ubo_load_9_xz[1], ubo_load_9_yw[1]).ywxz.x);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000025F14CDF3D0(5,8-16): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_builtin.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_builtin.wgsl.expected.glsl
new file mode 100644
index 0000000..cb628a8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_builtin.wgsl.expected.glsl

@@ -0,0 +1,81 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct S {
+  int before;
+  uint pad;
+  f16mat4 m;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  int after;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+};
+
+struct S_std140 {
+  int before;
+  uint pad;
+  f16vec4 m_0;
+  f16vec4 m_1;
+  f16vec4 m_2;
+  f16vec4 m_3;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  int after;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  S_std140 inner[4];
+} u;
+
+f16mat4 load_u_inner_2_m() {
+  return f16mat4(u.inner[2u].m_0, u.inner[2u].m_1, u.inner[2u].m_2, u.inner[2u].m_3);
+}
+
+void f() {
+  f16mat4 t = transpose(load_u_inner_2_m());
+  float16_t l = length(u.inner[0u].m_1.ywxz);
+  float16_t a = abs(u.inner[0u].m_1.ywxz[0u]);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_builtin.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_builtin.wgsl.expected.msl
new file mode 100644
index 0000000..7f59502
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_builtin.wgsl.expected.msl

@@ -0,0 +1,32 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
+  /* 0x0008 */ half4x4 m;
+  /* 0x0028 */ tint_array<int8_t, 24> tint_pad_1;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_2;
+};
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol [[buffer(0)]]) {
+  half4x4 const t = transpose((*(tint_symbol))[2].m);
+  half const l = length(half4((*(tint_symbol))[0].m[1]).ywxz);
+  half const a = fabs(half4((*(tint_symbol))[0].m[1]).ywxz[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_builtin.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_builtin.wgsl.expected.spvasm
new file mode 100644
index 0000000..03b661d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_builtin.wgsl.expected.spvasm

@@ -0,0 +1,88 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 53
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+         %43 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "m_2"
+               OpMemberName %S_std140 4 "m_3"
+               OpMemberName %S_std140 5 "after"
+               OpName %u "u"
+               OpName %load_u_inner_2_m "load_u_inner_2_m"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 8
+               OpMemberDecorate %S_std140 2 Offset 16
+               OpMemberDecorate %S_std140 3 Offset 24
+               OpMemberDecorate %S_std140 4 Offset 32
+               OpMemberDecorate %S_std140 5 Offset 64
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+        %int = OpTypeInt 32 1
+       %half = OpTypeFloat 16
+     %v4half = OpTypeVector %half 4
+   %S_std140 = OpTypeStruct %int %v4half %v4half %v4half %v4half %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat4v4half = OpTypeMatrix %v4half 4
+         %11 = OpTypeFunction %mat4v4half
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+     %uint_1 = OpConstant %uint 1
+%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half
+     %uint_3 = OpConstant %uint 3
+       %void = OpTypeVoid
+         %36 = OpTypeFunction %void
+         %44 = OpConstantNull %uint
+%load_u_inner_2_m = OpFunction %mat4v4half None %11
+         %14 = OpLabel
+         %19 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %23 = OpAccessChain %_ptr_Uniform_v4half %19 %uint_1
+         %24 = OpLoad %v4half %23
+         %26 = OpAccessChain %_ptr_Uniform_v4half %19 %uint_2
+         %27 = OpLoad %v4half %26
+         %30 = OpAccessChain %_ptr_Uniform_v4half %19 %uint_3
+         %31 = OpLoad %v4half %30
+         %33 = OpAccessChain %_ptr_Uniform_v4half %19 %uint_4
+         %34 = OpLoad %v4half %33
+         %35 = OpCompositeConstruct %mat4v4half %24 %27 %31 %34
+               OpReturnValue %35
+               OpFunctionEnd
+          %f = OpFunction %void None %36
+         %39 = OpLabel
+         %41 = OpFunctionCall %mat4v4half %load_u_inner_2_m
+         %40 = OpTranspose %mat4v4half %41
+         %45 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0 %44 %uint_2
+         %46 = OpLoad %v4half %45
+         %47 = OpVectorShuffle %v4half %46 %46 1 3 0 2
+         %42 = OpExtInst %half %43 Length %47
+         %49 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0 %44 %uint_2
+         %50 = OpLoad %v4half %49
+         %51 = OpVectorShuffle %v4half %50 %50 1 3 0 2
+         %52 = OpCompositeExtract %half %51 0
+         %48 = OpExtInst %half %43 FAbs %52
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_builtin.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_builtin.wgsl.expected.wgsl
new file mode 100644
index 0000000..125c5e3
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_builtin.wgsl.expected.wgsl

@@ -0,0 +1,17 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat4x4<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let t = transpose(u[2].m);
+  let l = length(u[0].m[1].ywxz);
+  let a = abs(u[0].m[1].ywxz.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_fn.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_fn.wgsl
new file mode 100644
index 0000000..8842fbe
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_fn.wgsl

@@ -0,0 +1,25 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat4x4<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+fn a(a : array<S, 4>) {}
+fn b(s : S) {}
+fn c(m : mat4x4<f16>) {}
+fn d(v : vec4<f16>) {}
+fn e(f : f16) {}
+
+@compute @workgroup_size(1)
+fn f() {
+    a(u);
+    b(u[2]);
+    c(u[2].m);
+    d(u[0].m[1].ywxz);
+    e(u[0].m[1].ywxz.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_fn.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_fn.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..9b871b4
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_fn.wgsl.expected.dxc.hlsl

@@ -0,0 +1,82 @@
+struct S {
+  int before;
+  matrix<float16_t, 4, 4> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+
+void a(S a_1[4]) {
+}
+
+void b(S s) {
+}
+
+void c(matrix<float16_t, 4, 4> m) {
+}
+
+void d(vector<float16_t, 4> v) {
+}
+
+void e(float16_t f_1) {
+}
+
+matrix<float16_t, 4, 4> tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  return matrix<float16_t, 4, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]), vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]));
+}
+
+S tint_symbol_1(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_4 = ((offset + 0u)) / 4;
+  const uint scalar_offset_5 = ((offset + 64u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4]), tint_symbol_3(buffer, (offset + 8u)), asint(buffer[scalar_offset_5 / 4][scalar_offset_5 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(tint_symbol_1(u, 256u));
+  c(tint_symbol_3(u, 264u));
+  uint2 ubo_load_8 = u[1].xy;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_8_yw = vector<float16_t, 2>(f16tof32(ubo_load_8 >> 16));
+  d(vector<float16_t, 4>(ubo_load_8_xz[0], ubo_load_8_yw[0], ubo_load_8_xz[1], ubo_load_8_yw[1]).ywxz);
+  uint2 ubo_load_9 = u[1].xy;
+  vector<float16_t, 2> ubo_load_9_xz = vector<float16_t, 2>(f16tof32(ubo_load_9 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_9_yw = vector<float16_t, 2>(f16tof32(ubo_load_9 >> 16));
+  e(vector<float16_t, 4>(ubo_load_9_xz[0], ubo_load_9_yw[0], ubo_load_9_xz[1], ubo_load_9_yw[1]).ywxz.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_fn.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_fn.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..e6dbe02
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_fn.wgsl.expected.fxc.hlsl

@@ -0,0 +1,87 @@
+SKIP: FAILED
+
+struct S {
+  int before;
+  matrix<float16_t, 4, 4> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+
+void a(S a_1[4]) {
+}
+
+void b(S s) {
+}
+
+void c(matrix<float16_t, 4, 4> m) {
+}
+
+void d(vector<float16_t, 4> v) {
+}
+
+void e(float16_t f_1) {
+}
+
+matrix<float16_t, 4, 4> tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  return matrix<float16_t, 4, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]), vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]));
+}
+
+S tint_symbol_1(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_4 = ((offset + 0u)) / 4;
+  const uint scalar_offset_5 = ((offset + 64u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4]), tint_symbol_3(buffer, (offset + 8u)), asint(buffer[scalar_offset_5 / 4][scalar_offset_5 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(tint_symbol_1(u, 256u));
+  c(tint_symbol_3(u, 264u));
+  uint2 ubo_load_8 = u[1].xy;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_8_yw = vector<float16_t, 2>(f16tof32(ubo_load_8 >> 16));
+  d(vector<float16_t, 4>(ubo_load_8_xz[0], ubo_load_8_yw[0], ubo_load_8_xz[1], ubo_load_8_yw[1]).ywxz);
+  uint2 ubo_load_9 = u[1].xy;
+  vector<float16_t, 2> ubo_load_9_xz = vector<float16_t, 2>(f16tof32(ubo_load_9 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_9_yw = vector<float16_t, 2>(f16tof32(ubo_load_9 >> 16));
+  e(vector<float16_t, 4>(ubo_load_9_xz[0], ubo_load_9_yw[0], ubo_load_9_xz[1], ubo_load_9_yw[1]).ywxz.x);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x00000151DEE459C0(3,10-18): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_fn.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_fn.wgsl.expected.glsl
new file mode 100644
index 0000000..21ad3bf
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_fn.wgsl.expected.glsl

@@ -0,0 +1,112 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct S {
+  int before;
+  uint pad;
+  f16mat4 m;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  int after;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+};
+
+struct S_std140 {
+  int before;
+  uint pad;
+  f16vec4 m_0;
+  f16vec4 m_1;
+  f16vec4 m_2;
+  f16vec4 m_3;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  int after;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  S_std140 inner[4];
+} u;
+
+void a(S a_1[4]) {
+}
+
+void b(S s) {
+}
+
+void c(f16mat4 m) {
+}
+
+void d(f16vec4 v) {
+}
+
+void e(float16_t f_1) {
+}
+
+S conv_S(S_std140 val) {
+  return S(val.before, val.pad, f16mat4(val.m_0, val.m_1, val.m_2, val.m_3), val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.after, val.pad_7, val.pad_8, val.pad_9, val.pad_10, val.pad_11, val.pad_12, val.pad_13, val.pad_14, val.pad_15, val.pad_16, val.pad_17, val.pad_18, val.pad_19, val.pad_20, val.pad_21);
+}
+
+S[4] conv_arr4_S(S_std140 val[4]) {
+  S arr[4] = S[4](S(0, 0u, f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_S(val[i]);
+    }
+  }
+  return arr;
+}
+
+f16mat4 load_u_inner_2_m() {
+  return f16mat4(u.inner[2u].m_0, u.inner[2u].m_1, u.inner[2u].m_2, u.inner[2u].m_3);
+}
+
+void f() {
+  a(conv_arr4_S(u.inner));
+  b(conv_S(u.inner[2u]));
+  c(load_u_inner_2_m());
+  d(u.inner[0u].m_1.ywxz);
+  e(u.inner[0u].m_1.ywxz[0u]);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_fn.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_fn.wgsl.expected.msl
new file mode 100644
index 0000000..7524e8b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_fn.wgsl.expected.msl

@@ -0,0 +1,49 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
+  /* 0x0008 */ half4x4 m;
+  /* 0x0028 */ tint_array<int8_t, 24> tint_pad_1;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_2;
+};
+
+void a(tint_array<S, 4> a_1) {
+}
+
+void b(S s) {
+}
+
+void c(half4x4 m) {
+}
+
+void d(half4 v) {
+}
+
+void e(half f_1) {
+}
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol [[buffer(0)]]) {
+  a(*(tint_symbol));
+  b((*(tint_symbol))[2]);
+  c((*(tint_symbol))[2].m);
+  d(half4((*(tint_symbol))[0].m[1]).ywxz);
+  e(half4((*(tint_symbol))[0].m[1]).ywxz[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_fn.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_fn.wgsl.expected.spvasm
new file mode 100644
index 0000000..c830e91
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_fn.wgsl.expected.spvasm

@@ -0,0 +1,215 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 128
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "m_2"
+               OpMemberName %S_std140 4 "m_3"
+               OpMemberName %S_std140 5 "after"
+               OpName %u "u"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %a "a"
+               OpName %a_1 "a_1"
+               OpName %b "b"
+               OpName %s "s"
+               OpName %c "c"
+               OpName %m "m"
+               OpName %d "d"
+               OpName %v "v"
+               OpName %e "e"
+               OpName %f_1 "f_1"
+               OpName %conv_S "conv_S"
+               OpName %val "val"
+               OpName %conv_arr4_S "conv_arr4_S"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_u_inner_2_m "load_u_inner_2_m"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 8
+               OpMemberDecorate %S_std140 2 Offset 16
+               OpMemberDecorate %S_std140 3 Offset 24
+               OpMemberDecorate %S_std140 4 Offset 32
+               OpMemberDecorate %S_std140 5 Offset 64
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 8
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 8
+               OpMemberDecorate %S 2 Offset 64
+               OpDecorate %_arr_S_uint_4 ArrayStride 128
+        %int = OpTypeInt 32 1
+       %half = OpTypeFloat 16
+     %v4half = OpTypeVector %half 4
+   %S_std140 = OpTypeStruct %int %v4half %v4half %v4half %v4half %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+       %void = OpTypeVoid
+ %mat4v4half = OpTypeMatrix %v4half 4
+          %S = OpTypeStruct %int %mat4v4half %int
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+         %11 = OpTypeFunction %void %_arr_S_uint_4
+         %19 = OpTypeFunction %void %S
+         %23 = OpTypeFunction %void %mat4v4half
+         %27 = OpTypeFunction %void %v4half
+         %31 = OpTypeFunction %void %half
+         %35 = OpTypeFunction %S %S_std140
+         %47 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
+%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
+         %53 = OpConstantNull %_arr_S_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %56 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
+         %69 = OpConstantNull %_arr_S_std140_uint_4
+%_ptr_Function_S = OpTypePointer Function %S
+%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
+     %uint_1 = OpConstant %uint 1
+         %82 = OpTypeFunction %mat4v4half
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half
+     %uint_3 = OpConstant %uint 3
+        %105 = OpTypeFunction %void
+%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
+          %a = OpFunction %void None %11
+        %a_1 = OpFunctionParameter %_arr_S_uint_4
+         %18 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %b = OpFunction %void None %19
+          %s = OpFunctionParameter %S
+         %22 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %c = OpFunction %void None %23
+          %m = OpFunctionParameter %mat4v4half
+         %26 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %d = OpFunction %void None %27
+          %v = OpFunctionParameter %v4half
+         %30 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %e = OpFunction %void None %31
+        %f_1 = OpFunctionParameter %half
+         %34 = OpLabel
+               OpReturn
+               OpFunctionEnd
+     %conv_S = OpFunction %S None %35
+        %val = OpFunctionParameter %S_std140
+         %38 = OpLabel
+         %39 = OpCompositeExtract %int %val 0
+         %40 = OpCompositeExtract %v4half %val 1
+         %41 = OpCompositeExtract %v4half %val 2
+         %42 = OpCompositeExtract %v4half %val 3
+         %43 = OpCompositeExtract %v4half %val 4
+         %44 = OpCompositeConstruct %mat4v4half %40 %41 %42 %43
+         %45 = OpCompositeExtract %int %val 5
+         %46 = OpCompositeConstruct %S %39 %44 %45
+               OpReturnValue %46
+               OpFunctionEnd
+%conv_arr4_S = OpFunction %_arr_S_uint_4 None %47
+      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
+         %50 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %53
+          %i = OpVariable %_ptr_Function_uint Function %56
+%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %69
+               OpBranch %57
+         %57 = OpLabel
+               OpLoopMerge %58 %59 None
+               OpBranch %60
+         %60 = OpLabel
+         %62 = OpLoad %uint %i
+         %63 = OpULessThan %bool %62 %uint_4
+         %61 = OpLogicalNot %bool %63
+               OpSelectionMerge %65 None
+               OpBranchConditional %61 %66 %65
+         %66 = OpLabel
+               OpBranch %58
+         %65 = OpLabel
+               OpStore %var_for_index %val_0
+         %70 = OpLoad %uint %i
+         %72 = OpAccessChain %_ptr_Function_S %arr %70
+         %74 = OpLoad %uint %i
+         %76 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %74
+         %77 = OpLoad %S_std140 %76
+         %73 = OpFunctionCall %S %conv_S %77
+               OpStore %72 %73
+               OpBranch %59
+         %59 = OpLabel
+         %78 = OpLoad %uint %i
+         %80 = OpIAdd %uint %78 %uint_1
+               OpStore %i %80
+               OpBranch %57
+         %58 = OpLabel
+         %81 = OpLoad %_arr_S_uint_4 %arr
+               OpReturnValue %81
+               OpFunctionEnd
+%load_u_inner_2_m = OpFunction %mat4v4half None %82
+         %84 = OpLabel
+         %89 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %92 = OpAccessChain %_ptr_Uniform_v4half %89 %uint_1
+         %93 = OpLoad %v4half %92
+         %95 = OpAccessChain %_ptr_Uniform_v4half %89 %uint_2
+         %96 = OpLoad %v4half %95
+         %99 = OpAccessChain %_ptr_Uniform_v4half %89 %uint_3
+        %100 = OpLoad %v4half %99
+        %102 = OpAccessChain %_ptr_Uniform_v4half %89 %uint_4
+        %103 = OpLoad %v4half %102
+        %104 = OpCompositeConstruct %mat4v4half %93 %96 %100 %103
+               OpReturnValue %104
+               OpFunctionEnd
+          %f = OpFunction %void None %105
+        %107 = OpLabel
+        %111 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
+        %112 = OpLoad %_arr_S_std140_uint_4 %111
+        %109 = OpFunctionCall %_arr_S_uint_4 %conv_arr4_S %112
+        %108 = OpFunctionCall %void %a %109
+        %115 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+        %116 = OpLoad %S_std140 %115
+        %114 = OpFunctionCall %S %conv_S %116
+        %113 = OpFunctionCall %void %b %114
+        %118 = OpFunctionCall %mat4v4half %load_u_inner_2_m
+        %117 = OpFunctionCall %void %c %118
+        %120 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0 %56 %uint_2
+        %121 = OpLoad %v4half %120
+        %122 = OpVectorShuffle %v4half %121 %121 1 3 0 2
+        %119 = OpFunctionCall %void %d %122
+        %124 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0 %56 %uint_2
+        %125 = OpLoad %v4half %124
+        %126 = OpVectorShuffle %v4half %125 %125 1 3 0 2
+        %127 = OpCompositeExtract %half %126 0
+        %123 = OpFunctionCall %void %e %127
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_fn.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_fn.wgsl.expected.wgsl
new file mode 100644
index 0000000..828eca7
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_fn.wgsl.expected.wgsl

@@ -0,0 +1,34 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat4x4<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+fn a(a : array<S, 4>) {
+}
+
+fn b(s : S) {
+}
+
+fn c(m : mat4x4<f16>) {
+}
+
+fn d(v : vec4<f16>) {
+}
+
+fn e(f : f16) {
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  a(u);
+  b(u[2]);
+  c(u[2].m);
+  d(u[0].m[1].ywxz);
+  e(u[0].m[1].ywxz.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_private.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_private.wgsl
new file mode 100644
index 0000000..d9bcd28
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_private.wgsl

@@ -0,0 +1,19 @@
+enable f16;
+
+struct S {  // Element type of the arrays `u` and `p` declared below.
+  before : i32,  // Scalar member declared ahead of the matrix.
+  m : mat4x4<f16>,  // 4x4 matrix of f16; the member this test copies whole and by column.
+  @align(64) @size(16)  // Attributes on `after`: 64-byte alignment, 16-byte size.
+  after : i32,  // Scalar member declared after the matrix.
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;  // Copy source: uniform-address-space array of four S at group 0, binding 0.
+var<private> p : array<S, 4>;  // Copy destination: private-address-space array of the same type.
+
+@compute @workgroup_size(1)  // Compute entry point with a workgroup size of 1.
+fn f() {
+    p = u;  // Copy the whole uniform array into the private array.
+    p[1] = u[2];  // Copy a single struct element.
+    p[3].m = u[2].m;  // Copy only the matrix member of one element.
+    p[1].m[0] = u[0].m[1].ywxz;  // Read column 1 of u[0].m, swizzle it to (y, w, x, z), store into column 0 of p[1].m.
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_private.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_private.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..8b11bb5
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_private.wgsl.expected.dxc.hlsl

@@ -0,0 +1,64 @@
+struct S {
+  int before;
+  matrix<float16_t, 4, 4> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+static S p[4] = (S[4])0;
+
+matrix<float16_t, 4, 4> tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  return matrix<float16_t, 4, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]), vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]));
+}
+
+S tint_symbol_1(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_4 = ((offset + 0u)) / 4;
+  const uint scalar_offset_5 = ((offset + 64u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4]), tint_symbol_3(buffer, (offset + 8u)), asint(buffer[scalar_offset_5 / 4][scalar_offset_5 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = tint_symbol_1(u, 256u);
+  p[3].m = tint_symbol_3(u, 264u);
+  uint2 ubo_load_8 = u[1].xy;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_8_yw = vector<float16_t, 2>(f16tof32(ubo_load_8 >> 16));
+  p[1].m[0] = vector<float16_t, 4>(ubo_load_8_xz[0], ubo_load_8_yw[0], ubo_load_8_xz[1], ubo_load_8_yw[1]).ywxz;
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_private.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_private.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..a642960
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_private.wgsl.expected.fxc.hlsl

@@ -0,0 +1,69 @@
+SKIP: FAILED
+
+struct S {
+  int before;
+  matrix<float16_t, 4, 4> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+static S p[4] = (S[4])0;
+
+matrix<float16_t, 4, 4> tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  return matrix<float16_t, 4, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]), vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]));
+}
+
+S tint_symbol_1(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_4 = ((offset + 0u)) / 4;
+  const uint scalar_offset_5 = ((offset + 64u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4]), tint_symbol_3(buffer, (offset + 8u)), asint(buffer[scalar_offset_5 / 4][scalar_offset_5 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = tint_symbol_1(u, 256u);
+  p[3].m = tint_symbol_3(u, 264u);
+  uint2 ubo_load_8 = u[1].xy;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_8_yw = vector<float16_t, 2>(f16tof32(ubo_load_8 >> 16));
+  p[1].m[0] = vector<float16_t, 4>(ubo_load_8_xz[0], ubo_load_8_yw[0], ubo_load_8_xz[1], ubo_load_8_yw[1]).ywxz;
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001D8880F79A0(3,10-18): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_private.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_private.wgsl.expected.glsl
new file mode 100644
index 0000000..c026bc2
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_private.wgsl.expected.glsl

@@ -0,0 +1,97 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct S {
+  int before;
+  uint pad;
+  f16mat4 m;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  int after;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+};
+
+struct S_std140 {
+  int before;
+  uint pad;
+  f16vec4 m_0;
+  f16vec4 m_1;
+  f16vec4 m_2;
+  f16vec4 m_3;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  int after;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  S_std140 inner[4];
+} u;
+
+S p[4] = S[4](S(0, 0u, f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+S conv_S(S_std140 val) {
+  return S(val.before, val.pad, f16mat4(val.m_0, val.m_1, val.m_2, val.m_3), val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.after, val.pad_7, val.pad_8, val.pad_9, val.pad_10, val.pad_11, val.pad_12, val.pad_13, val.pad_14, val.pad_15, val.pad_16, val.pad_17, val.pad_18, val.pad_19, val.pad_20, val.pad_21);
+}
+
+S[4] conv_arr4_S(S_std140 val[4]) {
+  S arr[4] = S[4](S(0, 0u, f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_S(val[i]);
+    }
+  }
+  return arr;
+}
+
+f16mat4 load_u_inner_2_m() {
+  return f16mat4(u.inner[2u].m_0, u.inner[2u].m_1, u.inner[2u].m_2, u.inner[2u].m_3);
+}
+
+void f() {
+  p = conv_arr4_S(u.inner);
+  p[1] = conv_S(u.inner[2u]);
+  p[3].m = load_u_inner_2_m();
+  p[1].m[0] = u.inner[0u].m_1.ywxz;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_private.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_private.wgsl.expected.msl
new file mode 100644
index 0000000..4b6869d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_private.wgsl.expected.msl

@@ -0,0 +1,34 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
+  /* 0x0008 */ half4x4 m;
+  /* 0x0028 */ tint_array<int8_t, 24> tint_pad_1;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_2;
+};
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol_1 [[buffer(0)]]) {
+  thread tint_array<S, 4> tint_symbol = {};
+  tint_symbol = *(tint_symbol_1);
+  tint_symbol[1] = (*(tint_symbol_1))[2];
+  tint_symbol[3].m = (*(tint_symbol_1))[2].m;
+  tint_symbol[1].m[0] = half4((*(tint_symbol_1))[0].m[1]).ywxz;
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_private.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_private.wgsl.expected.spvasm
new file mode 100644
index 0000000..5e02a14
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_private.wgsl.expected.spvasm

@@ -0,0 +1,182 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 110
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "m_2"
+               OpMemberName %S_std140 4 "m_3"
+               OpMemberName %S_std140 5 "after"
+               OpName %u "u"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %p "p"
+               OpName %conv_S "conv_S"
+               OpName %val "val"
+               OpName %conv_arr4_S "conv_arr4_S"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_u_inner_2_m "load_u_inner_2_m"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 8
+               OpMemberDecorate %S_std140 2 Offset 16
+               OpMemberDecorate %S_std140 3 Offset 24
+               OpMemberDecorate %S_std140 4 Offset 32
+               OpMemberDecorate %S_std140 5 Offset 64
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 8
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 8
+               OpMemberDecorate %S 2 Offset 64
+               OpDecorate %_arr_S_uint_4 ArrayStride 128
+        %int = OpTypeInt 32 1
+       %half = OpTypeFloat 16
+     %v4half = OpTypeVector %half 4
+   %S_std140 = OpTypeStruct %int %v4half %v4half %v4half %v4half %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat4v4half = OpTypeMatrix %v4half 4
+          %S = OpTypeStruct %int %mat4v4half %int
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+%_ptr_Private__arr_S_uint_4 = OpTypePointer Private %_arr_S_uint_4
+         %16 = OpConstantNull %_arr_S_uint_4
+          %p = OpVariable %_ptr_Private__arr_S_uint_4 Private %16
+         %17 = OpTypeFunction %S %S_std140
+         %29 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
+%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %37 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
+         %50 = OpConstantNull %_arr_S_std140_uint_4
+%_ptr_Function_S = OpTypePointer Function %S
+%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
+     %uint_1 = OpConstant %uint 1
+         %63 = OpTypeFunction %mat4v4half
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half
+     %uint_3 = OpConstant %uint 3
+       %void = OpTypeVoid
+         %86 = OpTypeFunction %void
+%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
+      %int_1 = OpConstant %int 1
+%_ptr_Private_S = OpTypePointer Private %S
+      %int_3 = OpConstant %int 3
+%_ptr_Private_mat4v4half = OpTypePointer Private %mat4v4half
+        %104 = OpConstantNull %int
+%_ptr_Private_v4half = OpTypePointer Private %v4half
+     %conv_S = OpFunction %S None %17
+        %val = OpFunctionParameter %S_std140
+         %20 = OpLabel
+         %21 = OpCompositeExtract %int %val 0
+         %22 = OpCompositeExtract %v4half %val 1
+         %23 = OpCompositeExtract %v4half %val 2
+         %24 = OpCompositeExtract %v4half %val 3
+         %25 = OpCompositeExtract %v4half %val 4
+         %26 = OpCompositeConstruct %mat4v4half %22 %23 %24 %25
+         %27 = OpCompositeExtract %int %val 5
+         %28 = OpCompositeConstruct %S %21 %26 %27
+               OpReturnValue %28
+               OpFunctionEnd
+%conv_arr4_S = OpFunction %_arr_S_uint_4 None %29
+      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
+         %32 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %16
+          %i = OpVariable %_ptr_Function_uint Function %37
+%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %50
+               OpBranch %38
+         %38 = OpLabel
+               OpLoopMerge %39 %40 None
+               OpBranch %41
+         %41 = OpLabel
+         %43 = OpLoad %uint %i
+         %44 = OpULessThan %bool %43 %uint_4
+         %42 = OpLogicalNot %bool %44
+               OpSelectionMerge %46 None
+               OpBranchConditional %42 %47 %46
+         %47 = OpLabel
+               OpBranch %39
+         %46 = OpLabel
+               OpStore %var_for_index %val_0
+         %51 = OpLoad %uint %i
+         %53 = OpAccessChain %_ptr_Function_S %arr %51
+         %55 = OpLoad %uint %i
+         %57 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %55
+         %58 = OpLoad %S_std140 %57
+         %54 = OpFunctionCall %S %conv_S %58
+               OpStore %53 %54
+               OpBranch %40
+         %40 = OpLabel
+         %59 = OpLoad %uint %i
+         %61 = OpIAdd %uint %59 %uint_1
+               OpStore %i %61
+               OpBranch %38
+         %39 = OpLabel
+         %62 = OpLoad %_arr_S_uint_4 %arr
+               OpReturnValue %62
+               OpFunctionEnd
+%load_u_inner_2_m = OpFunction %mat4v4half None %63
+         %65 = OpLabel
+         %70 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %73 = OpAccessChain %_ptr_Uniform_v4half %70 %uint_1
+         %74 = OpLoad %v4half %73
+         %76 = OpAccessChain %_ptr_Uniform_v4half %70 %uint_2
+         %77 = OpLoad %v4half %76
+         %80 = OpAccessChain %_ptr_Uniform_v4half %70 %uint_3
+         %81 = OpLoad %v4half %80
+         %83 = OpAccessChain %_ptr_Uniform_v4half %70 %uint_4
+         %84 = OpLoad %v4half %83
+         %85 = OpCompositeConstruct %mat4v4half %74 %77 %81 %84
+               OpReturnValue %85
+               OpFunctionEnd
+          %f = OpFunction %void None %86
+         %89 = OpLabel
+         %92 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
+         %93 = OpLoad %_arr_S_std140_uint_4 %92
+         %90 = OpFunctionCall %_arr_S_uint_4 %conv_arr4_S %93
+               OpStore %p %90
+         %96 = OpAccessChain %_ptr_Private_S %p %int_1
+         %98 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %99 = OpLoad %S_std140 %98
+         %97 = OpFunctionCall %S %conv_S %99
+               OpStore %96 %97
+        %102 = OpAccessChain %_ptr_Private_mat4v4half %p %int_3 %uint_1
+        %103 = OpFunctionCall %mat4v4half %load_u_inner_2_m
+               OpStore %102 %103
+        %106 = OpAccessChain %_ptr_Private_v4half %p %int_1 %uint_1 %104
+        %107 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0 %37 %uint_2
+        %108 = OpLoad %v4half %107
+        %109 = OpVectorShuffle %v4half %108 %108 1 3 0 2
+               OpStore %106 %109
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_private.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_private.wgsl.expected.wgsl
new file mode 100644
index 0000000..6bf5854
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_private.wgsl.expected.wgsl

@@ -0,0 +1,20 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat4x4<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+var<private> p : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  p = u;
+  p[1] = u[2];
+  p[3].m = u[2].m;
+  p[1].m[0] = u[0].m[1].ywxz;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_storage.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_storage.wgsl
new file mode 100644
index 0000000..6d8ea1a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_storage.wgsl

@@ -0,0 +1,19 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat4x4<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    s = u;
+    s[1] = u[2];
+    s[3].m = u[2].m;
+    s[1].m[0] = u[0].m[1].ywxz;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_storage.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_storage.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..212b921
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_storage.wgsl.expected.dxc.hlsl

@@ -0,0 +1,86 @@
+struct S {
+  int before;
+  matrix<float16_t, 4, 4> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol_3(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 4, 4> value) {
+  buffer.Store<vector<float16_t, 4> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 8u), value[1u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 16u), value[2u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 24u), value[3u]);
+}
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, S value) {
+  buffer.Store((offset + 0u), asuint(value.before));
+  tint_symbol_3(buffer, (offset + 8u), value.m);
+  buffer.Store((offset + 64u), asuint(value.after));
+}
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, S value[4]) {
+  S array[4] = value;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      tint_symbol_1(buffer, (offset + (i * 128u)), array[i]);
+    }
+  }
+}
+
+matrix<float16_t, 4, 4> tint_symbol_8(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  return matrix<float16_t, 4, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]), vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]));
+}
+
+S tint_symbol_6(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_4 = ((offset + 0u)) / 4;
+  const uint scalar_offset_5 = ((offset + 64u)) / 4;
+  const S tint_symbol_10 = {asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4]), tint_symbol_8(buffer, (offset + 8u)), asint(buffer[scalar_offset_5 / 4][scalar_offset_5 % 4])};
+  return tint_symbol_10;
+}
+
+typedef S tint_symbol_5_ret[4];
+tint_symbol_5_ret tint_symbol_5(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_6(buffer, (offset + (i_1 * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_5(u, 0u));
+  tint_symbol_1(s, 128u, tint_symbol_6(u, 256u));
+  tint_symbol_3(s, 392u, tint_symbol_8(u, 264u));
+  uint2 ubo_load_8 = u[1].xy;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_8_yw = vector<float16_t, 2>(f16tof32(ubo_load_8 >> 16));
+  s.Store<vector<float16_t, 4> >(136u, vector<float16_t, 4>(ubo_load_8_xz[0], ubo_load_8_yw[0], ubo_load_8_xz[1], ubo_load_8_yw[1]).ywxz);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_storage.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_storage.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..a426c30
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_storage.wgsl.expected.fxc.hlsl

@@ -0,0 +1,91 @@
+SKIP: FAILED
+
+struct S {
+  int before;
+  matrix<float16_t, 4, 4> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol_3(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 4, 4> value) {
+  buffer.Store<vector<float16_t, 4> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 8u), value[1u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 16u), value[2u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 24u), value[3u]);
+}
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, S value) {
+  buffer.Store((offset + 0u), asuint(value.before));
+  tint_symbol_3(buffer, (offset + 8u), value.m);
+  buffer.Store((offset + 64u), asuint(value.after));
+}
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, S value[4]) {
+  S array[4] = value;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      tint_symbol_1(buffer, (offset + (i * 128u)), array[i]);
+    }
+  }
+}
+
+matrix<float16_t, 4, 4> tint_symbol_8(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  return matrix<float16_t, 4, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]), vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]));
+}
+
+S tint_symbol_6(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_4 = ((offset + 0u)) / 4;
+  const uint scalar_offset_5 = ((offset + 64u)) / 4;
+  const S tint_symbol_10 = {asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4]), tint_symbol_8(buffer, (offset + 8u)), asint(buffer[scalar_offset_5 / 4][scalar_offset_5 % 4])};
+  return tint_symbol_10;
+}
+
+typedef S tint_symbol_5_ret[4];
+tint_symbol_5_ret tint_symbol_5(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_6(buffer, (offset + (i_1 * 128u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_5(u, 0u));
+  tint_symbol_1(s, 128u, tint_symbol_6(u, 256u));
+  tint_symbol_3(s, 392u, tint_symbol_8(u, 264u));
+  uint2 ubo_load_8 = u[1].xy;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_8_yw = vector<float16_t, 2>(f16tof32(ubo_load_8 >> 16));
+  s.Store<vector<float16_t, 4> >(136u, vector<float16_t, 4>(ubo_load_8_xz[0], ubo_load_8_yw[0], ubo_load_8_xz[1], ubo_load_8_yw[1]).ywxz);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000013BAAFBAEB0(3,10-18): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_storage.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_storage.wgsl.expected.glsl
new file mode 100644
index 0000000..0a061cd
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_storage.wgsl.expected.glsl

@@ -0,0 +1,100 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct S {
+  int before;
+  uint pad;
+  f16mat4 m;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  int after;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+};
+
+struct S_std140 {
+  int before;
+  uint pad;
+  f16vec4 m_0;
+  f16vec4 m_1;
+  f16vec4 m_2;
+  f16vec4 m_3;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  int after;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  S_std140 inner[4];
+} u;
+
+layout(binding = 1, std430) buffer u_block_ssbo {
+  S inner[4];
+} s;
+
+S conv_S(S_std140 val) {
+  return S(val.before, val.pad, f16mat4(val.m_0, val.m_1, val.m_2, val.m_3), val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.after, val.pad_7, val.pad_8, val.pad_9, val.pad_10, val.pad_11, val.pad_12, val.pad_13, val.pad_14, val.pad_15, val.pad_16, val.pad_17, val.pad_18, val.pad_19, val.pad_20, val.pad_21);
+}
+
+S[4] conv_arr4_S(S_std140 val[4]) {
+  S arr[4] = S[4](S(0, 0u, f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_S(val[i]);
+    }
+  }
+  return arr;
+}
+
+f16mat4 load_u_inner_2_m() {
+  return f16mat4(u.inner[2u].m_0, u.inner[2u].m_1, u.inner[2u].m_2, u.inner[2u].m_3);
+}
+
+void f() {
+  s.inner = conv_arr4_S(u.inner);
+  s.inner[1] = conv_S(u.inner[2u]);
+  s.inner[3].m = load_u_inner_2_m();
+  s.inner[1].m[0] = u.inner[0u].m_1.ywxz;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_storage.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_storage.wgsl.expected.msl
new file mode 100644
index 0000000..1573001
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_storage.wgsl.expected.msl

@@ -0,0 +1,33 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
+  /* 0x0008 */ half4x4 m;
+  /* 0x0028 */ tint_array<int8_t, 24> tint_pad_1;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_2;
+};
+
+kernel void f(device tint_array<S, 4>* tint_symbol [[buffer(1)]], const constant tint_array<S, 4>* tint_symbol_1 [[buffer(0)]]) {
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[2];
+  (*(tint_symbol))[3].m = (*(tint_symbol_1))[2].m;
+  (*(tint_symbol))[1].m[0] = half4((*(tint_symbol_1))[0].m[1]).ywxz;
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_storage.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_storage.wgsl.expected.spvasm
new file mode 100644
index 0000000..93a467a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_storage.wgsl.expected.spvasm

@@ -0,0 +1,191 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 113
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "m_2"
+               OpMemberName %S_std140 4 "m_3"
+               OpMemberName %S_std140 5 "after"
+               OpName %u "u"
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %s "s"
+               OpName %conv_S "conv_S"
+               OpName %val "val"
+               OpName %conv_arr4_S "conv_arr4_S"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_u_inner_2_m "load_u_inner_2_m"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 8
+               OpMemberDecorate %S_std140 2 Offset 16
+               OpMemberDecorate %S_std140 3 Offset 24
+               OpMemberDecorate %S_std140 4 Offset 32
+               OpMemberDecorate %S_std140 5 Offset 64
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 8
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 8
+               OpMemberDecorate %S 2 Offset 64
+               OpDecorate %_arr_S_uint_4 ArrayStride 128
+               OpDecorate %s DescriptorSet 0
+               OpDecorate %s Binding 1
+        %int = OpTypeInt 32 1
+       %half = OpTypeFloat 16
+     %v4half = OpTypeVector %half 4
+   %S_std140 = OpTypeStruct %int %v4half %v4half %v4half %v4half %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat4v4half = OpTypeMatrix %v4half 4
+          %S = OpTypeStruct %int %mat4v4half %int
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+    %u_block = OpTypeStruct %_arr_S_uint_4
+%_ptr_StorageBuffer_u_block = OpTypePointer StorageBuffer %u_block
+          %s = OpVariable %_ptr_StorageBuffer_u_block StorageBuffer
+         %17 = OpTypeFunction %S %S_std140
+         %29 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
+%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
+         %35 = OpConstantNull %_arr_S_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %38 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
+         %51 = OpConstantNull %_arr_S_std140_uint_4
+%_ptr_Function_S = OpTypePointer Function %S
+%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
+     %uint_1 = OpConstant %uint 1
+         %64 = OpTypeFunction %mat4v4half
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half
+     %uint_3 = OpConstant %uint 3
+       %void = OpTypeVoid
+         %87 = OpTypeFunction %void
+%_ptr_StorageBuffer__arr_S_uint_4 = OpTypePointer StorageBuffer %_arr_S_uint_4
+%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
+      %int_1 = OpConstant %int 1
+%_ptr_StorageBuffer_S = OpTypePointer StorageBuffer %S
+      %int_3 = OpConstant %int 3
+%_ptr_StorageBuffer_mat4v4half = OpTypePointer StorageBuffer %mat4v4half
+        %107 = OpConstantNull %int
+%_ptr_StorageBuffer_v4half = OpTypePointer StorageBuffer %v4half
+     %conv_S = OpFunction %S None %17
+        %val = OpFunctionParameter %S_std140
+         %20 = OpLabel
+         %21 = OpCompositeExtract %int %val 0
+         %22 = OpCompositeExtract %v4half %val 1
+         %23 = OpCompositeExtract %v4half %val 2
+         %24 = OpCompositeExtract %v4half %val 3
+         %25 = OpCompositeExtract %v4half %val 4
+         %26 = OpCompositeConstruct %mat4v4half %22 %23 %24 %25
+         %27 = OpCompositeExtract %int %val 5
+         %28 = OpCompositeConstruct %S %21 %26 %27
+               OpReturnValue %28
+               OpFunctionEnd
+%conv_arr4_S = OpFunction %_arr_S_uint_4 None %29
+      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
+         %32 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %35
+          %i = OpVariable %_ptr_Function_uint Function %38
+%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %51
+               OpBranch %39
+         %39 = OpLabel
+               OpLoopMerge %40 %41 None
+               OpBranch %42
+         %42 = OpLabel
+         %44 = OpLoad %uint %i
+         %45 = OpULessThan %bool %44 %uint_4
+         %43 = OpLogicalNot %bool %45
+               OpSelectionMerge %47 None
+               OpBranchConditional %43 %48 %47
+         %48 = OpLabel
+               OpBranch %40
+         %47 = OpLabel
+               OpStore %var_for_index %val_0
+         %52 = OpLoad %uint %i
+         %54 = OpAccessChain %_ptr_Function_S %arr %52
+         %56 = OpLoad %uint %i
+         %58 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %56
+         %59 = OpLoad %S_std140 %58
+         %55 = OpFunctionCall %S %conv_S %59
+               OpStore %54 %55
+               OpBranch %41
+         %41 = OpLabel
+         %60 = OpLoad %uint %i
+         %62 = OpIAdd %uint %60 %uint_1
+               OpStore %i %62
+               OpBranch %39
+         %40 = OpLabel
+         %63 = OpLoad %_arr_S_uint_4 %arr
+               OpReturnValue %63
+               OpFunctionEnd
+%load_u_inner_2_m = OpFunction %mat4v4half None %64
+         %66 = OpLabel
+         %71 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %74 = OpAccessChain %_ptr_Uniform_v4half %71 %uint_1
+         %75 = OpLoad %v4half %74
+         %77 = OpAccessChain %_ptr_Uniform_v4half %71 %uint_2
+         %78 = OpLoad %v4half %77
+         %81 = OpAccessChain %_ptr_Uniform_v4half %71 %uint_3
+         %82 = OpLoad %v4half %81
+         %84 = OpAccessChain %_ptr_Uniform_v4half %71 %uint_4
+         %85 = OpLoad %v4half %84
+         %86 = OpCompositeConstruct %mat4v4half %75 %78 %82 %85
+               OpReturnValue %86
+               OpFunctionEnd
+          %f = OpFunction %void None %87
+         %90 = OpLabel
+         %92 = OpAccessChain %_ptr_StorageBuffer__arr_S_uint_4 %s %uint_0
+         %95 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
+         %96 = OpLoad %_arr_S_std140_uint_4 %95
+         %93 = OpFunctionCall %_arr_S_uint_4 %conv_arr4_S %96
+               OpStore %92 %93
+         %99 = OpAccessChain %_ptr_StorageBuffer_S %s %uint_0 %int_1
+        %101 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+        %102 = OpLoad %S_std140 %101
+        %100 = OpFunctionCall %S %conv_S %102
+               OpStore %99 %100
+        %105 = OpAccessChain %_ptr_StorageBuffer_mat4v4half %s %uint_0 %int_3 %uint_1
+        %106 = OpFunctionCall %mat4v4half %load_u_inner_2_m
+               OpStore %105 %106
+        %109 = OpAccessChain %_ptr_StorageBuffer_v4half %s %uint_0 %int_1 %uint_1 %107
+        %110 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0 %38 %uint_2
+        %111 = OpLoad %v4half %110
+        %112 = OpVectorShuffle %v4half %111 %111 1 3 0 2
+               OpStore %109 %112
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_storage.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_storage.wgsl.expected.wgsl
new file mode 100644
index 0000000..d07f7c3
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_storage.wgsl.expected.wgsl

@@ -0,0 +1,20 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat4x4<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  s = u;
+  s[1] = u[2];
+  s[3].m = u[2].m;
+  s[1].m[0] = u[0].m[1].ywxz;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_workgroup.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_workgroup.wgsl
new file mode 100644
index 0000000..b794ec8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_workgroup.wgsl

@@ -0,0 +1,19 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat4x4<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+var<workgroup> w : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    w = u;
+    w[1] = u[2];
+    w[3].m = u[2].m;
+    w[1].m[0] = u[0].m[1].ywxz;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_workgroup.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_workgroup.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..45f86b8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_workgroup.wgsl.expected.dxc.hlsl

@@ -0,0 +1,80 @@
+struct S {
+  int before;
+  matrix<float16_t, 4, 4> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+groupshared S w[4];
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+matrix<float16_t, 4, 4> tint_symbol_5(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  return matrix<float16_t, 4, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]), vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]));
+}
+
+S tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_4 = ((offset + 0u)) / 4;
+  const uint scalar_offset_5 = ((offset + 64u)) / 4;
+  const S tint_symbol_8 = {asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4]), tint_symbol_5(buffer, (offset + 8u)), asint(buffer[scalar_offset_5 / 4][scalar_offset_5 % 4])};
+  return tint_symbol_8;
+}
+
+typedef S tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 128u)));
+    }
+  }
+  return arr;
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      const uint i = idx;
+      const S tint_symbol_7 = (S)0;
+      w[i] = tint_symbol_7;
+    }
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = tint_symbol_3(u, 256u);
+  w[3].m = tint_symbol_5(u, 264u);
+  uint2 ubo_load_8 = u[1].xy;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_8_yw = vector<float16_t, 2>(f16tof32(ubo_load_8 >> 16));
+  w[1].m[0] = vector<float16_t, 4>(ubo_load_8_xz[0], ubo_load_8_yw[0], ubo_load_8_xz[1], ubo_load_8_yw[1]).ywxz;
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_workgroup.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_workgroup.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..a09f876
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_workgroup.wgsl.expected.fxc.hlsl

@@ -0,0 +1,85 @@
+SKIP: FAILED
+
+struct S {
+  int before;
+  matrix<float16_t, 4, 4> m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[32];
+};
+groupshared S w[4];
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+matrix<float16_t, 4, 4> tint_symbol_5(uint4 buffer[32], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  return matrix<float16_t, 4, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]), vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]));
+}
+
+S tint_symbol_3(uint4 buffer[32], uint offset) {
+  const uint scalar_offset_4 = ((offset + 0u)) / 4;
+  const uint scalar_offset_5 = ((offset + 64u)) / 4;
+  const S tint_symbol_8 = {asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4]), tint_symbol_5(buffer, (offset + 8u)), asint(buffer[scalar_offset_5 / 4][scalar_offset_5 % 4])};
+  return tint_symbol_8;
+}
+
+typedef S tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[32], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 128u)));
+    }
+  }
+  return arr;
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      const uint i = idx;
+      const S tint_symbol_7 = (S)0;
+      w[i] = tint_symbol_7;
+    }
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = tint_symbol_3(u, 256u);
+  w[3].m = tint_symbol_5(u, 264u);
+  uint2 ubo_load_8 = u[1].xy;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_8_yw = vector<float16_t, 2>(f16tof32(ubo_load_8 >> 16));
+  w[1].m[0] = vector<float16_t, 4>(ubo_load_8_xz[0], ubo_load_8_yw[0], ubo_load_8_xz[1], ubo_load_8_yw[1]).ywxz;
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001355D73AE70(3,10-18): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_workgroup.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_workgroup.wgsl.expected.glsl
new file mode 100644
index 0000000..644ea81
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_workgroup.wgsl.expected.glsl

@@ -0,0 +1,105 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct S {
+  int before;
+  uint pad;
+  f16mat4 m;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  int after;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+};
+
+struct S_std140 {
+  int before;
+  uint pad;
+  f16vec4 m_0;
+  f16vec4 m_1;
+  f16vec4 m_2;
+  f16vec4 m_3;
+  uint pad_1;
+  uint pad_2;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  int after;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  S_std140 inner[4];
+} u;
+
+shared S w[4];
+S conv_S(S_std140 val) {
+  return S(val.before, val.pad, f16mat4(val.m_0, val.m_1, val.m_2, val.m_3), val.pad_1, val.pad_2, val.pad_3, val.pad_4, val.pad_5, val.pad_6, val.after, val.pad_7, val.pad_8, val.pad_9, val.pad_10, val.pad_11, val.pad_12, val.pad_13, val.pad_14, val.pad_15, val.pad_16, val.pad_17, val.pad_18, val.pad_19, val.pad_20, val.pad_21);
+}
+
+S[4] conv_arr4_S(S_std140 val[4]) {
+  S arr[4] = S[4](S(0, 0u, f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_S(val[i]);
+    }
+  }
+  return arr;
+}
+
+f16mat4 load_u_inner_2_m() {
+  return f16mat4(u.inner[2u].m_0, u.inner[2u].m_1, u.inner[2u].m_2, u.inner[2u].m_3);
+}
+
+void f(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      uint i = idx;
+      S tint_symbol = S(0, 0u, f16mat4(f16vec4(0.0hf), f16vec4(0.0hf), f16vec4(0.0hf), f16vec4(0.0hf)), 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u);
+      w[i] = tint_symbol;
+    }
+  }
+  barrier();
+  w = conv_arr4_S(u.inner);
+  w[1] = conv_S(u.inner[2u]);
+  w[3].m = load_u_inner_2_m();
+  w[1].m[0] = u.inner[0u].m_1.ywxz;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f(gl_LocalInvocationIndex);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_workgroup.wgsl.expected.msl
new file mode 100644
index 0000000..f1d4c32
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_workgroup.wgsl.expected.msl

@@ -0,0 +1,48 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 4> tint_pad;
+  /* 0x0008 */ half4x4 m;
+  /* 0x0028 */ tint_array<int8_t, 24> tint_pad_1;
+  /* 0x0040 */ int after;
+  /* 0x0044 */ tint_array<int8_t, 60> tint_pad_2;
+};
+
+struct tint_symbol_6 {
+  tint_array<S, 4> w;
+};
+
+void f_inner(uint local_invocation_index, threadgroup tint_array<S, 4>* const tint_symbol_1, const constant tint_array<S, 4>* const tint_symbol_2) {
+  for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+    uint const i = idx;
+    S const tint_symbol = S{};
+    (*(tint_symbol_1))[i] = tint_symbol;
+  }
+  threadgroup_barrier(mem_flags::mem_threadgroup);
+  *(tint_symbol_1) = *(tint_symbol_2);
+  (*(tint_symbol_1))[1] = (*(tint_symbol_2))[2];
+  (*(tint_symbol_1))[3].m = (*(tint_symbol_2))[2].m;
+  (*(tint_symbol_1))[1].m[0] = half4((*(tint_symbol_2))[0].m[1]).ywxz;
+}
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol_5 [[buffer(0)]], threadgroup tint_symbol_6* tint_symbol_4 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
+  threadgroup tint_array<S, 4>* const tint_symbol_3 = &((*(tint_symbol_4)).w);
+  f_inner(local_invocation_index, tint_symbol_3, tint_symbol_5);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_workgroup.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_workgroup.wgsl.expected.spvasm
new file mode 100644
index 0000000..23c6874
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_workgroup.wgsl.expected.spvasm

@@ -0,0 +1,225 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 135
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f" %local_invocation_index_1
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %local_invocation_index_1 "local_invocation_index_1"
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "before"
+               OpMemberName %S_std140 1 "m_0"
+               OpMemberName %S_std140 2 "m_1"
+               OpMemberName %S_std140 3 "m_2"
+               OpMemberName %S_std140 4 "m_3"
+               OpMemberName %S_std140 5 "after"
+               OpName %u "u"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %w "w"
+               OpName %conv_S "conv_S"
+               OpName %val "val"
+               OpName %conv_arr4_S "conv_arr4_S"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_u_inner_2_m "load_u_inner_2_m"
+               OpName %f_inner "f_inner"
+               OpName %local_invocation_index "local_invocation_index"
+               OpName %idx "idx"
+               OpName %f "f"
+               OpDecorate %local_invocation_index_1 BuiltIn LocalInvocationIndex
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 8
+               OpMemberDecorate %S_std140 2 Offset 16
+               OpMemberDecorate %S_std140 3 Offset 24
+               OpMemberDecorate %S_std140 4 Offset 32
+               OpMemberDecorate %S_std140 5 Offset 64
+               OpDecorate %_arr_S_std140_uint_4 ArrayStride 128
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 8
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 8
+               OpMemberDecorate %S 2 Offset 64
+               OpDecorate %_arr_S_uint_4 ArrayStride 128
+       %uint = OpTypeInt 32 0
+%_ptr_Input_uint = OpTypePointer Input %uint
+%local_invocation_index_1 = OpVariable %_ptr_Input_uint Input
+        %int = OpTypeInt 32 1
+       %half = OpTypeFloat 16
+     %v4half = OpTypeVector %half 4
+   %S_std140 = OpTypeStruct %int %v4half %v4half %v4half %v4half %int
+     %uint_4 = OpConstant %uint 4
+%_arr_S_std140_uint_4 = OpTypeArray %S_std140 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_S_std140_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat4v4half = OpTypeMatrix %v4half 4
+          %S = OpTypeStruct %int %mat4v4half %int
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+%_ptr_Workgroup__arr_S_uint_4 = OpTypePointer Workgroup %_arr_S_uint_4
+          %w = OpVariable %_ptr_Workgroup__arr_S_uint_4 Workgroup
+         %18 = OpTypeFunction %S %S_std140
+         %30 = OpTypeFunction %_arr_S_uint_4 %_arr_S_std140_uint_4
+%_ptr_Function__arr_S_uint_4 = OpTypePointer Function %_arr_S_uint_4
+         %36 = OpConstantNull %_arr_S_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %39 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_S_std140_uint_4 = OpTypePointer Function %_arr_S_std140_uint_4
+         %52 = OpConstantNull %_arr_S_std140_uint_4
+%_ptr_Function_S = OpTypePointer Function %S
+%_ptr_Function_S_std140 = OpTypePointer Function %S_std140
+     %uint_1 = OpConstant %uint 1
+         %65 = OpTypeFunction %mat4v4half
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half
+     %uint_3 = OpConstant %uint 3
+       %void = OpTypeVoid
+         %88 = OpTypeFunction %void %uint
+%_ptr_Workgroup_S = OpTypePointer Workgroup %S
+        %106 = OpConstantNull %S
+   %uint_264 = OpConstant %uint 264
+%_ptr_Uniform__arr_S_std140_uint_4 = OpTypePointer Uniform %_arr_S_std140_uint_4
+      %int_1 = OpConstant %int 1
+      %int_3 = OpConstant %int 3
+%_ptr_Workgroup_mat4v4half = OpTypePointer Workgroup %mat4v4half
+        %124 = OpConstantNull %int
+%_ptr_Workgroup_v4half = OpTypePointer Workgroup %v4half
+        %130 = OpTypeFunction %void
+     %conv_S = OpFunction %S None %18
+        %val = OpFunctionParameter %S_std140
+         %21 = OpLabel
+         %22 = OpCompositeExtract %int %val 0
+         %23 = OpCompositeExtract %v4half %val 1
+         %24 = OpCompositeExtract %v4half %val 2
+         %25 = OpCompositeExtract %v4half %val 3
+         %26 = OpCompositeExtract %v4half %val 4
+         %27 = OpCompositeConstruct %mat4v4half %23 %24 %25 %26
+         %28 = OpCompositeExtract %int %val 5
+         %29 = OpCompositeConstruct %S %22 %27 %28
+               OpReturnValue %29
+               OpFunctionEnd
+%conv_arr4_S = OpFunction %_arr_S_uint_4 None %30
+      %val_0 = OpFunctionParameter %_arr_S_std140_uint_4
+         %33 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_S_uint_4 Function %36
+          %i = OpVariable %_ptr_Function_uint Function %39
+%var_for_index = OpVariable %_ptr_Function__arr_S_std140_uint_4 Function %52
+               OpBranch %40
+         %40 = OpLabel
+               OpLoopMerge %41 %42 None
+               OpBranch %43
+         %43 = OpLabel
+         %45 = OpLoad %uint %i
+         %46 = OpULessThan %bool %45 %uint_4
+         %44 = OpLogicalNot %bool %46
+               OpSelectionMerge %48 None
+               OpBranchConditional %44 %49 %48
+         %49 = OpLabel
+               OpBranch %41
+         %48 = OpLabel
+               OpStore %var_for_index %val_0
+         %53 = OpLoad %uint %i
+         %55 = OpAccessChain %_ptr_Function_S %arr %53
+         %57 = OpLoad %uint %i
+         %59 = OpAccessChain %_ptr_Function_S_std140 %var_for_index %57
+         %60 = OpLoad %S_std140 %59
+         %56 = OpFunctionCall %S %conv_S %60
+               OpStore %55 %56
+               OpBranch %42
+         %42 = OpLabel
+         %61 = OpLoad %uint %i
+         %63 = OpIAdd %uint %61 %uint_1
+               OpStore %i %63
+               OpBranch %40
+         %41 = OpLabel
+         %64 = OpLoad %_arr_S_uint_4 %arr
+               OpReturnValue %64
+               OpFunctionEnd
+%load_u_inner_2_m = OpFunction %mat4v4half None %65
+         %67 = OpLabel
+         %72 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+         %75 = OpAccessChain %_ptr_Uniform_v4half %72 %uint_1
+         %76 = OpLoad %v4half %75
+         %78 = OpAccessChain %_ptr_Uniform_v4half %72 %uint_2
+         %79 = OpLoad %v4half %78
+         %82 = OpAccessChain %_ptr_Uniform_v4half %72 %uint_3
+         %83 = OpLoad %v4half %82
+         %85 = OpAccessChain %_ptr_Uniform_v4half %72 %uint_4
+         %86 = OpLoad %v4half %85
+         %87 = OpCompositeConstruct %mat4v4half %76 %79 %83 %86
+               OpReturnValue %87
+               OpFunctionEnd
+    %f_inner = OpFunction %void None %88
+%local_invocation_index = OpFunctionParameter %uint
+         %92 = OpLabel
+        %idx = OpVariable %_ptr_Function_uint Function %39
+               OpStore %idx %local_invocation_index
+               OpBranch %94
+         %94 = OpLabel
+               OpLoopMerge %95 %96 None
+               OpBranch %97
+         %97 = OpLabel
+         %99 = OpLoad %uint %idx
+        %100 = OpULessThan %bool %99 %uint_4
+         %98 = OpLogicalNot %bool %100
+               OpSelectionMerge %101 None
+               OpBranchConditional %98 %102 %101
+        %102 = OpLabel
+               OpBranch %95
+        %101 = OpLabel
+        %103 = OpLoad %uint %idx
+        %105 = OpAccessChain %_ptr_Workgroup_S %w %103
+               OpStore %105 %106
+               OpBranch %96
+         %96 = OpLabel
+        %107 = OpLoad %uint %idx
+        %108 = OpIAdd %uint %107 %uint_1
+               OpStore %idx %108
+               OpBranch %94
+         %95 = OpLabel
+               OpControlBarrier %uint_2 %uint_2 %uint_264
+        %113 = OpAccessChain %_ptr_Uniform__arr_S_std140_uint_4 %u %uint_0
+        %114 = OpLoad %_arr_S_std140_uint_4 %113
+        %111 = OpFunctionCall %_arr_S_uint_4 %conv_arr4_S %114
+               OpStore %w %111
+        %116 = OpAccessChain %_ptr_Workgroup_S %w %int_1
+        %118 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0 %uint_2
+        %119 = OpLoad %S_std140 %118
+        %117 = OpFunctionCall %S %conv_S %119
+               OpStore %116 %117
+        %122 = OpAccessChain %_ptr_Workgroup_mat4v4half %w %int_3 %uint_1
+        %123 = OpFunctionCall %mat4v4half %load_u_inner_2_m
+               OpStore %122 %123
+        %126 = OpAccessChain %_ptr_Workgroup_v4half %w %int_1 %uint_1 %124
+        %127 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0 %39 %uint_2
+        %128 = OpLoad %v4half %127
+        %129 = OpVectorShuffle %v4half %128 %128 1 3 0 2
+               OpStore %126 %129
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %130
+        %132 = OpLabel
+        %134 = OpLoad %uint %local_invocation_index_1
+        %133 = OpFunctionCall %void %f_inner %134
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_workgroup.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_workgroup.wgsl.expected.wgsl
new file mode 100644
index 0000000..7788484
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f16/to_workgroup.wgsl.expected.wgsl

@@ -0,0 +1,20 @@
+enable f16;
+
+struct S {
+  before : i32,
+  m : mat4x4<f16>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+var<workgroup> w : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  w = u;
+  w[1] = u[2];
+  w[3].m = u[2].m;
+  w[1].m[0] = u[0].m[1].ywxz;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f32/dynamic_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/dynamic_index_via_ptr.wgsl
new file mode 100644
index 0000000..aee7c0c
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/dynamic_index_via_ptr.wgsl

@@ -0,0 +1,32 @@
+struct Inner {
+  @size(64)
+  m : mat4x4<f32>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+var<private> counter = 0;
+fn i() -> i32 { counter++; return counter; }
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a           = &a;
+  let p_a_i         = &((*p_a)[i()]);
+  let p_a_i_a       = &((*p_a_i).a);
+  let p_a_i_a_i     = &((*p_a_i_a)[i()]);
+  let p_a_i_a_i_m   = &((*p_a_i_a_i).m);
+  let p_a_i_a_i_m_i = &((*p_a_i_a_i_m)[i()]);
+
+
+  let l_a             : array<Outer, 4> =  *p_a;
+  let l_a_i           : Outer           =  *p_a_i;
+  let l_a_i_a         : array<Inner, 4> =  *p_a_i_a;
+  let l_a_i_a_i       : Inner           =  *p_a_i_a_i;
+  let l_a_i_a_i_m     : mat4x4<f32>     =  *p_a_i_a_i_m;
+  let l_a_i_a_i_m_i   : vec4<f32>       =  *p_a_i_a_i_m_i;
+  let l_a_i_a_i_m_i_i : f32             = (*p_a_i_a_i_m_i)[i()];
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..0029647
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,77 @@
+struct Inner {
+  float4x4 m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+float4x4 tint_symbol_8(uint4 buffer[64], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]), asfloat(buffer[scalar_offset_3 / 4]));
+}
+
+Inner tint_symbol_7(uint4 buffer[64], uint offset) {
+  const Inner tint_symbol_11 = {tint_symbol_8(buffer, (offset + 0u))};
+  return tint_symbol_11;
+}
+
+typedef Inner tint_symbol_6_ret[4];
+tint_symbol_6_ret tint_symbol_6(uint4 buffer[64], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_7(buffer, (offset + (i_1 * 64u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_5(uint4 buffer[64], uint offset) {
+  const Outer tint_symbol_12 = {tint_symbol_6(buffer, (offset + 0u))};
+  return tint_symbol_12;
+}
+
+typedef Outer tint_symbol_4_ret[4];
+tint_symbol_4_ret tint_symbol_4(uint4 buffer[64], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    for(uint i_2 = 0u; (i_2 < 4u); i_2 = (i_2 + 1u)) {
+      arr_1[i_2] = tint_symbol_5(buffer, (offset + (i_2 * 256u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_a_i_save = i();
+  const int p_a_i_a_i_save = i();
+  const int p_a_i_a_i_m_i_save = i();
+  const Outer l_a[4] = tint_symbol_4(a, 0u);
+  const Outer l_a_i = tint_symbol_5(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a[4] = tint_symbol_6(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a_i = tint_symbol_7(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const float4x4 l_a_i_a_i_m = tint_symbol_8(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const uint scalar_offset_4 = ((((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))) + (16u * uint(p_a_i_a_i_m_i_save)))) / 4;
+  const float4 l_a_i_a_i_m_i = asfloat(a[scalar_offset_4 / 4]);
+  const int tint_symbol = p_a_i_save;
+  const int tint_symbol_1 = p_a_i_a_i_save;
+  const int tint_symbol_2 = p_a_i_a_i_m_i_save;
+  const int tint_symbol_3 = i();
+  const uint scalar_offset_5 = (((((256u * uint(tint_symbol)) + (64u * uint(tint_symbol_1))) + (16u * uint(tint_symbol_2))) + (4u * uint(tint_symbol_3)))) / 4;
+  const float l_a_i_a_i_m_i_i = asfloat(a[scalar_offset_5 / 4][scalar_offset_5 % 4]);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..0029647
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,77 @@
+struct Inner {
+  float4x4 m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+float4x4 tint_symbol_8(uint4 buffer[64], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]), asfloat(buffer[scalar_offset_3 / 4]));
+}
+
+Inner tint_symbol_7(uint4 buffer[64], uint offset) {
+  const Inner tint_symbol_11 = {tint_symbol_8(buffer, (offset + 0u))};
+  return tint_symbol_11;
+}
+
+typedef Inner tint_symbol_6_ret[4];
+tint_symbol_6_ret tint_symbol_6(uint4 buffer[64], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_7(buffer, (offset + (i_1 * 64u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_5(uint4 buffer[64], uint offset) {
+  const Outer tint_symbol_12 = {tint_symbol_6(buffer, (offset + 0u))};
+  return tint_symbol_12;
+}
+
+typedef Outer tint_symbol_4_ret[4];
+tint_symbol_4_ret tint_symbol_4(uint4 buffer[64], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    for(uint i_2 = 0u; (i_2 < 4u); i_2 = (i_2 + 1u)) {
+      arr_1[i_2] = tint_symbol_5(buffer, (offset + (i_2 * 256u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_a_i_save = i();
+  const int p_a_i_a_i_save = i();
+  const int p_a_i_a_i_m_i_save = i();
+  const Outer l_a[4] = tint_symbol_4(a, 0u);
+  const Outer l_a_i = tint_symbol_5(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a[4] = tint_symbol_6(a, (256u * uint(p_a_i_save)));
+  const Inner l_a_i_a_i = tint_symbol_7(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const float4x4 l_a_i_a_i_m = tint_symbol_8(a, ((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))));
+  const uint scalar_offset_4 = ((((256u * uint(p_a_i_save)) + (64u * uint(p_a_i_a_i_save))) + (16u * uint(p_a_i_a_i_m_i_save)))) / 4;
+  const float4 l_a_i_a_i_m_i = asfloat(a[scalar_offset_4 / 4]);
+  const int tint_symbol = p_a_i_save;
+  const int tint_symbol_1 = p_a_i_a_i_save;
+  const int tint_symbol_2 = p_a_i_a_i_m_i_save;
+  const int tint_symbol_3 = i();
+  const uint scalar_offset_5 = (((((256u * uint(tint_symbol)) + (64u * uint(tint_symbol_1))) + (16u * uint(tint_symbol_2))) + (4u * uint(tint_symbol_3)))) / 4;
+  const float l_a_i_a_i_m_i_i = asfloat(a[scalar_offset_5 / 4][scalar_offset_5 % 4]);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f32/dynamic_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/dynamic_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..0ca086c
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/dynamic_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,42 @@
+#version 310 es
+
+struct Inner {
+  mat4 m;
+};
+
+struct Outer {
+  Inner a[4];
+};
+
+layout(binding = 0, std140) uniform a_block_ubo {
+  Outer inner[4];
+} a;
+
+int counter = 0;
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+void f() {
+  int tint_symbol = i();
+  int p_a_i_save = tint_symbol;
+  int tint_symbol_1 = i();
+  int p_a_i_a_i_save = tint_symbol_1;
+  int tint_symbol_2 = i();
+  int p_a_i_a_i_m_i_save = tint_symbol_2;
+  Outer l_a[4] = a.inner;
+  Outer l_a_i = a.inner[p_a_i_save];
+  Inner l_a_i_a[4] = a.inner[p_a_i_save].a;
+  Inner l_a_i_a_i = a.inner[p_a_i_save].a[p_a_i_a_i_save];
+  mat4 l_a_i_a_i_m = a.inner[p_a_i_save].a[p_a_i_a_i_save].m;
+  vec4 l_a_i_a_i_m_i = a.inner[p_a_i_save].a[p_a_i_a_i_save].m[p_a_i_a_i_m_i_save];
+  int tint_symbol_3 = i();
+  float l_a_i_a_i_m_i_i = a.inner[p_a_i_save].a[p_a_i_a_i_save].m[p_a_i_a_i_m_i_save][tint_symbol_3];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f32/dynamic_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/dynamic_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..5fc5475
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/dynamic_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,48 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct Inner {
+  /* 0x0000 */ float4x4 m;
+};
+
+struct Outer {
+  /* 0x0000 */ tint_array<Inner, 4> a;
+};
+
+int i() {
+  thread int tint_symbol_4 = 0;
+  tint_symbol_4 = as_type<int>((as_type<uint>(tint_symbol_4) + as_type<uint>(1)));
+  return tint_symbol_4;
+}
+
+kernel void f(const constant tint_array<Outer, 4>* tint_symbol_5 [[buffer(0)]]) {
+  int const tint_symbol = i();
+  int const p_a_i_save = tint_symbol;
+  int const tint_symbol_1 = i();
+  int const p_a_i_a_i_save = tint_symbol_1;
+  int const tint_symbol_2 = i();
+  int const p_a_i_a_i_m_i_save = tint_symbol_2;
+  tint_array<Outer, 4> const l_a = *(tint_symbol_5);
+  Outer const l_a_i = (*(tint_symbol_5))[p_a_i_save];
+  tint_array<Inner, 4> const l_a_i_a = (*(tint_symbol_5))[p_a_i_save].a;
+  Inner const l_a_i_a_i = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save];
+  float4x4 const l_a_i_a_i_m = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save].m;
+  float4 const l_a_i_a_i_m_i = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save].m[p_a_i_a_i_m_i_save];
+  int const tint_symbol_3 = i();
+  float const l_a_i_a_i_m_i_i = (*(tint_symbol_5))[p_a_i_save].a[p_a_i_a_i_save].m[p_a_i_a_i_m_i_save][tint_symbol_3];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f32/dynamic_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/dynamic_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..fb735a0
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/dynamic_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,88 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 54
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %a_block "a_block"
+               OpMemberName %a_block 0 "inner"
+               OpName %Outer "Outer"
+               OpMemberName %Outer 0 "a"
+               OpName %Inner "Inner"
+               OpMemberName %Inner 0 "m"
+               OpName %a "a"
+               OpName %counter "counter"
+               OpName %i "i"
+               OpName %f "f"
+               OpDecorate %a_block Block
+               OpMemberDecorate %a_block 0 Offset 0
+               OpMemberDecorate %Outer 0 Offset 0
+               OpMemberDecorate %Inner 0 Offset 0
+               OpMemberDecorate %Inner 0 ColMajor
+               OpMemberDecorate %Inner 0 MatrixStride 16
+               OpDecorate %_arr_Inner_uint_4 ArrayStride 64
+               OpDecorate %_arr_Outer_uint_4 ArrayStride 256
+               OpDecorate %a NonWritable
+               OpDecorate %a DescriptorSet 0
+               OpDecorate %a Binding 0
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%mat4v4float = OpTypeMatrix %v4float 4
+      %Inner = OpTypeStruct %mat4v4float
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_Inner_uint_4 = OpTypeArray %Inner %uint_4
+      %Outer = OpTypeStruct %_arr_Inner_uint_4
+%_arr_Outer_uint_4 = OpTypeArray %Outer %uint_4
+    %a_block = OpTypeStruct %_arr_Outer_uint_4
+%_ptr_Uniform_a_block = OpTypePointer Uniform %a_block
+          %a = OpVariable %_ptr_Uniform_a_block Uniform
+        %int = OpTypeInt 32 1
+         %14 = OpConstantNull %int
+%_ptr_Private_int = OpTypePointer Private %int
+    %counter = OpVariable %_ptr_Private_int Private %14
+         %17 = OpTypeFunction %int
+      %int_1 = OpConstant %int 1
+       %void = OpTypeVoid
+         %24 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_Outer_uint_4 = OpTypePointer Uniform %_arr_Outer_uint_4
+%_ptr_Uniform_Outer = OpTypePointer Uniform %Outer
+%_ptr_Uniform__arr_Inner_uint_4 = OpTypePointer Uniform %_arr_Inner_uint_4
+%_ptr_Uniform_Inner = OpTypePointer Uniform %Inner
+%_ptr_Uniform_mat4v4float = OpTypePointer Uniform %mat4v4float
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+          %i = OpFunction %int None %17
+         %19 = OpLabel
+         %20 = OpLoad %int %counter
+         %22 = OpIAdd %int %20 %int_1
+               OpStore %counter %22
+         %23 = OpLoad %int %counter
+               OpReturnValue %23
+               OpFunctionEnd
+          %f = OpFunction %void None %24
+         %27 = OpLabel
+         %28 = OpFunctionCall %int %i
+         %29 = OpFunctionCall %int %i
+         %30 = OpFunctionCall %int %i
+         %33 = OpAccessChain %_ptr_Uniform__arr_Outer_uint_4 %a %uint_0
+         %34 = OpLoad %_arr_Outer_uint_4 %33
+         %36 = OpAccessChain %_ptr_Uniform_Outer %a %uint_0 %28
+         %37 = OpLoad %Outer %36
+         %39 = OpAccessChain %_ptr_Uniform__arr_Inner_uint_4 %a %uint_0 %28 %uint_0
+         %40 = OpLoad %_arr_Inner_uint_4 %39
+         %42 = OpAccessChain %_ptr_Uniform_Inner %a %uint_0 %28 %uint_0 %29
+         %43 = OpLoad %Inner %42
+         %45 = OpAccessChain %_ptr_Uniform_mat4v4float %a %uint_0 %28 %uint_0 %29 %uint_0
+         %46 = OpLoad %mat4v4float %45
+         %48 = OpAccessChain %_ptr_Uniform_v4float %a %uint_0 %28 %uint_0 %29 %uint_0 %30
+         %49 = OpLoad %v4float %48
+         %50 = OpFunctionCall %int %i
+         %52 = OpAccessChain %_ptr_Uniform_float %a %uint_0 %28 %uint_0 %29 %uint_0 %30 %50
+         %53 = OpLoad %float %52
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f32/dynamic_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/dynamic_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..06d520a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/dynamic_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,34 @@
+struct Inner {
+  @size(64)
+  m : mat4x4<f32>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+var<private> counter = 0;
+
+fn i() -> i32 {
+  counter++;
+  return counter;
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &(a);
+  let p_a_i = &((*(p_a))[i()]);
+  let p_a_i_a = &((*(p_a_i)).a);
+  let p_a_i_a_i = &((*(p_a_i_a))[i()]);
+  let p_a_i_a_i_m = &((*(p_a_i_a_i)).m);
+  let p_a_i_a_i_m_i = &((*(p_a_i_a_i_m))[i()]);
+  let l_a : array<Outer, 4> = *(p_a);
+  let l_a_i : Outer = *(p_a_i);
+  let l_a_i_a : array<Inner, 4> = *(p_a_i_a);
+  let l_a_i_a_i : Inner = *(p_a_i_a_i);
+  let l_a_i_a_i_m : mat4x4<f32> = *(p_a_i_a_i_m);
+  let l_a_i_a_i_m_i : vec4<f32> = *(p_a_i_a_i_m_i);
+  let l_a_i_a_i_m_i_i : f32 = (*(p_a_i_a_i_m_i))[i()];
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f32/static_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/static_index_via_ptr.wgsl
new file mode 100644
index 0000000..a50e0e3
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/static_index_via_ptr.wgsl

@@ -0,0 +1,29 @@
+struct Inner {
+  @size(64)
+  m : mat4x4<f32>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &a;
+  let p_a_3 = &((*p_a)[3]);
+  let p_a_3_a = &((*p_a_3).a);
+  let p_a_3_a_2 = &((*p_a_3_a)[2]);
+  let p_a_3_a_2_m = &((*p_a_3_a_2).m);
+  let p_a_3_a_2_m_1 = &((*p_a_3_a_2_m)[1]);
+
+
+  let l_a             : array<Outer, 4> = *p_a;
+  let l_a_3           : Outer           = *p_a_3;
+  let l_a_3_a         : array<Inner, 4> = *p_a_3_a;
+  let l_a_3_a_2       : Inner           = *p_a_3_a_2;
+  let l_a_3_a_2_m     : mat4x4<f32>     = *p_a_3_a_2_m;
+  let l_a_3_a_2_m_1   : vec4<f32>       = *p_a_3_a_2_m_1;
+  let l_a_3_a_2_m_1_0 : f32             = (*p_a_3_a_2_m_1)[0];
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..045c9d3
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,62 @@
+struct Inner {
+  float4x4 m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];
+};
+
+float4x4 tint_symbol_4(uint4 buffer[64], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]), asfloat(buffer[scalar_offset_3 / 4]));
+}
+
+Inner tint_symbol_3(uint4 buffer[64], uint offset) {
+  const Inner tint_symbol_7 = {tint_symbol_4(buffer, (offset + 0u))};
+  return tint_symbol_7;
+}
+
+typedef Inner tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[64], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_3(buffer, (offset + (i * 64u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_1(uint4 buffer[64], uint offset) {
+  const Outer tint_symbol_8 = {tint_symbol_2(buffer, (offset + 0u))};
+  return tint_symbol_8;
+}
+
+typedef Outer tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[64], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr_1[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 256u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const Outer l_a[4] = tint_symbol(a, 0u);
+  const Outer l_a_3 = tint_symbol_1(a, 768u);
+  const Inner l_a_3_a[4] = tint_symbol_2(a, 768u);
+  const Inner l_a_3_a_2 = tint_symbol_3(a, 896u);
+  const float4x4 l_a_3_a_2_m = tint_symbol_4(a, 896u);
+  const float4 l_a_3_a_2_m_1 = asfloat(a[57]);
+  const float l_a_3_a_2_m_1_0 = asfloat(a[57].x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..045c9d3
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,62 @@
+struct Inner {
+  float4x4 m;
+};
+struct Outer {
+  Inner a[4];
+};
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[64];
+};
+
+float4x4 tint_symbol_4(uint4 buffer[64], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]), asfloat(buffer[scalar_offset_3 / 4]));
+}
+
+Inner tint_symbol_3(uint4 buffer[64], uint offset) {
+  const Inner tint_symbol_7 = {tint_symbol_4(buffer, (offset + 0u))};
+  return tint_symbol_7;
+}
+
+typedef Inner tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[64], uint offset) {
+  Inner arr[4] = (Inner[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_3(buffer, (offset + (i * 64u)));
+    }
+  }
+  return arr;
+}
+
+Outer tint_symbol_1(uint4 buffer[64], uint offset) {
+  const Outer tint_symbol_8 = {tint_symbol_2(buffer, (offset + 0u))};
+  return tint_symbol_8;
+}
+
+typedef Outer tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[64], uint offset) {
+  Outer arr_1[4] = (Outer[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr_1[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 256u)));
+    }
+  }
+  return arr_1;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const Outer l_a[4] = tint_symbol(a, 0u);
+  const Outer l_a_3 = tint_symbol_1(a, 768u);
+  const Inner l_a_3_a[4] = tint_symbol_2(a, 768u);
+  const Inner l_a_3_a_2 = tint_symbol_3(a, 896u);
+  const float4x4 l_a_3_a_2_m = tint_symbol_4(a, 896u);
+  const float4 l_a_3_a_2_m_1 = asfloat(a[57]);
+  const float l_a_3_a_2_m_1_0 = asfloat(a[57].x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f32/static_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/static_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..e0dae3c
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/static_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,29 @@
+#version 310 es
+
+struct Inner {
+  mat4 m;
+};
+
+struct Outer {
+  Inner a[4];
+};
+
+layout(binding = 0, std140) uniform a_block_ubo {
+  Outer inner[4];
+} a;
+
+void f() {
+  Outer l_a[4] = a.inner;
+  Outer l_a_3 = a.inner[3];
+  Inner l_a_3_a[4] = a.inner[3].a;
+  Inner l_a_3_a_2 = a.inner[3].a[2];
+  mat4 l_a_3_a_2_m = a.inner[3].a[2].m;
+  vec4 l_a_3_a_2_m_1 = a.inner[3].a[2].m[1];
+  float l_a_3_a_2_m_1_0 = a.inner[3].a[2].m[1][0];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f32/static_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/static_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..4c1c3cc
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/static_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,35 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct Inner {
+  /* 0x0000 */ float4x4 m;
+};
+
+struct Outer {
+  /* 0x0000 */ tint_array<Inner, 4> a;
+};
+
+kernel void f(const constant tint_array<Outer, 4>* tint_symbol [[buffer(0)]]) {
+  tint_array<Outer, 4> const l_a = *(tint_symbol);
+  Outer const l_a_3 = (*(tint_symbol))[3];
+  tint_array<Inner, 4> const l_a_3_a = (*(tint_symbol))[3].a;
+  Inner const l_a_3_a_2 = (*(tint_symbol))[3].a[2];
+  float4x4 const l_a_3_a_2_m = (*(tint_symbol))[3].a[2].m;
+  float4 const l_a_3_a_2_m_1 = (*(tint_symbol))[3].a[2].m[1];
+  float const l_a_3_a_2_m_1_0 = (*(tint_symbol))[3].a[2].m[1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f32/static_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/static_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..2ed9260
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/static_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,73 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 44
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %a_block "a_block"
+               OpMemberName %a_block 0 "inner"
+               OpName %Outer "Outer"
+               OpMemberName %Outer 0 "a"
+               OpName %Inner "Inner"
+               OpMemberName %Inner 0 "m"
+               OpName %a "a"
+               OpName %f "f"
+               OpDecorate %a_block Block
+               OpMemberDecorate %a_block 0 Offset 0
+               OpMemberDecorate %Outer 0 Offset 0
+               OpMemberDecorate %Inner 0 Offset 0
+               OpMemberDecorate %Inner 0 ColMajor
+               OpMemberDecorate %Inner 0 MatrixStride 16
+               OpDecorate %_arr_Inner_uint_4 ArrayStride 64
+               OpDecorate %_arr_Outer_uint_4 ArrayStride 256
+               OpDecorate %a NonWritable
+               OpDecorate %a DescriptorSet 0
+               OpDecorate %a Binding 0
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%mat4v4float = OpTypeMatrix %v4float 4
+      %Inner = OpTypeStruct %mat4v4float
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_Inner_uint_4 = OpTypeArray %Inner %uint_4
+      %Outer = OpTypeStruct %_arr_Inner_uint_4
+%_arr_Outer_uint_4 = OpTypeArray %Outer %uint_4
+    %a_block = OpTypeStruct %_arr_Outer_uint_4
+%_ptr_Uniform_a_block = OpTypePointer Uniform %a_block
+          %a = OpVariable %_ptr_Uniform_a_block Uniform
+       %void = OpTypeVoid
+         %13 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_Outer_uint_4 = OpTypePointer Uniform %_arr_Outer_uint_4
+        %int = OpTypeInt 32 1
+      %int_3 = OpConstant %int 3
+%_ptr_Uniform_Outer = OpTypePointer Uniform %Outer
+%_ptr_Uniform__arr_Inner_uint_4 = OpTypePointer Uniform %_arr_Inner_uint_4
+      %int_2 = OpConstant %int 2
+%_ptr_Uniform_Inner = OpTypePointer Uniform %Inner
+%_ptr_Uniform_mat4v4float = OpTypePointer Uniform %mat4v4float
+      %int_1 = OpConstant %int 1
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+         %40 = OpConstantNull %int
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+          %f = OpFunction %void None %13
+         %16 = OpLabel
+         %19 = OpAccessChain %_ptr_Uniform__arr_Outer_uint_4 %a %uint_0
+         %20 = OpLoad %_arr_Outer_uint_4 %19
+         %24 = OpAccessChain %_ptr_Uniform_Outer %a %uint_0 %int_3
+         %25 = OpLoad %Outer %24
+         %27 = OpAccessChain %_ptr_Uniform__arr_Inner_uint_4 %a %uint_0 %int_3 %uint_0
+         %28 = OpLoad %_arr_Inner_uint_4 %27
+         %31 = OpAccessChain %_ptr_Uniform_Inner %a %uint_0 %int_3 %uint_0 %int_2
+         %32 = OpLoad %Inner %31
+         %34 = OpAccessChain %_ptr_Uniform_mat4v4float %a %uint_0 %int_3 %uint_0 %int_2 %uint_0
+         %35 = OpLoad %mat4v4float %34
+         %38 = OpAccessChain %_ptr_Uniform_v4float %a %uint_0 %int_3 %uint_0 %int_2 %uint_0 %int_1
+         %39 = OpLoad %v4float %38
+         %42 = OpAccessChain %_ptr_Uniform_float %a %uint_0 %int_3 %uint_0 %int_2 %uint_0 %int_1 %40
+         %43 = OpLoad %float %42
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f32/static_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/static_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..5377dfc
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/static_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,27 @@
+struct Inner {
+  @size(64)
+  m : mat4x4<f32>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &(a);
+  let p_a_3 = &((*(p_a))[3]);
+  let p_a_3_a = &((*(p_a_3)).a);
+  let p_a_3_a_2 = &((*(p_a_3_a))[2]);
+  let p_a_3_a_2_m = &((*(p_a_3_a_2)).m);
+  let p_a_3_a_2_m_1 = &((*(p_a_3_a_2_m))[1]);
+  let l_a : array<Outer, 4> = *(p_a);
+  let l_a_3 : Outer = *(p_a_3);
+  let l_a_3_a : array<Inner, 4> = *(p_a_3_a);
+  let l_a_3_a_2 : Inner = *(p_a_3_a_2);
+  let l_a_3_a_2_m : mat4x4<f32> = *(p_a_3_a_2_m);
+  let l_a_3_a_2_m_1 : vec4<f32> = *(p_a_3_a_2_m_1);
+  let l_a_3_a_2_m_1_0 : f32 = (*(p_a_3_a_2_m_1))[0];
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_builtin.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_builtin.wgsl
new file mode 100644
index 0000000..d7bf513
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_builtin.wgsl

@@ -0,0 +1,15 @@
+struct S {
+  before : i32,
+  m : mat4x4<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    let t = transpose(u[2].m);
+    let l = length(u[0].m[1].ywxz);
+    let a = abs(u[0].m[1].ywxz.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_builtin.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_builtin.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..8748244
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_builtin.wgsl.expected.dxc.hlsl

@@ -0,0 +1,19 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[48];
+};
+
+float4x4 tint_symbol(uint4 buffer[48], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]), asfloat(buffer[scalar_offset_3 / 4]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float4x4 t = transpose(tint_symbol(u, 400u));
+  const float l = length(asfloat(u[2]).ywxz);
+  const float a = abs(asfloat(u[2]).ywxz.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_builtin.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_builtin.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..8748244
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_builtin.wgsl.expected.fxc.hlsl

@@ -0,0 +1,19 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[48];
+};
+
+float4x4 tint_symbol(uint4 buffer[48], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]), asfloat(buffer[scalar_offset_3 / 4]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float4x4 t = transpose(tint_symbol(u, 400u));
+  const float l = length(asfloat(u[2]).ywxz);
+  const float a = abs(asfloat(u[2]).ywxz.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_builtin.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_builtin.wgsl.expected.glsl
new file mode 100644
index 0000000..2fb59ea
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_builtin.wgsl.expected.glsl

@@ -0,0 +1,53 @@
+#version 310 es
+
+struct S {
+  int before;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  mat4 m;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  int after;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+  uint pad_26;
+  uint pad_27;
+  uint pad_28;
+  uint pad_29;
+};
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  S inner[4];
+} u;
+
+void f() {
+  mat4 t = transpose(u.inner[2].m);
+  float l = length(u.inner[0].m[1].ywxz);
+  float a = abs(u.inner[0].m[1].ywxz.x);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_builtin.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_builtin.wgsl.expected.msl
new file mode 100644
index 0000000..e7981db
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_builtin.wgsl.expected.msl

@@ -0,0 +1,32 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 12> tint_pad;
+  /* 0x0010 */ float4x4 m;
+  /* 0x0050 */ tint_array<int8_t, 48> tint_pad_1;
+  /* 0x0080 */ int after;
+  /* 0x0084 */ tint_array<int8_t, 60> tint_pad_2;
+};
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol [[buffer(0)]]) {
+  float4x4 const t = transpose((*(tint_symbol))[2].m);
+  float const l = length(float4((*(tint_symbol))[0].m[1]).ywxz);
+  float const a = fabs(float4((*(tint_symbol))[0].m[1]).ywxz[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_builtin.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_builtin.wgsl.expected.spvasm
new file mode 100644
index 0000000..b239797
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_builtin.wgsl.expected.spvasm

@@ -0,0 +1,65 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 36
+; Schema: 0
+               OpCapability Shader
+         %24 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %u "u"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 16
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 16
+               OpMemberDecorate %S 2 Offset 128
+               OpDecorate %_arr_S_uint_4 ArrayStride 192
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+        %int = OpTypeInt 32 1
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%mat4v4float = OpTypeMatrix %v4float 4
+          %S = OpTypeStruct %int %mat4v4float %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+    %u_block = OpTypeStruct %_arr_S_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+       %void = OpTypeVoid
+         %12 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+      %int_2 = OpConstant %int 2
+     %uint_1 = OpConstant %uint 1
+%_ptr_Uniform_mat4v4float = OpTypePointer Uniform %mat4v4float
+         %25 = OpConstantNull %int
+      %int_1 = OpConstant %int 1
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+          %f = OpFunction %void None %12
+         %15 = OpLabel
+         %21 = OpAccessChain %_ptr_Uniform_mat4v4float %u %uint_0 %int_2 %uint_1
+         %22 = OpLoad %mat4v4float %21
+         %16 = OpTranspose %mat4v4float %22
+         %28 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %25 %uint_1 %int_1
+         %29 = OpLoad %v4float %28
+         %30 = OpVectorShuffle %v4float %29 %29 1 3 0 2
+         %23 = OpExtInst %float %24 Length %30
+         %32 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %25 %uint_1 %int_1
+         %33 = OpLoad %v4float %32
+         %34 = OpVectorShuffle %v4float %33 %33 1 3 0 2
+         %35 = OpCompositeExtract %float %34 0
+         %31 = OpExtInst %float %24 FAbs %35
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_builtin.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_builtin.wgsl.expected.wgsl
new file mode 100644
index 0000000..0a0e872
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_builtin.wgsl.expected.wgsl

@@ -0,0 +1,15 @@
+struct S {
+  before : i32,
+  m : mat4x4<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let t = transpose(u[2].m);
+  let l = length(u[0].m[1].ywxz);
+  let a = abs(u[0].m[1].ywxz.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_fn.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_fn.wgsl
new file mode 100644
index 0000000..0de96f6
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_fn.wgsl

@@ -0,0 +1,23 @@
+struct S {
+  before : i32,
+  m : mat4x4<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+fn a(a : array<S, 4>) {}
+fn b(s : S) {}
+fn c(m : mat4x4<f32>) {}
+fn d(v : vec4<f32>) {}
+fn e(f : f32) {}
+
+@compute @workgroup_size(1)
+fn f() {
+    a(u);
+    b(u[2]);
+    c(u[2].m);
+    d(u[0].m[1].ywxz);
+    e(u[0].m[1].ywxz.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_fn.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_fn.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..4aaff71
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_fn.wgsl.expected.dxc.hlsl

@@ -0,0 +1,60 @@
+struct S {
+  int before;
+  float4x4 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[48];
+};
+
+void a(S a_1[4]) {
+}
+
+void b(S s) {
+}
+
+void c(float4x4 m) {
+}
+
+void d(float4 v) {
+}
+
+void e(float f_1) {
+}
+
+float4x4 tint_symbol_3(uint4 buffer[48], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]), asfloat(buffer[scalar_offset_3 / 4]));
+}
+
+S tint_symbol_1(uint4 buffer[48], uint offset) {
+  const uint scalar_offset_4 = ((offset + 0u)) / 4;
+  const uint scalar_offset_5 = ((offset + 128u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4]), tint_symbol_3(buffer, (offset + 16u)), asint(buffer[scalar_offset_5 / 4][scalar_offset_5 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[48], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 192u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(tint_symbol_1(u, 384u));
+  c(tint_symbol_3(u, 400u));
+  d(asfloat(u[2]).ywxz);
+  e(asfloat(u[2]).ywxz.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_fn.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_fn.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..4aaff71
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_fn.wgsl.expected.fxc.hlsl

@@ -0,0 +1,60 @@
+struct S {
+  int before;
+  float4x4 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[48];
+};
+
+void a(S a_1[4]) {
+}
+
+void b(S s) {
+}
+
+void c(float4x4 m) {
+}
+
+void d(float4 v) {
+}
+
+void e(float f_1) {
+}
+
+float4x4 tint_symbol_3(uint4 buffer[48], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]), asfloat(buffer[scalar_offset_3 / 4]));
+}
+
+S tint_symbol_1(uint4 buffer[48], uint offset) {
+  const uint scalar_offset_4 = ((offset + 0u)) / 4;
+  const uint scalar_offset_5 = ((offset + 128u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4]), tint_symbol_3(buffer, (offset + 16u)), asint(buffer[scalar_offset_5 / 4][scalar_offset_5 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[48], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 192u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(tint_symbol_1(u, 384u));
+  c(tint_symbol_3(u, 400u));
+  d(asfloat(u[2]).ywxz);
+  e(asfloat(u[2]).ywxz.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_fn.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_fn.wgsl.expected.glsl
new file mode 100644
index 0000000..25c9615
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_fn.wgsl.expected.glsl

@@ -0,0 +1,70 @@
+#version 310 es
+
+struct S {
+  int before;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  mat4 m;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  int after;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+  uint pad_26;
+  uint pad_27;
+  uint pad_28;
+  uint pad_29;
+};
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  S inner[4];
+} u;
+
+void a(S a_1[4]) {
+}
+
+void b(S s) {
+}
+
+void c(mat4 m) {
+}
+
+void d(vec4 v) {
+}
+
+void e(float f_1) {
+}
+
+void f() {
+  a(u.inner);
+  b(u.inner[2]);
+  c(u.inner[2].m);
+  d(u.inner[0].m[1].ywxz);
+  e(u.inner[0].m[1].ywxz.x);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_fn.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_fn.wgsl.expected.msl
new file mode 100644
index 0000000..032931f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_fn.wgsl.expected.msl

@@ -0,0 +1,49 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 12> tint_pad;
+  /* 0x0010 */ float4x4 m;
+  /* 0x0050 */ tint_array<int8_t, 48> tint_pad_1;
+  /* 0x0080 */ int after;
+  /* 0x0084 */ tint_array<int8_t, 60> tint_pad_2;
+};
+
+void a(tint_array<S, 4> a_1) {
+}
+
+void b(S s) {
+}
+
+void c(float4x4 m) {
+}
+
+void d(float4 v) {
+}
+
+void e(float f_1) {
+}
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol [[buffer(0)]]) {
+  a(*(tint_symbol));
+  b((*(tint_symbol))[2]);
+  c((*(tint_symbol))[2].m);
+  d(float4((*(tint_symbol))[0].m[1]).ywxz);
+  e(float4((*(tint_symbol))[0].m[1]).ywxz[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_fn.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_fn.wgsl.expected.spvasm
new file mode 100644
index 0000000..ca7c033
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_fn.wgsl.expected.spvasm

@@ -0,0 +1,112 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 63
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %u "u"
+               OpName %a "a"
+               OpName %a_1 "a_1"
+               OpName %b "b"
+               OpName %s "s"
+               OpName %c "c"
+               OpName %m "m"
+               OpName %d "d"
+               OpName %v "v"
+               OpName %e "e"
+               OpName %f_1 "f_1"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 16
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 16
+               OpMemberDecorate %S 2 Offset 128
+               OpDecorate %_arr_S_uint_4 ArrayStride 192
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+        %int = OpTypeInt 32 1
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%mat4v4float = OpTypeMatrix %v4float 4
+          %S = OpTypeStruct %int %mat4v4float %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+    %u_block = OpTypeStruct %_arr_S_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+       %void = OpTypeVoid
+         %12 = OpTypeFunction %void %_arr_S_uint_4
+         %17 = OpTypeFunction %void %S
+         %21 = OpTypeFunction %void %mat4v4float
+         %25 = OpTypeFunction %void %v4float
+         %29 = OpTypeFunction %void %float
+         %33 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_S_uint_4 = OpTypePointer Uniform %_arr_S_uint_4
+      %int_2 = OpConstant %int 2
+%_ptr_Uniform_S = OpTypePointer Uniform %S
+     %uint_1 = OpConstant %uint 1
+%_ptr_Uniform_mat4v4float = OpTypePointer Uniform %mat4v4float
+         %52 = OpConstantNull %int
+      %int_1 = OpConstant %int 1
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+          %a = OpFunction %void None %12
+        %a_1 = OpFunctionParameter %_arr_S_uint_4
+         %16 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %b = OpFunction %void None %17
+          %s = OpFunctionParameter %S
+         %20 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %c = OpFunction %void None %21
+          %m = OpFunctionParameter %mat4v4float
+         %24 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %d = OpFunction %void None %25
+          %v = OpFunctionParameter %v4float
+         %28 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %e = OpFunction %void None %29
+        %f_1 = OpFunctionParameter %float
+         %32 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %33
+         %35 = OpLabel
+         %39 = OpAccessChain %_ptr_Uniform__arr_S_uint_4 %u %uint_0
+         %40 = OpLoad %_arr_S_uint_4 %39
+         %36 = OpFunctionCall %void %a %40
+         %44 = OpAccessChain %_ptr_Uniform_S %u %uint_0 %int_2
+         %45 = OpLoad %S %44
+         %41 = OpFunctionCall %void %b %45
+         %49 = OpAccessChain %_ptr_Uniform_mat4v4float %u %uint_0 %int_2 %uint_1
+         %50 = OpLoad %mat4v4float %49
+         %46 = OpFunctionCall %void %c %50
+         %55 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %52 %uint_1 %int_1
+         %56 = OpLoad %v4float %55
+         %57 = OpVectorShuffle %v4float %56 %56 1 3 0 2
+         %51 = OpFunctionCall %void %d %57
+         %59 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %52 %uint_1 %int_1
+         %60 = OpLoad %v4float %59
+         %61 = OpVectorShuffle %v4float %60 %60 1 3 0 2
+         %62 = OpCompositeExtract %float %61 0
+         %58 = OpFunctionCall %void %e %62
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_fn.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_fn.wgsl.expected.wgsl
new file mode 100644
index 0000000..614b495
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_fn.wgsl.expected.wgsl

@@ -0,0 +1,32 @@
+struct S {
+  before : i32,
+  m : mat4x4<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+fn a(a : array<S, 4>) {
+}
+
+fn b(s : S) {
+}
+
+fn c(m : mat4x4<f32>) {
+}
+
+fn d(v : vec4<f32>) {
+}
+
+fn e(f : f32) {
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  a(u);
+  b(u[2]);
+  c(u[2].m);
+  d(u[0].m[1].ywxz);
+  e(u[0].m[1].ywxz.x);
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_private.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_private.wgsl
new file mode 100644
index 0000000..aeb7b5e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_private.wgsl

@@ -0,0 +1,17 @@
+struct S {
+  before : i32,
+  m : mat4x4<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+var<private> p : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    p = u;
+    p[1] = u[2];
+    p[3].m = u[2].m;
+    p[1].m[0] = u[0].m[1].ywxz;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_private.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_private.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..be13120
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_private.wgsl.expected.dxc.hlsl

@@ -0,0 +1,45 @@
+struct S {
+  int before;
+  float4x4 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[48];
+};
+static S p[4] = (S[4])0;
+
+float4x4 tint_symbol_3(uint4 buffer[48], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]), asfloat(buffer[scalar_offset_3 / 4]));
+}
+
+S tint_symbol_1(uint4 buffer[48], uint offset) {
+  const uint scalar_offset_4 = ((offset + 0u)) / 4;
+  const uint scalar_offset_5 = ((offset + 128u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4]), tint_symbol_3(buffer, (offset + 16u)), asint(buffer[scalar_offset_5 / 4][scalar_offset_5 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[48], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 192u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = tint_symbol_1(u, 384u);
+  p[3].m = tint_symbol_3(u, 400u);
+  p[1].m[0] = asfloat(u[2]).ywxz;
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_private.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_private.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..be13120
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_private.wgsl.expected.fxc.hlsl

@@ -0,0 +1,45 @@
+struct S {
+  int before;
+  float4x4 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[48];
+};
+static S p[4] = (S[4])0;
+
+float4x4 tint_symbol_3(uint4 buffer[48], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]), asfloat(buffer[scalar_offset_3 / 4]));
+}
+
+S tint_symbol_1(uint4 buffer[48], uint offset) {
+  const uint scalar_offset_4 = ((offset + 0u)) / 4;
+  const uint scalar_offset_5 = ((offset + 128u)) / 4;
+  const S tint_symbol_5 = {asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4]), tint_symbol_3(buffer, (offset + 16u)), asint(buffer[scalar_offset_5 / 4][scalar_offset_5 % 4])};
+  return tint_symbol_5;
+}
+
+typedef S tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[48], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 192u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = tint_symbol_1(u, 384u);
+  p[3].m = tint_symbol_3(u, 400u);
+  p[1].m[0] = asfloat(u[2]).ywxz;
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_private.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_private.wgsl.expected.glsl
new file mode 100644
index 0000000..3f9ed3c
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_private.wgsl.expected.glsl

@@ -0,0 +1,55 @@
+#version 310 es
+
+struct S {
+  int before;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  mat4 m;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  int after;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+  uint pad_26;
+  uint pad_27;
+  uint pad_28;
+  uint pad_29;
+};
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  S inner[4];
+} u;
+
+S p[4] = S[4](S(0, 0u, 0u, 0u, mat4(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, 0u, 0u, mat4(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, 0u, 0u, mat4(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u), S(0, 0u, 0u, 0u, mat4(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u));
+void f() {
+  p = u.inner;
+  p[1] = u.inner[2];
+  p[3].m = u.inner[2].m;
+  p[1].m[0] = u.inner[0].m[1].ywxz;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_private.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_private.wgsl.expected.msl
new file mode 100644
index 0000000..ebaafb7
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_private.wgsl.expected.msl

@@ -0,0 +1,34 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 12> tint_pad;
+  /* 0x0010 */ float4x4 m;
+  /* 0x0050 */ tint_array<int8_t, 48> tint_pad_1;
+  /* 0x0080 */ int after;
+  /* 0x0084 */ tint_array<int8_t, 60> tint_pad_2;
+};
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol_1 [[buffer(0)]]) {
+  thread tint_array<S, 4> tint_symbol = {};
+  tint_symbol = *(tint_symbol_1);
+  tint_symbol[1] = (*(tint_symbol_1))[2];
+  tint_symbol[3].m = (*(tint_symbol_1))[2].m;
+  tint_symbol[1].m[0] = float4((*(tint_symbol_1))[0].m[1]).ywxz;
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_private.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_private.wgsl.expected.spvasm
new file mode 100644
index 0000000..6f1b407
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_private.wgsl.expected.spvasm

@@ -0,0 +1,78 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 44
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %u "u"
+               OpName %p "p"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 16
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 16
+               OpMemberDecorate %S 2 Offset 128
+               OpDecorate %_arr_S_uint_4 ArrayStride 192
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+        %int = OpTypeInt 32 1
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%mat4v4float = OpTypeMatrix %v4float 4
+          %S = OpTypeStruct %int %mat4v4float %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+    %u_block = OpTypeStruct %_arr_S_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%_ptr_Private__arr_S_uint_4 = OpTypePointer Private %_arr_S_uint_4
+         %14 = OpConstantNull %_arr_S_uint_4
+          %p = OpVariable %_ptr_Private__arr_S_uint_4 Private %14
+       %void = OpTypeVoid
+         %15 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_S_uint_4 = OpTypePointer Uniform %_arr_S_uint_4
+      %int_1 = OpConstant %int 1
+%_ptr_Private_S = OpTypePointer Private %S
+      %int_2 = OpConstant %int 2
+%_ptr_Uniform_S = OpTypePointer Uniform %S
+      %int_3 = OpConstant %int 3
+     %uint_1 = OpConstant %uint 1
+%_ptr_Private_mat4v4float = OpTypePointer Private %mat4v4float
+%_ptr_Uniform_mat4v4float = OpTypePointer Uniform %mat4v4float
+         %37 = OpConstantNull %int
+%_ptr_Private_v4float = OpTypePointer Private %v4float
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+          %f = OpFunction %void None %15
+         %18 = OpLabel
+         %21 = OpAccessChain %_ptr_Uniform__arr_S_uint_4 %u %uint_0
+         %22 = OpLoad %_arr_S_uint_4 %21
+               OpStore %p %22
+         %25 = OpAccessChain %_ptr_Private_S %p %int_1
+         %28 = OpAccessChain %_ptr_Uniform_S %u %uint_0 %int_2
+         %29 = OpLoad %S %28
+               OpStore %25 %29
+         %33 = OpAccessChain %_ptr_Private_mat4v4float %p %int_3 %uint_1
+         %35 = OpAccessChain %_ptr_Uniform_mat4v4float %u %uint_0 %int_2 %uint_1
+         %36 = OpLoad %mat4v4float %35
+               OpStore %33 %36
+         %39 = OpAccessChain %_ptr_Private_v4float %p %int_1 %uint_1 %37
+         %41 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %37 %uint_1 %int_1
+         %42 = OpLoad %v4float %41
+         %43 = OpVectorShuffle %v4float %42 %42 1 3 0 2
+               OpStore %39 %43
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_private.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_private.wgsl.expected.wgsl
new file mode 100644
index 0000000..7c8565d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_private.wgsl.expected.wgsl

@@ -0,0 +1,18 @@
+struct S {
+  before : i32,
+  m : mat4x4<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+var<private> p : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  p = u;
+  p[1] = u[2];
+  p[3].m = u[2].m;
+  p[1].m[0] = u[0].m[1].ywxz;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_storage.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_storage.wgsl
new file mode 100644
index 0000000..0dbde254
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_storage.wgsl

@@ -0,0 +1,17 @@
+struct S {
+  before : i32,
+  m : mat4x4<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    s = u;
+    s[1] = u[2];
+    s[3].m = u[2].m;
+    s[1].m[0] = u[0].m[1].ywxz;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_storage.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_storage.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..cc21007
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_storage.wgsl.expected.dxc.hlsl

@@ -0,0 +1,67 @@
+struct S {
+  int before;
+  float4x4 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[48];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol_3(RWByteAddressBuffer buffer, uint offset, float4x4 value) {
+  buffer.Store4((offset + 0u), asuint(value[0u]));
+  buffer.Store4((offset + 16u), asuint(value[1u]));
+  buffer.Store4((offset + 32u), asuint(value[2u]));
+  buffer.Store4((offset + 48u), asuint(value[3u]));
+}
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, S value) {
+  buffer.Store((offset + 0u), asuint(value.before));
+  tint_symbol_3(buffer, (offset + 16u), value.m);
+  buffer.Store((offset + 128u), asuint(value.after));
+}
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, S value[4]) {
+  S array[4] = value;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      tint_symbol_1(buffer, (offset + (i * 192u)), array[i]);
+    }
+  }
+}
+
+float4x4 tint_symbol_8(uint4 buffer[48], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]), asfloat(buffer[scalar_offset_3 / 4]));
+}
+
+S tint_symbol_6(uint4 buffer[48], uint offset) {
+  const uint scalar_offset_4 = ((offset + 0u)) / 4;
+  const uint scalar_offset_5 = ((offset + 128u)) / 4;
+  const S tint_symbol_10 = {asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4]), tint_symbol_8(buffer, (offset + 16u)), asint(buffer[scalar_offset_5 / 4][scalar_offset_5 % 4])};
+  return tint_symbol_10;
+}
+
+typedef S tint_symbol_5_ret[4];
+tint_symbol_5_ret tint_symbol_5(uint4 buffer[48], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_6(buffer, (offset + (i_1 * 192u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_5(u, 0u));
+  tint_symbol_1(s, 192u, tint_symbol_6(u, 384u));
+  tint_symbol_3(s, 592u, tint_symbol_8(u, 400u));
+  s.Store4(208u, asuint(asfloat(u[2]).ywxz));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_storage.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_storage.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..cc21007
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_storage.wgsl.expected.fxc.hlsl

@@ -0,0 +1,67 @@
+struct S {
+  int before;
+  float4x4 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[48];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol_3(RWByteAddressBuffer buffer, uint offset, float4x4 value) {
+  buffer.Store4((offset + 0u), asuint(value[0u]));
+  buffer.Store4((offset + 16u), asuint(value[1u]));
+  buffer.Store4((offset + 32u), asuint(value[2u]));
+  buffer.Store4((offset + 48u), asuint(value[3u]));
+}
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, S value) {
+  buffer.Store((offset + 0u), asuint(value.before));
+  tint_symbol_3(buffer, (offset + 16u), value.m);
+  buffer.Store((offset + 128u), asuint(value.after));
+}
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, S value[4]) {
+  S array[4] = value;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      tint_symbol_1(buffer, (offset + (i * 192u)), array[i]);
+    }
+  }
+}
+
+float4x4 tint_symbol_8(uint4 buffer[48], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]), asfloat(buffer[scalar_offset_3 / 4]));
+}
+
+S tint_symbol_6(uint4 buffer[48], uint offset) {
+  const uint scalar_offset_4 = ((offset + 0u)) / 4;
+  const uint scalar_offset_5 = ((offset + 128u)) / 4;
+  const S tint_symbol_10 = {asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4]), tint_symbol_8(buffer, (offset + 16u)), asint(buffer[scalar_offset_5 / 4][scalar_offset_5 % 4])};
+  return tint_symbol_10;
+}
+
+typedef S tint_symbol_5_ret[4];
+tint_symbol_5_ret tint_symbol_5(uint4 buffer[48], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_6(buffer, (offset + (i_1 * 192u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_5(u, 0u));
+  tint_symbol_1(s, 192u, tint_symbol_6(u, 384u));
+  tint_symbol_3(s, 592u, tint_symbol_8(u, 400u));
+  s.Store4(208u, asuint(asfloat(u[2]).ywxz));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_storage.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_storage.wgsl.expected.glsl
new file mode 100644
index 0000000..012d60c
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_storage.wgsl.expected.glsl

@@ -0,0 +1,58 @@
+#version 310 es
+
+struct S {
+  int before;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  mat4 m;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  int after;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+  uint pad_26;
+  uint pad_27;
+  uint pad_28;
+  uint pad_29;
+};
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  S inner[4];
+} u;
+
+layout(binding = 1, std430) buffer u_block_ssbo {
+  S inner[4];
+} s;
+
+void f() {
+  s.inner = u.inner;
+  s.inner[1] = u.inner[2];
+  s.inner[3].m = u.inner[2].m;
+  s.inner[1].m[0] = u.inner[0].m[1].ywxz;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_storage.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_storage.wgsl.expected.msl
new file mode 100644
index 0000000..b049b86
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_storage.wgsl.expected.msl

@@ -0,0 +1,33 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 12> tint_pad;
+  /* 0x0010 */ float4x4 m;
+  /* 0x0050 */ tint_array<int8_t, 48> tint_pad_1;
+  /* 0x0080 */ int after;
+  /* 0x0084 */ tint_array<int8_t, 60> tint_pad_2;
+};
+
+kernel void f(device tint_array<S, 4>* tint_symbol [[buffer(1)]], const constant tint_array<S, 4>* tint_symbol_1 [[buffer(0)]]) {
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[2];
+  (*(tint_symbol))[3].m = (*(tint_symbol_1))[2].m;
+  (*(tint_symbol))[1].m[0] = float4((*(tint_symbol_1))[0].m[1]).ywxz;
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_storage.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_storage.wgsl.expected.spvasm
new file mode 100644
index 0000000..899c29c
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_storage.wgsl.expected.spvasm

@@ -0,0 +1,81 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 45
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %u "u"
+               OpName %s "s"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 16
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 16
+               OpMemberDecorate %S 2 Offset 128
+               OpDecorate %_arr_S_uint_4 ArrayStride 192
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpDecorate %s DescriptorSet 0
+               OpDecorate %s Binding 1
+        %int = OpTypeInt 32 1
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%mat4v4float = OpTypeMatrix %v4float 4
+          %S = OpTypeStruct %int %mat4v4float %int
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+    %u_block = OpTypeStruct %_arr_S_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%_ptr_StorageBuffer_u_block = OpTypePointer StorageBuffer %u_block
+          %s = OpVariable %_ptr_StorageBuffer_u_block StorageBuffer
+       %void = OpTypeVoid
+         %14 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_StorageBuffer__arr_S_uint_4 = OpTypePointer StorageBuffer %_arr_S_uint_4
+%_ptr_Uniform__arr_S_uint_4 = OpTypePointer Uniform %_arr_S_uint_4
+      %int_1 = OpConstant %int 1
+%_ptr_StorageBuffer_S = OpTypePointer StorageBuffer %S
+      %int_2 = OpConstant %int 2
+%_ptr_Uniform_S = OpTypePointer Uniform %S
+      %int_3 = OpConstant %int 3
+     %uint_1 = OpConstant %uint 1
+%_ptr_StorageBuffer_mat4v4float = OpTypePointer StorageBuffer %mat4v4float
+%_ptr_Uniform_mat4v4float = OpTypePointer Uniform %mat4v4float
+         %38 = OpConstantNull %int
+%_ptr_StorageBuffer_v4float = OpTypePointer StorageBuffer %v4float
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+          %f = OpFunction %void None %14
+         %17 = OpLabel
+         %20 = OpAccessChain %_ptr_StorageBuffer__arr_S_uint_4 %s %uint_0
+         %22 = OpAccessChain %_ptr_Uniform__arr_S_uint_4 %u %uint_0
+         %23 = OpLoad %_arr_S_uint_4 %22
+               OpStore %20 %23
+         %26 = OpAccessChain %_ptr_StorageBuffer_S %s %uint_0 %int_1
+         %29 = OpAccessChain %_ptr_Uniform_S %u %uint_0 %int_2
+         %30 = OpLoad %S %29
+               OpStore %26 %30
+         %34 = OpAccessChain %_ptr_StorageBuffer_mat4v4float %s %uint_0 %int_3 %uint_1
+         %36 = OpAccessChain %_ptr_Uniform_mat4v4float %u %uint_0 %int_2 %uint_1
+         %37 = OpLoad %mat4v4float %36
+               OpStore %34 %37
+         %40 = OpAccessChain %_ptr_StorageBuffer_v4float %s %uint_0 %int_1 %uint_1 %38
+         %42 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %38 %uint_1 %int_1
+         %43 = OpLoad %v4float %42
+         %44 = OpVectorShuffle %v4float %43 %43 1 3 0 2
+               OpStore %40 %44
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_storage.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_storage.wgsl.expected.wgsl
new file mode 100644
index 0000000..8b33398
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_storage.wgsl.expected.wgsl

@@ -0,0 +1,18 @@
+struct S {
+  before : i32,
+  m : mat4x4<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  s = u;
+  s[1] = u[2];
+  s[3].m = u[2].m;
+  s[1].m[0] = u[0].m[1].ywxz;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_workgroup.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_workgroup.wgsl
new file mode 100644
index 0000000..c952bdf
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_workgroup.wgsl

@@ -0,0 +1,17 @@
+struct S {
+  before : i32,
+  m : mat4x4<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+var<workgroup> w : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    w = u;
+    w[1] = u[2];
+    w[3].m = u[2].m;
+    w[1].m[0] = u[0].m[1].ywxz;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_workgroup.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_workgroup.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..5bf6274
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_workgroup.wgsl.expected.dxc.hlsl

@@ -0,0 +1,61 @@
+struct S {
+  int before;
+  float4x4 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[48];
+};
+groupshared S w[4];
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+float4x4 tint_symbol_5(uint4 buffer[48], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]), asfloat(buffer[scalar_offset_3 / 4]));
+}
+
+S tint_symbol_3(uint4 buffer[48], uint offset) {
+  const uint scalar_offset_4 = ((offset + 0u)) / 4;
+  const uint scalar_offset_5 = ((offset + 128u)) / 4;
+  const S tint_symbol_8 = {asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4]), tint_symbol_5(buffer, (offset + 16u)), asint(buffer[scalar_offset_5 / 4][scalar_offset_5 % 4])};
+  return tint_symbol_8;
+}
+
+typedef S tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[48], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 192u)));
+    }
+  }
+  return arr;
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      const uint i = idx;
+      const S tint_symbol_7 = (S)0;
+      w[i] = tint_symbol_7;
+    }
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = tint_symbol_3(u, 384u);
+  w[3].m = tint_symbol_5(u, 400u);
+  w[1].m[0] = asfloat(u[2]).ywxz;
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_workgroup.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_workgroup.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..5bf6274
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_workgroup.wgsl.expected.fxc.hlsl

@@ -0,0 +1,61 @@
+struct S {
+  int before;
+  float4x4 m;
+  int after;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[48];
+};
+groupshared S w[4];
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+float4x4 tint_symbol_5(uint4 buffer[48], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]), asfloat(buffer[scalar_offset_3 / 4]));
+}
+
+S tint_symbol_3(uint4 buffer[48], uint offset) {
+  const uint scalar_offset_4 = ((offset + 0u)) / 4;
+  const uint scalar_offset_5 = ((offset + 128u)) / 4;
+  const S tint_symbol_8 = {asint(buffer[scalar_offset_4 / 4][scalar_offset_4 % 4]), tint_symbol_5(buffer, (offset + 16u)), asint(buffer[scalar_offset_5 / 4][scalar_offset_5 % 4])};
+  return tint_symbol_8;
+}
+
+typedef S tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[48], uint offset) {
+  S arr[4] = (S[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 192u)));
+    }
+  }
+  return arr;
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      const uint i = idx;
+      const S tint_symbol_7 = (S)0;
+      w[i] = tint_symbol_7;
+    }
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = tint_symbol_3(u, 384u);
+  w[3].m = tint_symbol_5(u, 400u);
+  w[1].m[0] = asfloat(u[2]).ywxz;
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_workgroup.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_workgroup.wgsl.expected.glsl
new file mode 100644
index 0000000..327158d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_workgroup.wgsl.expected.glsl

@@ -0,0 +1,63 @@
+#version 310 es
+
+struct S {
+  int before;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  mat4 m;
+  uint pad_3;
+  uint pad_4;
+  uint pad_5;
+  uint pad_6;
+  uint pad_7;
+  uint pad_8;
+  uint pad_9;
+  uint pad_10;
+  uint pad_11;
+  uint pad_12;
+  uint pad_13;
+  uint pad_14;
+  int after;
+  uint pad_15;
+  uint pad_16;
+  uint pad_17;
+  uint pad_18;
+  uint pad_19;
+  uint pad_20;
+  uint pad_21;
+  uint pad_22;
+  uint pad_23;
+  uint pad_24;
+  uint pad_25;
+  uint pad_26;
+  uint pad_27;
+  uint pad_28;
+  uint pad_29;
+};
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  S inner[4];
+} u;
+
+shared S w[4];
+void f(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      uint i = idx;
+      S tint_symbol = S(0, 0u, 0u, 0u, mat4(vec4(0.0f), vec4(0.0f), vec4(0.0f), vec4(0.0f)), 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u);
+      w[i] = tint_symbol;
+    }
+  }
+  barrier();
+  w = u.inner;
+  w[1] = u.inner[2];
+  w[3].m = u.inner[2].m;
+  w[1].m[0] = u.inner[0].m[1].ywxz;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f(gl_LocalInvocationIndex);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_workgroup.wgsl.expected.msl
new file mode 100644
index 0000000..302c388
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_workgroup.wgsl.expected.msl

@@ -0,0 +1,48 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct S {
+  /* 0x0000 */ int before;
+  /* 0x0004 */ tint_array<int8_t, 12> tint_pad;
+  /* 0x0010 */ float4x4 m;
+  /* 0x0050 */ tint_array<int8_t, 48> tint_pad_1;
+  /* 0x0080 */ int after;
+  /* 0x0084 */ tint_array<int8_t, 60> tint_pad_2;
+};
+
+struct tint_symbol_6 {
+  tint_array<S, 4> w;
+};
+
+void f_inner(uint local_invocation_index, threadgroup tint_array<S, 4>* const tint_symbol_1, const constant tint_array<S, 4>* const tint_symbol_2) {
+  for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+    uint const i = idx;
+    S const tint_symbol = S{};
+    (*(tint_symbol_1))[i] = tint_symbol;
+  }
+  threadgroup_barrier(mem_flags::mem_threadgroup);
+  *(tint_symbol_1) = *(tint_symbol_2);
+  (*(tint_symbol_1))[1] = (*(tint_symbol_2))[2];
+  (*(tint_symbol_1))[3].m = (*(tint_symbol_2))[2].m;
+  (*(tint_symbol_1))[1].m[0] = float4((*(tint_symbol_2))[0].m[1]).ywxz;
+}
+
+kernel void f(const constant tint_array<S, 4>* tint_symbol_5 [[buffer(0)]], threadgroup tint_symbol_6* tint_symbol_4 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
+  threadgroup tint_array<S, 4>* const tint_symbol_3 = &((*(tint_symbol_4)).w);
+  f_inner(local_invocation_index, tint_symbol_3, tint_symbol_5);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_workgroup.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_workgroup.wgsl.expected.spvasm
new file mode 100644
index 0000000..220cd9b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_workgroup.wgsl.expected.spvasm

@@ -0,0 +1,124 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 72
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f" %local_invocation_index_1
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %local_invocation_index_1 "local_invocation_index_1"
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %S "S"
+               OpMemberName %S 0 "before"
+               OpMemberName %S 1 "m"
+               OpMemberName %S 2 "after"
+               OpName %u "u"
+               OpName %w "w"
+               OpName %f_inner "f_inner"
+               OpName %local_invocation_index "local_invocation_index"
+               OpName %idx "idx"
+               OpName %f "f"
+               OpDecorate %local_invocation_index_1 BuiltIn LocalInvocationIndex
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %S 1 Offset 16
+               OpMemberDecorate %S 1 ColMajor
+               OpMemberDecorate %S 1 MatrixStride 16
+               OpMemberDecorate %S 2 Offset 128
+               OpDecorate %_arr_S_uint_4 ArrayStride 192
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %uint = OpTypeInt 32 0
+%_ptr_Input_uint = OpTypePointer Input %uint
+%local_invocation_index_1 = OpVariable %_ptr_Input_uint Input
+        %int = OpTypeInt 32 1
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%mat4v4float = OpTypeMatrix %v4float 4
+          %S = OpTypeStruct %int %mat4v4float %int
+     %uint_4 = OpConstant %uint 4
+%_arr_S_uint_4 = OpTypeArray %S %uint_4
+    %u_block = OpTypeStruct %_arr_S_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%_ptr_Workgroup__arr_S_uint_4 = OpTypePointer Workgroup %_arr_S_uint_4
+          %w = OpVariable %_ptr_Workgroup__arr_S_uint_4 Workgroup
+       %void = OpTypeVoid
+         %16 = OpTypeFunction %void %uint
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %23 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Workgroup_S = OpTypePointer Workgroup %S
+         %37 = OpConstantNull %S
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+   %uint_264 = OpConstant %uint 264
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_S_uint_4 = OpTypePointer Uniform %_arr_S_uint_4
+      %int_1 = OpConstant %int 1
+      %int_2 = OpConstant %int 2
+%_ptr_Uniform_S = OpTypePointer Uniform %S
+      %int_3 = OpConstant %int 3
+%_ptr_Workgroup_mat4v4float = OpTypePointer Workgroup %mat4v4float
+%_ptr_Uniform_mat4v4float = OpTypePointer Uniform %mat4v4float
+         %60 = OpConstantNull %int
+%_ptr_Workgroup_v4float = OpTypePointer Workgroup %v4float
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+         %67 = OpTypeFunction %void
+    %f_inner = OpFunction %void None %16
+%local_invocation_index = OpFunctionParameter %uint
+         %20 = OpLabel
+        %idx = OpVariable %_ptr_Function_uint Function %23
+               OpStore %idx %local_invocation_index
+               OpBranch %24
+         %24 = OpLabel
+               OpLoopMerge %25 %26 None
+               OpBranch %27
+         %27 = OpLabel
+         %29 = OpLoad %uint %idx
+         %30 = OpULessThan %bool %29 %uint_4
+         %28 = OpLogicalNot %bool %30
+               OpSelectionMerge %32 None
+               OpBranchConditional %28 %33 %32
+         %33 = OpLabel
+               OpBranch %25
+         %32 = OpLabel
+         %34 = OpLoad %uint %idx
+         %36 = OpAccessChain %_ptr_Workgroup_S %w %34
+               OpStore %36 %37
+               OpBranch %26
+         %26 = OpLabel
+         %38 = OpLoad %uint %idx
+         %40 = OpIAdd %uint %38 %uint_1
+               OpStore %idx %40
+               OpBranch %24
+         %25 = OpLabel
+               OpControlBarrier %uint_2 %uint_2 %uint_264
+         %46 = OpAccessChain %_ptr_Uniform__arr_S_uint_4 %u %uint_0
+         %47 = OpLoad %_arr_S_uint_4 %46
+               OpStore %w %47
+         %49 = OpAccessChain %_ptr_Workgroup_S %w %int_1
+         %52 = OpAccessChain %_ptr_Uniform_S %u %uint_0 %int_2
+         %53 = OpLoad %S %52
+               OpStore %49 %53
+         %56 = OpAccessChain %_ptr_Workgroup_mat4v4float %w %int_3 %uint_1
+         %58 = OpAccessChain %_ptr_Uniform_mat4v4float %u %uint_0 %int_2 %uint_1
+         %59 = OpLoad %mat4v4float %58
+               OpStore %56 %59
+         %62 = OpAccessChain %_ptr_Workgroup_v4float %w %int_1 %uint_1 %60
+         %64 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %60 %uint_1 %int_1
+         %65 = OpLoad %v4float %64
+         %66 = OpVectorShuffle %v4float %65 %65 1 3 0 2
+               OpStore %62 %66
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %67
+         %69 = OpLabel
+         %71 = OpLoad %uint %local_invocation_index_1
+         %70 = OpFunctionCall %void %f_inner %71
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_workgroup.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_workgroup.wgsl.expected.wgsl
new file mode 100644
index 0000000..c618743
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/struct/mat4x4_f32/to_workgroup.wgsl.expected.wgsl

@@ -0,0 +1,18 @@
+struct S {
+  before : i32,
+  m : mat4x4<f32>,
+  @align(64) @size(16)
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+var<workgroup> w : array<S, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  w = u;
+  w[1] = u[2];
+  w[3].m = u[2].m;
+  w[1].m[0] = u[0].m[1].ywxz;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/dynamic_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/dynamic_index_via_ptr.wgsl
new file mode 100644
index 0000000..8f28ad3
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/dynamic_index_via_ptr.wgsl

@@ -0,0 +1,15 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> m : mat2x2<f16>;
+
+var<private> counter = 0;
+fn i() -> i32 { counter++; return counter; }
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_m   = &m;
+  let p_m_i = &((*p_m)[i()]);
+
+  let l_m   : mat2x2<f16> = *p_m;
+  let l_m_i : vec2<f16>   = *p_m_i;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..5dc66f6
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,27 @@
+cbuffer cbuffer_m : register(b0, space0) {
+  uint4 m[1];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+matrix<float16_t, 2, 2> tint_symbol(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  return matrix<float16_t, 2, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_m_i_save = i();
+  const matrix<float16_t, 2, 2> l_m = tint_symbol(m, 0u);
+  const uint scalar_offset_2 = ((4u * uint(p_m_i_save))) / 4;
+  uint ubo_load_2 = m[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  const vector<float16_t, 2> l_m_i = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16)));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..02ff823
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,32 @@
+SKIP: FAILED
+
+cbuffer cbuffer_m : register(b0, space0) {
+  uint4 m[1];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+matrix<float16_t, 2, 2> tint_symbol(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  return matrix<float16_t, 2, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_m_i_save = i();
+  const matrix<float16_t, 2, 2> l_m = tint_symbol(m, 0u);
+  const uint scalar_offset_2 = ((4u * uint(p_m_i_save))) / 4;
+  uint ubo_load_2 = m[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  const vector<float16_t, 2> l_m_i = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16)));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000021290B9AF00(11,8-16): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/dynamic_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/dynamic_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..72425b7
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/dynamic_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,48 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform m_block_std140_ubo {
+  f16vec2 inner_0;
+  f16vec2 inner_1;
+} m;
+
+int counter = 0;
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+f16mat2 load_m_inner() {
+  return f16mat2(m.inner_0, m.inner_1);
+}
+
+f16vec2 load_m_inner_p0(uint p0) {
+  switch(p0) {
+    case 0u: {
+      return m.inner_0;
+      break;
+    }
+    case 1u: {
+      return m.inner_1;
+      break;
+    }
+    default: {
+      return f16vec2(0.0hf);
+      break;
+    }
+  }
+}
+
+void f() {
+  f16mat2 p_m = load_m_inner();
+  int tint_symbol = i();
+  f16vec2 p_m_i = load_m_inner_p0(uint(tint_symbol));
+  f16mat2 l_m = load_m_inner();
+  f16vec2 l_m_i = load_m_inner_p0(uint(tint_symbol));
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/dynamic_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/dynamic_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..f9562f6
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/dynamic_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,17 @@
+#include <metal_stdlib>
+
+using namespace metal;
+int i() {
+  thread int tint_symbol_1 = 0;
+  tint_symbol_1 = as_type<int>((as_type<uint>(tint_symbol_1) + as_type<uint>(1)));
+  return tint_symbol_1;
+}
+
+kernel void f(const constant half2x2* tint_symbol_2 [[buffer(0)]]) {
+  int const tint_symbol = i();
+  int const p_m_i_save = tint_symbol;
+  half2x2 const l_m = *(tint_symbol_2);
+  half2 const l_m_i = (*(tint_symbol_2))[p_m_i_save];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/dynamic_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/dynamic_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..4270e43
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/dynamic_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,93 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 54
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %m_block_std140 "m_block_std140"
+               OpMemberName %m_block_std140 0 "inner_0"
+               OpMemberName %m_block_std140 1 "inner_1"
+               OpName %m "m"
+               OpName %counter "counter"
+               OpName %i "i"
+               OpName %load_m_inner "load_m_inner"
+               OpName %load_m_inner_p0 "load_m_inner_p0"
+               OpName %p0 "p0"
+               OpName %f "f"
+               OpDecorate %m_block_std140 Block
+               OpMemberDecorate %m_block_std140 0 Offset 0
+               OpMemberDecorate %m_block_std140 1 Offset 4
+               OpDecorate %m NonWritable
+               OpDecorate %m DescriptorSet 0
+               OpDecorate %m Binding 0
+       %half = OpTypeFloat 16
+     %v2half = OpTypeVector %half 2
+%m_block_std140 = OpTypeStruct %v2half %v2half
+%_ptr_Uniform_m_block_std140 = OpTypePointer Uniform %m_block_std140
+          %m = OpVariable %_ptr_Uniform_m_block_std140 Uniform
+        %int = OpTypeInt 32 1
+          %7 = OpConstantNull %int
+%_ptr_Private_int = OpTypePointer Private %int
+    %counter = OpVariable %_ptr_Private_int Private %7
+         %10 = OpTypeFunction %int
+      %int_1 = OpConstant %int 1
+ %mat2v2half = OpTypeMatrix %v2half 2
+         %17 = OpTypeFunction %mat2v2half
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half
+     %uint_1 = OpConstant %uint 1
+         %33 = OpTypeFunction %v2half %uint
+         %45 = OpConstantNull %v2half
+       %void = OpTypeVoid
+         %46 = OpTypeFunction %void
+          %i = OpFunction %int None %10
+         %12 = OpLabel
+         %13 = OpLoad %int %counter
+         %15 = OpIAdd %int %13 %int_1
+               OpStore %counter %15
+         %16 = OpLoad %int %counter
+               OpReturnValue %16
+               OpFunctionEnd
+%load_m_inner = OpFunction %mat2v2half None %17
+         %20 = OpLabel
+         %26 = OpAccessChain %_ptr_Uniform_v2half %m %uint_0
+         %27 = OpLoad %v2half %26
+         %30 = OpAccessChain %_ptr_Uniform_v2half %m %uint_1
+         %31 = OpLoad %v2half %30
+         %32 = OpCompositeConstruct %mat2v2half %27 %31
+               OpReturnValue %32
+               OpFunctionEnd
+%load_m_inner_p0 = OpFunction %v2half None %33
+         %p0 = OpFunctionParameter %uint
+         %36 = OpLabel
+               OpSelectionMerge %37 None
+               OpSwitch %p0 %38 0 %39 1 %40
+         %39 = OpLabel
+         %41 = OpAccessChain %_ptr_Uniform_v2half %m %uint_0
+         %42 = OpLoad %v2half %41
+               OpReturnValue %42
+         %40 = OpLabel
+         %43 = OpAccessChain %_ptr_Uniform_v2half %m %uint_1
+         %44 = OpLoad %v2half %43
+               OpReturnValue %44
+         %38 = OpLabel
+               OpReturnValue %45
+         %37 = OpLabel
+               OpReturnValue %45
+               OpFunctionEnd
+          %f = OpFunction %void None %46
+         %49 = OpLabel
+         %50 = OpFunctionCall %int %i
+         %51 = OpFunctionCall %mat2v2half %load_m_inner
+         %53 = OpBitcast %uint %50
+         %52 = OpFunctionCall %v2half %load_m_inner_p0 %53
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/dynamic_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/dynamic_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..2ba6a33
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/dynamic_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,18 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> m : mat2x2<f16>;
+
+var<private> counter = 0;
+
+fn i() -> i32 {
+  counter++;
+  return counter;
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_m = &(m);
+  let p_m_i = &((*(p_m))[i()]);
+  let l_m : mat2x2<f16> = *(p_m);
+  let l_m_i : vec2<f16> = *(p_m_i);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/static_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/static_index_via_ptr.wgsl
new file mode 100644
index 0000000..62c686f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/static_index_via_ptr.wgsl

@@ -0,0 +1,15 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> m : mat2x2<f16>;
+
+var<private> counter = 0;
+fn i() -> i32 { counter++; return counter; }
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_m   = &m;
+  let p_m_1 = &((*p_m)[1]);
+
+  let l_m   : mat2x2<f16> = *p_m;
+  let l_m_1 : vec2<f16>   = *p_m_1;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..b381bd2
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,25 @@
+cbuffer cbuffer_m : register(b0, space0) {
+  uint4 m[1];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+matrix<float16_t, 2, 2> tint_symbol(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  return matrix<float16_t, 2, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 2, 2> l_m = tint_symbol(m, 0u);
+  uint ubo_load_2 = m[0].y;
+  const vector<float16_t, 2> l_m_1 = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16)));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..d28f04f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,30 @@
+SKIP: FAILED
+
+cbuffer cbuffer_m : register(b0, space0) {
+  uint4 m[1];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+matrix<float16_t, 2, 2> tint_symbol(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  return matrix<float16_t, 2, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 2, 2> l_m = tint_symbol(m, 0u);
+  uint ubo_load_2 = m[0].y;
+  const vector<float16_t, 2> l_m_1 = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16)));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001899F313180(11,8-16): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/static_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/static_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..c17ae4c
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/static_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,24 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform m_block_std140_ubo {
+  f16vec2 inner_0;
+  f16vec2 inner_1;
+} m;
+
+f16mat2 load_m_inner() {
+  return f16mat2(m.inner_0, m.inner_1);
+}
+
+void f() {
+  f16mat2 p_m = load_m_inner();
+  f16vec2 p_m_1 = m.inner_1;
+  f16mat2 l_m = load_m_inner();
+  f16vec2 l_m_1 = m.inner_1;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/static_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/static_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..59df3ba
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/static_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,15 @@
+#include <metal_stdlib>
+
+using namespace metal;
+int i() {
+  thread int tint_symbol = 0;
+  tint_symbol = as_type<int>((as_type<uint>(tint_symbol) + as_type<uint>(1)));
+  return tint_symbol;
+}
+
+kernel void f(const constant half2x2* tint_symbol_1 [[buffer(0)]]) {
+  half2x2 const l_m = *(tint_symbol_1);
+  half2 const l_m_1 = (*(tint_symbol_1))[1];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/static_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/static_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..7ec1f84
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/static_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,70 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 40
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %m_block_std140 "m_block_std140"
+               OpMemberName %m_block_std140 0 "inner_0"
+               OpMemberName %m_block_std140 1 "inner_1"
+               OpName %m "m"
+               OpName %counter "counter"
+               OpName %i "i"
+               OpName %load_m_inner "load_m_inner"
+               OpName %f "f"
+               OpDecorate %m_block_std140 Block
+               OpMemberDecorate %m_block_std140 0 Offset 0
+               OpMemberDecorate %m_block_std140 1 Offset 4
+               OpDecorate %m NonWritable
+               OpDecorate %m DescriptorSet 0
+               OpDecorate %m Binding 0
+       %half = OpTypeFloat 16
+     %v2half = OpTypeVector %half 2
+%m_block_std140 = OpTypeStruct %v2half %v2half
+%_ptr_Uniform_m_block_std140 = OpTypePointer Uniform %m_block_std140
+          %m = OpVariable %_ptr_Uniform_m_block_std140 Uniform
+        %int = OpTypeInt 32 1
+          %7 = OpConstantNull %int
+%_ptr_Private_int = OpTypePointer Private %int
+    %counter = OpVariable %_ptr_Private_int Private %7
+         %10 = OpTypeFunction %int
+      %int_1 = OpConstant %int 1
+ %mat2v2half = OpTypeMatrix %v2half 2
+         %17 = OpTypeFunction %mat2v2half
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half
+     %uint_1 = OpConstant %uint 1
+       %void = OpTypeVoid
+         %33 = OpTypeFunction %void
+          %i = OpFunction %int None %10
+         %12 = OpLabel
+         %13 = OpLoad %int %counter
+         %15 = OpIAdd %int %13 %int_1
+               OpStore %counter %15
+         %16 = OpLoad %int %counter
+               OpReturnValue %16
+               OpFunctionEnd
+%load_m_inner = OpFunction %mat2v2half None %17
+         %20 = OpLabel
+         %26 = OpAccessChain %_ptr_Uniform_v2half %m %uint_0
+         %27 = OpLoad %v2half %26
+         %30 = OpAccessChain %_ptr_Uniform_v2half %m %uint_1
+         %31 = OpLoad %v2half %30
+         %32 = OpCompositeConstruct %mat2v2half %27 %31
+               OpReturnValue %32
+               OpFunctionEnd
+          %f = OpFunction %void None %33
+         %36 = OpLabel
+         %37 = OpFunctionCall %mat2v2half %load_m_inner
+         %38 = OpAccessChain %_ptr_Uniform_v2half %m %uint_1
+         %39 = OpLoad %v2half %38
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/static_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/static_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..5f02f84
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/static_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,18 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> m : mat2x2<f16>;
+
+var<private> counter = 0;
+
+fn i() -> i32 {
+  counter++;
+  return counter;
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_m = &(m);
+  let p_m_1 = &((*(p_m))[1]);
+  let l_m : mat2x2<f16> = *(p_m);
+  let l_m_1 : vec2<f16> = *(p_m_1);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_builtin.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_builtin.wgsl
new file mode 100644
index 0000000..ac0adc1
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_builtin.wgsl

@@ -0,0 +1,10 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat2x2<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+    let t = transpose(u);
+    let l = length(u[1]);
+    let a = abs(u[0].yx.x);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_builtin.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_builtin.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..7946311
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_builtin.wgsl.expected.dxc.hlsl

@@ -0,0 +1,21 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+
+matrix<float16_t, 2, 2> tint_symbol(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  return matrix<float16_t, 2, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 2, 2> t = transpose(tint_symbol(u, 0u));
+  uint ubo_load_2 = u[0].y;
+  const float16_t l = length(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))));
+  uint ubo_load_3 = u[0].x;
+  const float16_t a = abs(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))).yx.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_builtin.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_builtin.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..aedd56c
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_builtin.wgsl.expected.fxc.hlsl

@@ -0,0 +1,26 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+
+matrix<float16_t, 2, 2> tint_symbol(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  return matrix<float16_t, 2, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 2, 2> t = transpose(tint_symbol(u, 0u));
+  uint ubo_load_2 = u[0].y;
+  const float16_t l = length(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))));
+  uint ubo_load_3 = u[0].x;
+  const float16_t a = abs(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))).yx.x);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001C84DC29980(5,8-16): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_builtin.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_builtin.wgsl.expected.glsl
new file mode 100644
index 0000000..50c298f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_builtin.wgsl.expected.glsl

@@ -0,0 +1,23 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  f16vec2 inner_0;
+  f16vec2 inner_1;
+} u;
+
+f16mat2 load_u_inner() {
+  return f16mat2(u.inner_0, u.inner_1);
+}
+
+void f() {
+  f16mat2 t = transpose(load_u_inner());
+  float16_t l = length(u.inner_1);
+  float16_t a = abs(u.inner_0.yx[0u]);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_builtin.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_builtin.wgsl.expected.msl
new file mode 100644
index 0000000..2b5e676
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_builtin.wgsl.expected.msl

@@ -0,0 +1,10 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void f(const constant half2x2* tint_symbol [[buffer(0)]]) {
+  half2x2 const t = transpose(*(tint_symbol));
+  half const l = length((*(tint_symbol))[1]);
+  half const a = fabs(half2((*(tint_symbol))[0]).yx[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_builtin.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_builtin.wgsl.expected.spvasm
new file mode 100644
index 0000000..783d7a5
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_builtin.wgsl.expected.spvasm

@@ -0,0 +1,63 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 38
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+         %29 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner_0"
+               OpMemberName %u_block_std140 1 "inner_1"
+               OpName %u "u"
+               OpName %load_u_inner "load_u_inner"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %u_block_std140 1 Offset 4
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %half = OpTypeFloat 16
+     %v2half = OpTypeVector %half 2
+%u_block_std140 = OpTypeStruct %v2half %v2half
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat2v2half = OpTypeMatrix %v2half 2
+          %6 = OpTypeFunction %mat2v2half
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half
+     %uint_1 = OpConstant %uint 1
+       %void = OpTypeVoid
+         %22 = OpTypeFunction %void
+         %36 = OpConstantNull %uint
+%load_u_inner = OpFunction %mat2v2half None %6
+          %9 = OpLabel
+         %15 = OpAccessChain %_ptr_Uniform_v2half %u %uint_0
+         %16 = OpLoad %v2half %15
+         %19 = OpAccessChain %_ptr_Uniform_v2half %u %uint_1
+         %20 = OpLoad %v2half %19
+         %21 = OpCompositeConstruct %mat2v2half %16 %20
+               OpReturnValue %21
+               OpFunctionEnd
+          %f = OpFunction %void None %22
+         %25 = OpLabel
+         %27 = OpFunctionCall %mat2v2half %load_u_inner
+         %26 = OpTranspose %mat2v2half %27
+         %30 = OpAccessChain %_ptr_Uniform_v2half %u %uint_1
+         %31 = OpLoad %v2half %30
+         %28 = OpExtInst %half %29 Length %31
+         %33 = OpAccessChain %_ptr_Uniform_v2half %u %uint_0
+         %34 = OpLoad %v2half %33
+         %35 = OpVectorShuffle %v2half %34 %34 1 0
+         %37 = OpCompositeExtract %half %35 0
+         %32 = OpExtInst %half %29 FAbs %37
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_builtin.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_builtin.wgsl.expected.wgsl
new file mode 100644
index 0000000..85c4b9e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_builtin.wgsl.expected.wgsl

@@ -0,0 +1,10 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat2x2<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let t = transpose(u);
+  let l = length(u[1]);
+  let a = abs(u[0].yx.x);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_fn.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_fn.wgsl
new file mode 100644
index 0000000..6a59efb
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_fn.wgsl

@@ -0,0 +1,16 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat2x2<f16>;
+
+fn a(m : mat2x2<f16>) {}
+fn b(v : vec2<f16>) {}
+fn c(f : f16) {}
+
+@compute @workgroup_size(1)
+fn f() {
+    a(u);
+    b(u[1]);
+    b(u[1].yx);
+    c(u[1].x);
+    c(u[1].yx.x);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_fn.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_fn.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..6e0c6f2
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_fn.wgsl.expected.dxc.hlsl

@@ -0,0 +1,33 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+
+void a(matrix<float16_t, 2, 2> m) {
+}
+
+void b(vector<float16_t, 2> v) {
+}
+
+void c(float16_t f_1) {
+}
+
+matrix<float16_t, 2, 2> tint_symbol(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  return matrix<float16_t, 2, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  uint ubo_load_2 = u[0].y;
+  b(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))));
+  uint ubo_load_3 = u[0].y;
+  b(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))).yx);
+  c(float16_t(f16tof32(((u[0].y) & 0xFFFF))));
+  uint ubo_load_4 = u[0].y;
+  c(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_4 & 0xFFFF)), float16_t(f16tof32(ubo_load_4 >> 16))).yx.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_fn.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_fn.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..a5b412f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_fn.wgsl.expected.fxc.hlsl

@@ -0,0 +1,40 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+
+void a(matrix<float16_t, 2, 2> m) {
+}
+
+void b(vector<float16_t, 2> v) {
+}
+
+void c(float16_t f_1) {
+}
+
+matrix<float16_t, 2, 2> tint_symbol(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  return matrix<float16_t, 2, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  uint ubo_load_2 = u[0].y;
+  b(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))));
+  uint ubo_load_3 = u[0].y;
+  b(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))).yx);
+  c(float16_t(f16tof32(((u[0].y) & 0xFFFF))));
+  uint ubo_load_4 = u[0].y;
+  c(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_4 & 0xFFFF)), float16_t(f16tof32(ubo_load_4 >> 16))).yx.x);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000020E82D03620(5,15-23): error X3000: syntax error: unexpected token 'float16_t'
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000020E82D03620(8,15-23): error X3000: syntax error: unexpected token 'float16_t'
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000020E82D03620(11,8-16): error X3000: unrecognized identifier 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_fn.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_fn.wgsl.expected.glsl
new file mode 100644
index 0000000..a4cd27f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_fn.wgsl.expected.glsl

@@ -0,0 +1,34 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  f16vec2 inner_0;
+  f16vec2 inner_1;
+} u;
+
+void a(f16mat2 m) {
+}
+
+void b(f16vec2 v) {
+}
+
+void c(float16_t f_1) {
+}
+
+f16mat2 load_u_inner() {
+  return f16mat2(u.inner_0, u.inner_1);
+}
+
+void f() {
+  a(load_u_inner());
+  b(u.inner_1);
+  b(u.inner_1.yx);
+  c(u.inner_1[0u]);
+  c(u.inner_1.yx[0u]);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_fn.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_fn.wgsl.expected.msl
new file mode 100644
index 0000000..8c3ff4f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_fn.wgsl.expected.msl

@@ -0,0 +1,21 @@
+#include <metal_stdlib>
+
+using namespace metal;
+void a(half2x2 m) {
+}
+
+void b(half2 v) {
+}
+
+void c(half f_1) {
+}
+
+kernel void f(const constant half2x2* tint_symbol [[buffer(0)]]) {
+  a(*(tint_symbol));
+  b((*(tint_symbol))[1]);
+  b(half2((*(tint_symbol))[1]).yx);
+  c((*(tint_symbol))[1][0]);
+  c(half2((*(tint_symbol))[1]).yx[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_fn.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_fn.wgsl.expected.spvasm
new file mode 100644
index 0000000..e5673eb
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_fn.wgsl.expected.spvasm

@@ -0,0 +1,94 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 57
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner_0"
+               OpMemberName %u_block_std140 1 "inner_1"
+               OpName %u "u"
+               OpName %a "a"
+               OpName %m "m"
+               OpName %b "b"
+               OpName %v "v"
+               OpName %c "c"
+               OpName %f_1 "f_1"
+               OpName %load_u_inner "load_u_inner"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %u_block_std140 1 Offset 4
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %half = OpTypeFloat 16
+     %v2half = OpTypeVector %half 2
+%u_block_std140 = OpTypeStruct %v2half %v2half
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+       %void = OpTypeVoid
+ %mat2v2half = OpTypeMatrix %v2half 2
+          %6 = OpTypeFunction %void %mat2v2half
+         %12 = OpTypeFunction %void %v2half
+         %16 = OpTypeFunction %void %half
+         %20 = OpTypeFunction %mat2v2half
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half
+     %uint_1 = OpConstant %uint 1
+         %35 = OpTypeFunction %void
+         %48 = OpConstantNull %uint
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+          %a = OpFunction %void None %6
+          %m = OpFunctionParameter %mat2v2half
+         %11 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %b = OpFunction %void None %12
+          %v = OpFunctionParameter %v2half
+         %15 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %c = OpFunction %void None %16
+        %f_1 = OpFunctionParameter %half
+         %19 = OpLabel
+               OpReturn
+               OpFunctionEnd
+%load_u_inner = OpFunction %mat2v2half None %20
+         %22 = OpLabel
+         %28 = OpAccessChain %_ptr_Uniform_v2half %u %uint_0
+         %29 = OpLoad %v2half %28
+         %32 = OpAccessChain %_ptr_Uniform_v2half %u %uint_1
+         %33 = OpLoad %v2half %32
+         %34 = OpCompositeConstruct %mat2v2half %29 %33
+               OpReturnValue %34
+               OpFunctionEnd
+          %f = OpFunction %void None %35
+         %37 = OpLabel
+         %39 = OpFunctionCall %mat2v2half %load_u_inner
+         %38 = OpFunctionCall %void %a %39
+         %41 = OpAccessChain %_ptr_Uniform_v2half %u %uint_1
+         %42 = OpLoad %v2half %41
+         %40 = OpFunctionCall %void %b %42
+         %44 = OpAccessChain %_ptr_Uniform_v2half %u %uint_1
+         %45 = OpLoad %v2half %44
+         %46 = OpVectorShuffle %v2half %45 %45 1 0
+         %43 = OpFunctionCall %void %b %46
+         %50 = OpAccessChain %_ptr_Uniform_half %u %uint_1 %48
+         %51 = OpLoad %half %50
+         %47 = OpFunctionCall %void %c %51
+         %53 = OpAccessChain %_ptr_Uniform_v2half %u %uint_1
+         %54 = OpLoad %v2half %53
+         %55 = OpVectorShuffle %v2half %54 %54 1 0
+         %56 = OpCompositeExtract %half %55 0
+         %52 = OpFunctionCall %void %c %56
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_fn.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_fn.wgsl.expected.wgsl
new file mode 100644
index 0000000..0a1e56c
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_fn.wgsl.expected.wgsl

@@ -0,0 +1,21 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat2x2<f16>;
+
+fn a(m : mat2x2<f16>) {
+}
+
+fn b(v : vec2<f16>) {
+}
+
+fn c(f : f16) {
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  a(u);
+  b(u[1]);
+  b(u[1].yx);
+  c(u[1].x);
+  c(u[1].yx.x);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_private.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_private.wgsl
new file mode 100644
index 0000000..14cee66
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_private.wgsl

@@ -0,0 +1,12 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat2x2<f16>;
+var<private> p : mat2x2<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+    p = u;
+    p[1] = u[0];
+    p[1] = u[0].yx;
+    p[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_private.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_private.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..649090f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_private.wgsl.expected.dxc.hlsl

@@ -0,0 +1,23 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+static matrix<float16_t, 2, 2> p = matrix<float16_t, 2, 2>(float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h));
+
+matrix<float16_t, 2, 2> tint_symbol(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  return matrix<float16_t, 2, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  uint ubo_load_2 = u[0].x;
+  p[1] = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16)));
+  uint ubo_load_3 = u[0].x;
+  p[1] = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))).yx;
+  p[0][1] = float16_t(f16tof32(((u[0].y) & 0xFFFF)));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_private.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_private.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..56e9dbe
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_private.wgsl.expected.fxc.hlsl

@@ -0,0 +1,28 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+static matrix<float16_t, 2, 2> p = matrix<float16_t, 2, 2>(float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h));
+
+matrix<float16_t, 2, 2> tint_symbol(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  return matrix<float16_t, 2, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  uint ubo_load_2 = u[0].x;
+  p[1] = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16)));
+  uint ubo_load_3 = u[0].x;
+  p[1] = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))).yx;
+  p[0][1] = float16_t(f16tof32(((u[0].y) & 0xFFFF)));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000021A30FC4890(4,15-23): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_private.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_private.wgsl.expected.glsl
new file mode 100644
index 0000000..47059fa
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_private.wgsl.expected.glsl

@@ -0,0 +1,25 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  f16vec2 inner_0;
+  f16vec2 inner_1;
+} u;
+
+f16mat2 p = f16mat2(0.0hf, 0.0hf, 0.0hf, 0.0hf);
+f16mat2 load_u_inner() {
+  return f16mat2(u.inner_0, u.inner_1);
+}
+
+void f() {
+  p = load_u_inner();
+  p[1] = u.inner_0;
+  p[1] = u.inner_0.yx;
+  p[0][1] = u.inner_1[0u];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_private.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_private.wgsl.expected.msl
new file mode 100644
index 0000000..4d82450
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_private.wgsl.expected.msl

@@ -0,0 +1,12 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void f(const constant half2x2* tint_symbol_1 [[buffer(0)]]) {
+  thread half2x2 tint_symbol = half2x2(0.0h);
+  tint_symbol = *(tint_symbol_1);
+  tint_symbol[1] = (*(tint_symbol_1))[0];
+  tint_symbol[1] = half2((*(tint_symbol_1))[0]).yx;
+  tint_symbol[0][1] = (*(tint_symbol_1))[1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_private.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_private.wgsl.expected.spvasm
new file mode 100644
index 0000000..6dc9b47
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_private.wgsl.expected.spvasm

@@ -0,0 +1,77 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 47
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner_0"
+               OpMemberName %u_block_std140 1 "inner_1"
+               OpName %u "u"
+               OpName %p "p"
+               OpName %load_u_inner "load_u_inner"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %u_block_std140 1 Offset 4
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %half = OpTypeFloat 16
+     %v2half = OpTypeVector %half 2
+%u_block_std140 = OpTypeStruct %v2half %v2half
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat2v2half = OpTypeMatrix %v2half 2
+%_ptr_Private_mat2v2half = OpTypePointer Private %mat2v2half
+          %9 = OpConstantNull %mat2v2half
+          %p = OpVariable %_ptr_Private_mat2v2half Private %9
+         %10 = OpTypeFunction %mat2v2half
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half
+     %uint_1 = OpConstant %uint 1
+       %void = OpTypeVoid
+         %25 = OpTypeFunction %void
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Private_v2half = OpTypePointer Private %v2half
+         %40 = OpConstantNull %int
+%_ptr_Private_half = OpTypePointer Private %half
+         %43 = OpConstantNull %uint
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+%load_u_inner = OpFunction %mat2v2half None %10
+         %12 = OpLabel
+         %18 = OpAccessChain %_ptr_Uniform_v2half %u %uint_0
+         %19 = OpLoad %v2half %18
+         %22 = OpAccessChain %_ptr_Uniform_v2half %u %uint_1
+         %23 = OpLoad %v2half %22
+         %24 = OpCompositeConstruct %mat2v2half %19 %23
+               OpReturnValue %24
+               OpFunctionEnd
+          %f = OpFunction %void None %25
+         %28 = OpLabel
+         %29 = OpFunctionCall %mat2v2half %load_u_inner
+               OpStore %p %29
+         %33 = OpAccessChain %_ptr_Private_v2half %p %int_1
+         %34 = OpAccessChain %_ptr_Uniform_v2half %u %uint_0
+         %35 = OpLoad %v2half %34
+               OpStore %33 %35
+         %36 = OpAccessChain %_ptr_Private_v2half %p %int_1
+         %37 = OpAccessChain %_ptr_Uniform_v2half %u %uint_0
+         %38 = OpLoad %v2half %37
+         %39 = OpVectorShuffle %v2half %38 %38 1 0
+               OpStore %36 %39
+         %42 = OpAccessChain %_ptr_Private_half %p %40 %int_1
+         %45 = OpAccessChain %_ptr_Uniform_half %u %uint_1 %43
+         %46 = OpLoad %half %45
+               OpStore %42 %46
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_private.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_private.wgsl.expected.wgsl
new file mode 100644
index 0000000..b8bd5b6
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_private.wgsl.expected.wgsl

@@ -0,0 +1,13 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat2x2<f16>;
+
+var<private> p : mat2x2<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+  p = u;
+  p[1] = u[0];
+  p[1] = u[0].yx;
+  p[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_storage.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_storage.wgsl
new file mode 100644
index 0000000..a35fbc0
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_storage.wgsl

@@ -0,0 +1,12 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat2x2<f16>;
+@group(0) @binding(1) var<storage, read_write> s : mat2x2<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+    s = u;
+    s[1] = u[0];
+    s[1] = u[0].yx;
+    s[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_storage.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_storage.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..365c3b2
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_storage.wgsl.expected.dxc.hlsl

@@ -0,0 +1,28 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 2, 2> value) {
+  buffer.Store<vector<float16_t, 2> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 4u), value[1u]);
+}
+
+matrix<float16_t, 2, 2> tint_symbol_2(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  return matrix<float16_t, 2, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_2(u, 0u));
+  uint ubo_load_2 = u[0].x;
+  s.Store<vector<float16_t, 2> >(4u, vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))));
+  uint ubo_load_3 = u[0].x;
+  s.Store<vector<float16_t, 2> >(4u, vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))).yx);
+  s.Store<float16_t>(2u, float16_t(f16tof32(((u[0].y) & 0xFFFF))));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_storage.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_storage.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..1883b2f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_storage.wgsl.expected.fxc.hlsl

@@ -0,0 +1,34 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 2, 2> value) {
+  buffer.Store<vector<float16_t, 2> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 4u), value[1u]);
+}
+
+matrix<float16_t, 2, 2> tint_symbol_2(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  return matrix<float16_t, 2, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_2(u, 0u));
+  uint ubo_load_2 = u[0].x;
+  s.Store<vector<float16_t, 2> >(4u, vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))));
+  uint ubo_load_3 = u[0].x;
+  s.Store<vector<float16_t, 2> >(4u, vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))).yx);
+  s.Store<float16_t>(2u, float16_t(f16tof32(((u[0].y) & 0xFFFF))));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001FAF34C3EE0(6,66-74): error X3000: syntax error: unexpected token 'float16_t'
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001FAF34C3EE0(7,3-14): error X3018: invalid subscript 'Store'
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_storage.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_storage.wgsl.expected.glsl
new file mode 100644
index 0000000..78515dd
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_storage.wgsl.expected.glsl

@@ -0,0 +1,28 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  f16vec2 inner_0;
+  f16vec2 inner_1;
+} u;
+
+layout(binding = 1, std430) buffer u_block_ssbo {
+  f16mat2 inner;
+} s;
+
+f16mat2 load_u_inner() {
+  return f16mat2(u.inner_0, u.inner_1);
+}
+
+void f() {
+  s.inner = load_u_inner();
+  s.inner[1] = u.inner_0;
+  s.inner[1] = u.inner_0.yx;
+  s.inner[0][1] = u.inner_1[0u];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_storage.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_storage.wgsl.expected.msl
new file mode 100644
index 0000000..52b1327
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_storage.wgsl.expected.msl

@@ -0,0 +1,11 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void f(device half2x2* tint_symbol [[buffer(1)]], const constant half2x2* tint_symbol_1 [[buffer(0)]]) {
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[0];
+  (*(tint_symbol))[1] = half2((*(tint_symbol_1))[0]).yx;
+  (*(tint_symbol))[0][1] = (*(tint_symbol_1))[1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_storage.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_storage.wgsl.expected.spvasm
new file mode 100644
index 0000000..7bf94ab
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_storage.wgsl.expected.spvasm

@@ -0,0 +1,87 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 49
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner_0"
+               OpMemberName %u_block_std140 1 "inner_1"
+               OpName %u "u"
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %s "s"
+               OpName %load_u_inner "load_u_inner"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %u_block_std140 1 Offset 4
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 4
+               OpDecorate %s DescriptorSet 0
+               OpDecorate %s Binding 1
+       %half = OpTypeFloat 16
+     %v2half = OpTypeVector %half 2
+%u_block_std140 = OpTypeStruct %v2half %v2half
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat2v2half = OpTypeMatrix %v2half 2
+    %u_block = OpTypeStruct %mat2v2half
+%_ptr_StorageBuffer_u_block = OpTypePointer StorageBuffer %u_block
+          %s = OpVariable %_ptr_StorageBuffer_u_block StorageBuffer
+         %10 = OpTypeFunction %mat2v2half
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half
+     %uint_1 = OpConstant %uint 1
+       %void = OpTypeVoid
+         %25 = OpTypeFunction %void
+%_ptr_StorageBuffer_mat2v2half = OpTypePointer StorageBuffer %mat2v2half
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_StorageBuffer_v2half = OpTypePointer StorageBuffer %v2half
+         %42 = OpConstantNull %int
+%_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
+         %45 = OpConstantNull %uint
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+%load_u_inner = OpFunction %mat2v2half None %10
+         %12 = OpLabel
+         %18 = OpAccessChain %_ptr_Uniform_v2half %u %uint_0
+         %19 = OpLoad %v2half %18
+         %22 = OpAccessChain %_ptr_Uniform_v2half %u %uint_1
+         %23 = OpLoad %v2half %22
+         %24 = OpCompositeConstruct %mat2v2half %19 %23
+               OpReturnValue %24
+               OpFunctionEnd
+          %f = OpFunction %void None %25
+         %28 = OpLabel
+         %30 = OpAccessChain %_ptr_StorageBuffer_mat2v2half %s %uint_0
+         %31 = OpFunctionCall %mat2v2half %load_u_inner
+               OpStore %30 %31
+         %35 = OpAccessChain %_ptr_StorageBuffer_v2half %s %uint_0 %int_1
+         %36 = OpAccessChain %_ptr_Uniform_v2half %u %uint_0
+         %37 = OpLoad %v2half %36
+               OpStore %35 %37
+         %38 = OpAccessChain %_ptr_StorageBuffer_v2half %s %uint_0 %int_1
+         %39 = OpAccessChain %_ptr_Uniform_v2half %u %uint_0
+         %40 = OpLoad %v2half %39
+         %41 = OpVectorShuffle %v2half %40 %40 1 0
+               OpStore %38 %41
+         %44 = OpAccessChain %_ptr_StorageBuffer_half %s %uint_0 %42 %int_1
+         %47 = OpAccessChain %_ptr_Uniform_half %u %uint_1 %45
+         %48 = OpLoad %half %47
+               OpStore %44 %48
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_storage.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_storage.wgsl.expected.wgsl
new file mode 100644
index 0000000..eb79e59
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_storage.wgsl.expected.wgsl

@@ -0,0 +1,13 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat2x2<f16>;
+
+@group(0) @binding(1) var<storage, read_write> s : mat2x2<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+  s = u;
+  s[1] = u[0];
+  s[1] = u[0].yx;
+  s[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_workgroup.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_workgroup.wgsl
new file mode 100644
index 0000000..8702c67
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_workgroup.wgsl

@@ -0,0 +1,12 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat2x2<f16>;
+var<workgroup> w : mat2x2<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+    w = u;
+    w[1] = u[0];
+    w[1] = u[0].yx;
+    w[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_workgroup.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_workgroup.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..3bf9f07
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_workgroup.wgsl.expected.dxc.hlsl

@@ -0,0 +1,35 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+groupshared matrix<float16_t, 2, 2> w;
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+matrix<float16_t, 2, 2> tint_symbol_2(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  return matrix<float16_t, 2, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))));
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    w = matrix<float16_t, 2, 2>((float16_t(0.0h)).xx, (float16_t(0.0h)).xx);
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  uint ubo_load_2 = u[0].x;
+  w[1] = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16)));
+  uint ubo_load_3 = u[0].x;
+  w[1] = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))).yx;
+  w[0][1] = float16_t(f16tof32(((u[0].y) & 0xFFFF)));
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_workgroup.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_workgroup.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..90c5813
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_workgroup.wgsl.expected.fxc.hlsl

@@ -0,0 +1,40 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+groupshared matrix<float16_t, 2, 2> w;
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+matrix<float16_t, 2, 2> tint_symbol_2(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  return matrix<float16_t, 2, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))));
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    w = matrix<float16_t, 2, 2>((float16_t(0.0h)).xx, (float16_t(0.0h)).xx);
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  uint ubo_load_2 = u[0].x;
+  w[1] = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16)));
+  uint ubo_load_3 = u[0].x;
+  w[1] = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))).yx;
+  w[0][1] = float16_t(f16tof32(((u[0].y) & 0xFFFF)));
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000022F004DAD50(4,20-28): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_workgroup.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_workgroup.wgsl.expected.glsl
new file mode 100644
index 0000000..5f2c7dd
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_workgroup.wgsl.expected.glsl

@@ -0,0 +1,29 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  f16vec2 inner_0;
+  f16vec2 inner_1;
+} u;
+
+shared f16mat2 w;
+f16mat2 load_u_inner() {
+  return f16mat2(u.inner_0, u.inner_1);
+}
+
+void f(uint local_invocation_index) {
+  {
+    w = f16mat2(f16vec2(0.0hf), f16vec2(0.0hf));
+  }
+  barrier();
+  w = load_u_inner();
+  w[1] = u.inner_0;
+  w[1] = u.inner_0.yx;
+  w[0][1] = u.inner_1[0u];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f(gl_LocalInvocationIndex);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_workgroup.wgsl.expected.msl
new file mode 100644
index 0000000..270a66d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_workgroup.wgsl.expected.msl

@@ -0,0 +1,24 @@
+#include <metal_stdlib>
+
+using namespace metal;
+struct tint_symbol_5 {
+  half2x2 w;
+};
+
+void f_inner(uint local_invocation_index, threadgroup half2x2* const tint_symbol, const constant half2x2* const tint_symbol_1) {
+  {
+    *(tint_symbol) = half2x2(half2(0.0h), half2(0.0h));
+  }
+  threadgroup_barrier(mem_flags::mem_threadgroup);
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[0];
+  (*(tint_symbol))[1] = half2((*(tint_symbol_1))[0]).yx;
+  (*(tint_symbol))[0][1] = (*(tint_symbol_1))[1][0];
+}
+
+kernel void f(const constant half2x2* tint_symbol_4 [[buffer(0)]], threadgroup tint_symbol_5* tint_symbol_3 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
+  threadgroup half2x2* const tint_symbol_2 = &((*(tint_symbol_3)).w);
+  f_inner(local_invocation_index, tint_symbol_2, tint_symbol_4);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_workgroup.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_workgroup.wgsl.expected.spvasm
new file mode 100644
index 0000000..b174799
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_workgroup.wgsl.expected.spvasm

@@ -0,0 +1,95 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 58
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f" %local_invocation_index_1
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %local_invocation_index_1 "local_invocation_index_1"
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner_0"
+               OpMemberName %u_block_std140 1 "inner_1"
+               OpName %u "u"
+               OpName %w "w"
+               OpName %load_u_inner "load_u_inner"
+               OpName %f_inner "f_inner"
+               OpName %local_invocation_index "local_invocation_index"
+               OpName %f "f"
+               OpDecorate %local_invocation_index_1 BuiltIn LocalInvocationIndex
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %u_block_std140 1 Offset 4
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %uint = OpTypeInt 32 0
+%_ptr_Input_uint = OpTypePointer Input %uint
+%local_invocation_index_1 = OpVariable %_ptr_Input_uint Input
+       %half = OpTypeFloat 16
+     %v2half = OpTypeVector %half 2
+%u_block_std140 = OpTypeStruct %v2half %v2half
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat2v2half = OpTypeMatrix %v2half 2
+%_ptr_Workgroup_mat2v2half = OpTypePointer Workgroup %mat2v2half
+          %w = OpVariable %_ptr_Workgroup_mat2v2half Workgroup
+         %12 = OpTypeFunction %mat2v2half
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half
+     %uint_1 = OpConstant %uint 1
+       %void = OpTypeVoid
+         %26 = OpTypeFunction %void %uint
+         %31 = OpConstantNull %mat2v2half
+     %uint_2 = OpConstant %uint 2
+   %uint_264 = OpConstant %uint 264
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Workgroup_v2half = OpTypePointer Workgroup %v2half
+         %46 = OpConstantNull %int
+%_ptr_Workgroup_half = OpTypePointer Workgroup %half
+         %49 = OpConstantNull %uint
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+         %53 = OpTypeFunction %void
+%load_u_inner = OpFunction %mat2v2half None %12
+         %14 = OpLabel
+         %19 = OpAccessChain %_ptr_Uniform_v2half %u %uint_0
+         %20 = OpLoad %v2half %19
+         %23 = OpAccessChain %_ptr_Uniform_v2half %u %uint_1
+         %24 = OpLoad %v2half %23
+         %25 = OpCompositeConstruct %mat2v2half %20 %24
+               OpReturnValue %25
+               OpFunctionEnd
+    %f_inner = OpFunction %void None %26
+%local_invocation_index = OpFunctionParameter %uint
+         %30 = OpLabel
+               OpStore %w %31
+               OpControlBarrier %uint_2 %uint_2 %uint_264
+         %35 = OpFunctionCall %mat2v2half %load_u_inner
+               OpStore %w %35
+         %39 = OpAccessChain %_ptr_Workgroup_v2half %w %int_1
+         %40 = OpAccessChain %_ptr_Uniform_v2half %u %uint_0
+         %41 = OpLoad %v2half %40
+               OpStore %39 %41
+         %42 = OpAccessChain %_ptr_Workgroup_v2half %w %int_1
+         %43 = OpAccessChain %_ptr_Uniform_v2half %u %uint_0
+         %44 = OpLoad %v2half %43
+         %45 = OpVectorShuffle %v2half %44 %44 1 0
+               OpStore %42 %45
+         %48 = OpAccessChain %_ptr_Workgroup_half %w %46 %int_1
+         %51 = OpAccessChain %_ptr_Uniform_half %u %uint_1 %49
+         %52 = OpLoad %half %51
+               OpStore %48 %52
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %53
+         %55 = OpLabel
+         %57 = OpLoad %uint %local_invocation_index_1
+         %56 = OpFunctionCall %void %f_inner %57
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_workgroup.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_workgroup.wgsl.expected.wgsl
new file mode 100644
index 0000000..a343bf1
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x2_f16/to_workgroup.wgsl.expected.wgsl

@@ -0,0 +1,13 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat2x2<f16>;
+
+var<workgroup> w : mat2x2<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+  w = u;
+  w[1] = u[0];
+  w[1] = u[0].yx;
+  w[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2/dynamic_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f32/dynamic_index_via_ptr.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat2x2/dynamic_index_via_ptr.wgsl
rename to test/tint/buffer/uniform/std140/unnested/mat2x2_f32/dynamic_index_via_ptr.wgsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat2x2/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
rename to test/tint/buffer/uniform/std140/unnested/mat2x2_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat2x2/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
rename to test/tint/buffer/uniform/std140/unnested/mat2x2_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2/dynamic_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f32/dynamic_index_via_ptr.wgsl.expected.glsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat2x2/dynamic_index_via_ptr.wgsl.expected.glsl
rename to test/tint/buffer/uniform/std140/unnested/mat2x2_f32/dynamic_index_via_ptr.wgsl.expected.glsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2/dynamic_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f32/dynamic_index_via_ptr.wgsl.expected.msl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat2x2/dynamic_index_via_ptr.wgsl.expected.msl
rename to test/tint/buffer/uniform/std140/unnested/mat2x2_f32/dynamic_index_via_ptr.wgsl.expected.msl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2/dynamic_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat2x2_f32/dynamic_index_via_ptr.wgsl.expected.spvasm
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat2x2/dynamic_index_via_ptr.wgsl.expected.spvasm
rename to test/tint/buffer/uniform/std140/unnested/mat2x2_f32/dynamic_index_via_ptr.wgsl.expected.spvasm


diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2/dynamic_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f32/dynamic_index_via_ptr.wgsl.expected.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat2x2/dynamic_index_via_ptr.wgsl.expected.wgsl
rename to test/tint/buffer/uniform/std140/unnested/mat2x2_f32/dynamic_index_via_ptr.wgsl.expected.wgsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2/static_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f32/static_index_via_ptr.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat2x2/static_index_via_ptr.wgsl
rename to test/tint/buffer/uniform/std140/unnested/mat2x2_f32/static_index_via_ptr.wgsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2/static_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat2x2/static_index_via_ptr.wgsl.expected.dxc.hlsl
rename to test/tint/buffer/uniform/std140/unnested/mat2x2_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2/static_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat2x2/static_index_via_ptr.wgsl.expected.fxc.hlsl
rename to test/tint/buffer/uniform/std140/unnested/mat2x2_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2/static_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f32/static_index_via_ptr.wgsl.expected.glsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat2x2/static_index_via_ptr.wgsl.expected.glsl
rename to test/tint/buffer/uniform/std140/unnested/mat2x2_f32/static_index_via_ptr.wgsl.expected.glsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2/static_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f32/static_index_via_ptr.wgsl.expected.msl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat2x2/static_index_via_ptr.wgsl.expected.msl
rename to test/tint/buffer/uniform/std140/unnested/mat2x2_f32/static_index_via_ptr.wgsl.expected.msl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2/static_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat2x2_f32/static_index_via_ptr.wgsl.expected.spvasm
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat2x2/static_index_via_ptr.wgsl.expected.spvasm
rename to test/tint/buffer/uniform/std140/unnested/mat2x2_f32/static_index_via_ptr.wgsl.expected.spvasm


diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2/static_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f32/static_index_via_ptr.wgsl.expected.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat2x2/static_index_via_ptr.wgsl.expected.wgsl
rename to test/tint/buffer/uniform/std140/unnested/mat2x2_f32/static_index_via_ptr.wgsl.expected.wgsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2/to_builtin.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_builtin.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat2x2/to_builtin.wgsl
rename to test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_builtin.wgsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2/to_builtin.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_builtin.wgsl.expected.dxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat2x2/to_builtin.wgsl.expected.dxc.hlsl
rename to test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_builtin.wgsl.expected.dxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2/to_builtin.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_builtin.wgsl.expected.fxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat2x2/to_builtin.wgsl.expected.fxc.hlsl
rename to test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_builtin.wgsl.expected.fxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2/to_builtin.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_builtin.wgsl.expected.glsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat2x2/to_builtin.wgsl.expected.glsl
rename to test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_builtin.wgsl.expected.glsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2/to_builtin.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_builtin.wgsl.expected.msl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat2x2/to_builtin.wgsl.expected.msl
rename to test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_builtin.wgsl.expected.msl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2/to_builtin.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_builtin.wgsl.expected.spvasm
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat2x2/to_builtin.wgsl.expected.spvasm
rename to test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_builtin.wgsl.expected.spvasm


diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2/to_builtin.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_builtin.wgsl.expected.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat2x2/to_builtin.wgsl.expected.wgsl
rename to test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_builtin.wgsl.expected.wgsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2/to_fn.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_fn.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat2x2/to_fn.wgsl
rename to test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_fn.wgsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2/to_fn.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_fn.wgsl.expected.dxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat2x2/to_fn.wgsl.expected.dxc.hlsl
rename to test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_fn.wgsl.expected.dxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2/to_fn.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_fn.wgsl.expected.fxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat2x2/to_fn.wgsl.expected.fxc.hlsl
rename to test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_fn.wgsl.expected.fxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2/to_fn.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_fn.wgsl.expected.glsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat2x2/to_fn.wgsl.expected.glsl
rename to test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_fn.wgsl.expected.glsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2/to_fn.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_fn.wgsl.expected.msl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat2x2/to_fn.wgsl.expected.msl
rename to test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_fn.wgsl.expected.msl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2/to_fn.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_fn.wgsl.expected.spvasm
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat2x2/to_fn.wgsl.expected.spvasm
rename to test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_fn.wgsl.expected.spvasm


diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2/to_fn.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_fn.wgsl.expected.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat2x2/to_fn.wgsl.expected.wgsl
rename to test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_fn.wgsl.expected.wgsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2/to_private.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_private.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat2x2/to_private.wgsl
rename to test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_private.wgsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2/to_private.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_private.wgsl.expected.dxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat2x2/to_private.wgsl.expected.dxc.hlsl
rename to test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_private.wgsl.expected.dxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2/to_private.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_private.wgsl.expected.fxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat2x2/to_private.wgsl.expected.fxc.hlsl
rename to test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_private.wgsl.expected.fxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2/to_private.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_private.wgsl.expected.glsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat2x2/to_private.wgsl.expected.glsl
rename to test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_private.wgsl.expected.glsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2/to_private.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_private.wgsl.expected.msl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat2x2/to_private.wgsl.expected.msl
rename to test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_private.wgsl.expected.msl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2/to_private.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_private.wgsl.expected.spvasm
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat2x2/to_private.wgsl.expected.spvasm
rename to test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_private.wgsl.expected.spvasm


diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2/to_private.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_private.wgsl.expected.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat2x2/to_private.wgsl.expected.wgsl
rename to test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_private.wgsl.expected.wgsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2/to_storage.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_storage.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat2x2/to_storage.wgsl
rename to test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_storage.wgsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2/to_storage.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_storage.wgsl.expected.dxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat2x2/to_storage.wgsl.expected.dxc.hlsl
rename to test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_storage.wgsl.expected.dxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2/to_storage.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_storage.wgsl.expected.fxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat2x2/to_storage.wgsl.expected.fxc.hlsl
rename to test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_storage.wgsl.expected.fxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2/to_storage.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_storage.wgsl.expected.glsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat2x2/to_storage.wgsl.expected.glsl
rename to test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_storage.wgsl.expected.glsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2/to_storage.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_storage.wgsl.expected.msl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat2x2/to_storage.wgsl.expected.msl
rename to test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_storage.wgsl.expected.msl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2/to_storage.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_storage.wgsl.expected.spvasm
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat2x2/to_storage.wgsl.expected.spvasm
rename to test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_storage.wgsl.expected.spvasm


diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2/to_storage.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_storage.wgsl.expected.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat2x2/to_storage.wgsl.expected.wgsl
rename to test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_storage.wgsl.expected.wgsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2/to_workgroup.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_workgroup.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat2x2/to_workgroup.wgsl
rename to test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_workgroup.wgsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2/to_workgroup.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_workgroup.wgsl.expected.dxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat2x2/to_workgroup.wgsl.expected.dxc.hlsl
rename to test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_workgroup.wgsl.expected.dxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2/to_workgroup.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_workgroup.wgsl.expected.fxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat2x2/to_workgroup.wgsl.expected.fxc.hlsl
rename to test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_workgroup.wgsl.expected.fxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2/to_workgroup.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_workgroup.wgsl.expected.glsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat2x2/to_workgroup.wgsl.expected.glsl
rename to test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_workgroup.wgsl.expected.glsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_workgroup.wgsl.expected.msl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat2x2/to_workgroup.wgsl.expected.msl
rename to test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_workgroup.wgsl.expected.msl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2/to_workgroup.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_workgroup.wgsl.expected.spvasm
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat2x2/to_workgroup.wgsl.expected.spvasm
rename to test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_workgroup.wgsl.expected.spvasm


diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x2/to_workgroup.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_workgroup.wgsl.expected.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat2x2/to_workgroup.wgsl.expected.wgsl
rename to test/tint/buffer/uniform/std140/unnested/mat2x2_f32/to_workgroup.wgsl.expected.wgsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/dynamic_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/dynamic_index_via_ptr.wgsl
new file mode 100644
index 0000000..0042446
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/dynamic_index_via_ptr.wgsl

@@ -0,0 +1,15 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> m : mat2x3<f16>;
+
+var<private> counter = 0;
+fn i() -> i32 { counter++; return counter; }
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_m   = &m;
+  let p_m_i = &((*p_m)[i()]);
+
+  let l_m   : mat2x3<f16> = *p_m;
+  let l_m_i : vec3<f16>   = *p_m_i;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..c493360
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,36 @@
+cbuffer cbuffer_m : register(b0, space0) {
+  uint4 m[1];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+matrix<float16_t, 2, 3> tint_symbol(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  return matrix<float16_t, 2, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_m_i_save = i();
+  const matrix<float16_t, 2, 3> l_m = tint_symbol(m, 0u);
+  const uint scalar_offset_2 = ((8u * uint(p_m_i_save))) / 4;
+  uint4 ubo_load_5 = m[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const vector<float16_t, 3> l_m_i = vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..a0e5889
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,41 @@
+SKIP: FAILED
+
+cbuffer cbuffer_m : register(b0, space0) {
+  uint4 m[1];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+matrix<float16_t, 2, 3> tint_symbol(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  return matrix<float16_t, 2, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_m_i_save = i();
+  const matrix<float16_t, 2, 3> l_m = tint_symbol(m, 0u);
+  const uint scalar_offset_2 = ((8u * uint(p_m_i_save))) / 4;
+  uint4 ubo_load_5 = m[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const vector<float16_t, 3> l_m_i = vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000002573701B0D0(11,8-16): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/dynamic_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/dynamic_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..b5758b5
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/dynamic_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,48 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform m_block_std140_ubo {
+  f16vec3 inner_0;
+  f16vec3 inner_1;
+} m;
+
+int counter = 0;
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+f16mat2x3 load_m_inner() {
+  return f16mat2x3(m.inner_0, m.inner_1);
+}
+
+f16vec3 load_m_inner_p0(uint p0) {
+  switch(p0) {
+    case 0u: {
+      return m.inner_0;
+      break;
+    }
+    case 1u: {
+      return m.inner_1;
+      break;
+    }
+    default: {
+      return f16vec3(0.0hf);
+      break;
+    }
+  }
+}
+
+void f() {
+  f16mat2x3 p_m = load_m_inner();
+  int tint_symbol = i();
+  f16vec3 p_m_i = load_m_inner_p0(uint(tint_symbol));
+  f16mat2x3 l_m = load_m_inner();
+  f16vec3 l_m_i = load_m_inner_p0(uint(tint_symbol));
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/dynamic_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/dynamic_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..f4a04e8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/dynamic_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,17 @@
+#include <metal_stdlib>
+
+using namespace metal;
+int i() {
+  thread int tint_symbol_1 = 0;
+  tint_symbol_1 = as_type<int>((as_type<uint>(tint_symbol_1) + as_type<uint>(1)));
+  return tint_symbol_1;
+}
+
+kernel void f(const constant half2x3* tint_symbol_2 [[buffer(0)]]) {
+  int const tint_symbol = i();
+  int const p_m_i_save = tint_symbol;
+  half2x3 const l_m = *(tint_symbol_2);
+  half3 const l_m_i = (*(tint_symbol_2))[p_m_i_save];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/dynamic_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/dynamic_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..6e27b10
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/dynamic_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,93 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 54
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %m_block_std140 "m_block_std140"
+               OpMemberName %m_block_std140 0 "inner_0"
+               OpMemberName %m_block_std140 1 "inner_1"
+               OpName %m "m"
+               OpName %counter "counter"
+               OpName %i "i"
+               OpName %load_m_inner "load_m_inner"
+               OpName %load_m_inner_p0 "load_m_inner_p0"
+               OpName %p0 "p0"
+               OpName %f "f"
+               OpDecorate %m_block_std140 Block
+               OpMemberDecorate %m_block_std140 0 Offset 0
+               OpMemberDecorate %m_block_std140 1 Offset 8
+               OpDecorate %m NonWritable
+               OpDecorate %m DescriptorSet 0
+               OpDecorate %m Binding 0
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+%m_block_std140 = OpTypeStruct %v3half %v3half
+%_ptr_Uniform_m_block_std140 = OpTypePointer Uniform %m_block_std140
+          %m = OpVariable %_ptr_Uniform_m_block_std140 Uniform
+        %int = OpTypeInt 32 1
+          %7 = OpConstantNull %int
+%_ptr_Private_int = OpTypePointer Private %int
+    %counter = OpVariable %_ptr_Private_int Private %7
+         %10 = OpTypeFunction %int
+      %int_1 = OpConstant %int 1
+ %mat2v3half = OpTypeMatrix %v3half 2
+         %17 = OpTypeFunction %mat2v3half
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+     %uint_1 = OpConstant %uint 1
+         %33 = OpTypeFunction %v3half %uint
+         %45 = OpConstantNull %v3half
+       %void = OpTypeVoid
+         %46 = OpTypeFunction %void
+          %i = OpFunction %int None %10
+         %12 = OpLabel
+         %13 = OpLoad %int %counter
+         %15 = OpIAdd %int %13 %int_1
+               OpStore %counter %15
+         %16 = OpLoad %int %counter
+               OpReturnValue %16
+               OpFunctionEnd
+%load_m_inner = OpFunction %mat2v3half None %17
+         %20 = OpLabel
+         %26 = OpAccessChain %_ptr_Uniform_v3half %m %uint_0
+         %27 = OpLoad %v3half %26
+         %30 = OpAccessChain %_ptr_Uniform_v3half %m %uint_1
+         %31 = OpLoad %v3half %30
+         %32 = OpCompositeConstruct %mat2v3half %27 %31
+               OpReturnValue %32
+               OpFunctionEnd
+%load_m_inner_p0 = OpFunction %v3half None %33
+         %p0 = OpFunctionParameter %uint
+         %36 = OpLabel
+               OpSelectionMerge %37 None
+               OpSwitch %p0 %38 0 %39 1 %40
+         %39 = OpLabel
+         %41 = OpAccessChain %_ptr_Uniform_v3half %m %uint_0
+         %42 = OpLoad %v3half %41
+               OpReturnValue %42
+         %40 = OpLabel
+         %43 = OpAccessChain %_ptr_Uniform_v3half %m %uint_1
+         %44 = OpLoad %v3half %43
+               OpReturnValue %44
+         %38 = OpLabel
+               OpReturnValue %45
+         %37 = OpLabel
+               OpReturnValue %45
+               OpFunctionEnd
+          %f = OpFunction %void None %46
+         %49 = OpLabel
+         %50 = OpFunctionCall %int %i
+         %51 = OpFunctionCall %mat2v3half %load_m_inner
+         %53 = OpBitcast %uint %50
+         %52 = OpFunctionCall %v3half %load_m_inner_p0 %53
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/dynamic_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/dynamic_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..ba171d7
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/dynamic_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,18 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> m : mat2x3<f16>;
+
+var<private> counter = 0;
+
+fn i() -> i32 {
+  counter++;
+  return counter;
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_m = &(m);
+  let p_m_i = &((*(p_m))[i()]);
+  let l_m : mat2x3<f16> = *(p_m);
+  let l_m_i : vec3<f16> = *(p_m_i);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/static_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/static_index_via_ptr.wgsl
new file mode 100644
index 0000000..69415d8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/static_index_via_ptr.wgsl

@@ -0,0 +1,15 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> m : mat2x3<f16>;
+
+var<private> counter = 0;
+fn i() -> i32 { counter++; return counter; }
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_m   = &m;
+  let p_m_1 = &((*p_m)[1]);
+
+  let l_m   : mat2x3<f16> = *p_m;
+  let l_m_1 : vec3<f16>   = *p_m_1;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..994033f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,33 @@
+cbuffer cbuffer_m : register(b0, space0) {
+  uint4 m[1];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+matrix<float16_t, 2, 3> tint_symbol(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  return matrix<float16_t, 2, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 2, 3> l_m = tint_symbol(m, 0u);
+  uint2 ubo_load_4 = m[0].zw;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const vector<float16_t, 3> l_m_1 = vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..74aeaa6
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,38 @@
+SKIP: FAILED
+
+cbuffer cbuffer_m : register(b0, space0) {
+  uint4 m[1];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+matrix<float16_t, 2, 3> tint_symbol(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  return matrix<float16_t, 2, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 2, 3> l_m = tint_symbol(m, 0u);
+  uint2 ubo_load_4 = m[0].zw;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const vector<float16_t, 3> l_m_1 = vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001F0B4D13EF0(11,8-16): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/static_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/static_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..15bc655
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/static_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,24 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform m_block_std140_ubo {
+  f16vec3 inner_0;
+  f16vec3 inner_1;
+} m;
+
+f16mat2x3 load_m_inner() {
+  return f16mat2x3(m.inner_0, m.inner_1);
+}
+
+void f() {
+  f16mat2x3 p_m = load_m_inner();
+  f16vec3 p_m_1 = m.inner_1;
+  f16mat2x3 l_m = load_m_inner();
+  f16vec3 l_m_1 = m.inner_1;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/static_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/static_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..036bb82
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/static_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,15 @@
+#include <metal_stdlib>
+
+using namespace metal;
+int i() {
+  thread int tint_symbol = 0;
+  tint_symbol = as_type<int>((as_type<uint>(tint_symbol) + as_type<uint>(1)));
+  return tint_symbol;
+}
+
+kernel void f(const constant half2x3* tint_symbol_1 [[buffer(0)]]) {
+  half2x3 const l_m = *(tint_symbol_1);
+  half3 const l_m_1 = (*(tint_symbol_1))[1];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/static_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/static_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..84284de
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/static_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,70 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 40
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %m_block_std140 "m_block_std140"
+               OpMemberName %m_block_std140 0 "inner_0"
+               OpMemberName %m_block_std140 1 "inner_1"
+               OpName %m "m"
+               OpName %counter "counter"
+               OpName %i "i"
+               OpName %load_m_inner "load_m_inner"
+               OpName %f "f"
+               OpDecorate %m_block_std140 Block
+               OpMemberDecorate %m_block_std140 0 Offset 0
+               OpMemberDecorate %m_block_std140 1 Offset 8
+               OpDecorate %m NonWritable
+               OpDecorate %m DescriptorSet 0
+               OpDecorate %m Binding 0
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+%m_block_std140 = OpTypeStruct %v3half %v3half
+%_ptr_Uniform_m_block_std140 = OpTypePointer Uniform %m_block_std140
+          %m = OpVariable %_ptr_Uniform_m_block_std140 Uniform
+        %int = OpTypeInt 32 1
+          %7 = OpConstantNull %int
+%_ptr_Private_int = OpTypePointer Private %int
+    %counter = OpVariable %_ptr_Private_int Private %7
+         %10 = OpTypeFunction %int
+      %int_1 = OpConstant %int 1
+ %mat2v3half = OpTypeMatrix %v3half 2
+         %17 = OpTypeFunction %mat2v3half
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+     %uint_1 = OpConstant %uint 1
+       %void = OpTypeVoid
+         %33 = OpTypeFunction %void
+          %i = OpFunction %int None %10
+         %12 = OpLabel
+         %13 = OpLoad %int %counter
+         %15 = OpIAdd %int %13 %int_1
+               OpStore %counter %15
+         %16 = OpLoad %int %counter
+               OpReturnValue %16
+               OpFunctionEnd
+%load_m_inner = OpFunction %mat2v3half None %17
+         %20 = OpLabel
+         %26 = OpAccessChain %_ptr_Uniform_v3half %m %uint_0
+         %27 = OpLoad %v3half %26
+         %30 = OpAccessChain %_ptr_Uniform_v3half %m %uint_1
+         %31 = OpLoad %v3half %30
+         %32 = OpCompositeConstruct %mat2v3half %27 %31
+               OpReturnValue %32
+               OpFunctionEnd
+          %f = OpFunction %void None %33
+         %36 = OpLabel
+         %37 = OpFunctionCall %mat2v3half %load_m_inner
+         %38 = OpAccessChain %_ptr_Uniform_v3half %m %uint_1
+         %39 = OpLoad %v3half %38
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/static_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/static_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..450a760
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/static_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,18 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> m : mat2x3<f16>;
+
+var<private> counter = 0;
+
+fn i() -> i32 {
+  counter++;
+  return counter;
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_m = &(m);
+  let p_m_1 = &((*(p_m))[1]);
+  let l_m : mat2x3<f16> = *(p_m);
+  let l_m_1 : vec3<f16> = *(p_m_1);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_builtin.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_builtin.wgsl
new file mode 100644
index 0000000..02280d5
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_builtin.wgsl

@@ -0,0 +1,10 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat2x3<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+    let t = transpose(u);
+    let l = length(u[1]);
+    let a = abs(u[0].zxy.x);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_builtin.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_builtin.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..c3d1652
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_builtin.wgsl.expected.dxc.hlsl

@@ -0,0 +1,31 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+
+matrix<float16_t, 2, 3> tint_symbol(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  return matrix<float16_t, 2, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 3, 2> t = transpose(tint_symbol(u, 0u));
+  uint2 ubo_load_4 = u[0].zw;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const float16_t l = length(vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]));
+  uint2 ubo_load_5 = u[0].xy;
+  vector<float16_t, 2> ubo_load_5_xz = vector<float16_t, 2>(f16tof32(ubo_load_5 & 0xFFFF));
+  float16_t ubo_load_5_y = f16tof32(ubo_load_5[0] >> 16);
+  const float16_t a = abs(vector<float16_t, 3>(ubo_load_5_xz[0], ubo_load_5_y, ubo_load_5_xz[1]).zxy.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_builtin.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_builtin.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..8774da1
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_builtin.wgsl.expected.fxc.hlsl

@@ -0,0 +1,36 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+
+matrix<float16_t, 2, 3> tint_symbol(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  return matrix<float16_t, 2, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 3, 2> t = transpose(tint_symbol(u, 0u));
+  uint2 ubo_load_4 = u[0].zw;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const float16_t l = length(vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]));
+  uint2 ubo_load_5 = u[0].xy;
+  vector<float16_t, 2> ubo_load_5_xz = vector<float16_t, 2>(f16tof32(ubo_load_5 & 0xFFFF));
+  float16_t ubo_load_5_y = f16tof32(ubo_load_5[0] >> 16);
+  const float16_t a = abs(vector<float16_t, 3>(ubo_load_5_xz[0], ubo_load_5_y, ubo_load_5_xz[1]).zxy.x);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000024F9111AD30(5,8-16): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_builtin.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_builtin.wgsl.expected.glsl
new file mode 100644
index 0000000..33cf0d2
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_builtin.wgsl.expected.glsl

@@ -0,0 +1,23 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  f16vec3 inner_0;
+  f16vec3 inner_1;
+} u;
+
+f16mat2x3 load_u_inner() {
+  return f16mat2x3(u.inner_0, u.inner_1);
+}
+
+void f() {
+  f16mat3x2 t = transpose(load_u_inner());
+  float16_t l = length(u.inner_1);
+  float16_t a = abs(u.inner_0.zxy[0u]);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_builtin.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_builtin.wgsl.expected.msl
new file mode 100644
index 0000000..c81aee4
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_builtin.wgsl.expected.msl

@@ -0,0 +1,10 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void f(const constant half2x3* tint_symbol [[buffer(0)]]) {
+  half3x2 const t = transpose(*(tint_symbol));
+  half const l = length((*(tint_symbol))[1]);
+  half const a = fabs(half3((*(tint_symbol))[0]).zxy[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_builtin.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_builtin.wgsl.expected.spvasm
new file mode 100644
index 0000000..07fdde3
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_builtin.wgsl.expected.spvasm

@@ -0,0 +1,65 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 40
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+         %31 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner_0"
+               OpMemberName %u_block_std140 1 "inner_1"
+               OpName %u "u"
+               OpName %load_u_inner "load_u_inner"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %u_block_std140 1 Offset 8
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+%u_block_std140 = OpTypeStruct %v3half %v3half
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat2v3half = OpTypeMatrix %v3half 2
+          %6 = OpTypeFunction %mat2v3half
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+     %uint_1 = OpConstant %uint 1
+       %void = OpTypeVoid
+         %22 = OpTypeFunction %void
+     %v2half = OpTypeVector %half 2
+ %mat3v2half = OpTypeMatrix %v2half 3
+         %38 = OpConstantNull %uint
+%load_u_inner = OpFunction %mat2v3half None %6
+          %9 = OpLabel
+         %15 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0
+         %16 = OpLoad %v3half %15
+         %19 = OpAccessChain %_ptr_Uniform_v3half %u %uint_1
+         %20 = OpLoad %v3half %19
+         %21 = OpCompositeConstruct %mat2v3half %16 %20
+               OpReturnValue %21
+               OpFunctionEnd
+          %f = OpFunction %void None %22
+         %25 = OpLabel
+         %29 = OpFunctionCall %mat2v3half %load_u_inner
+         %26 = OpTranspose %mat3v2half %29
+         %32 = OpAccessChain %_ptr_Uniform_v3half %u %uint_1
+         %33 = OpLoad %v3half %32
+         %30 = OpExtInst %half %31 Length %33
+         %35 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0
+         %36 = OpLoad %v3half %35
+         %37 = OpVectorShuffle %v3half %36 %36 2 0 1
+         %39 = OpCompositeExtract %half %37 0
+         %34 = OpExtInst %half %31 FAbs %39
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_builtin.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_builtin.wgsl.expected.wgsl
new file mode 100644
index 0000000..84e0049
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_builtin.wgsl.expected.wgsl

@@ -0,0 +1,10 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat2x3<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let t = transpose(u);
+  let l = length(u[1]);
+  let a = abs(u[0].zxy.x);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_fn.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_fn.wgsl
new file mode 100644
index 0000000..3d67c49
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_fn.wgsl

@@ -0,0 +1,16 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat2x3<f16>;
+
+fn a(m : mat2x3<f16>) {}
+fn b(v : vec3<f16>) {}
+fn c(f : f16) {}
+
+@compute @workgroup_size(1)
+fn f() {
+    a(u);
+    b(u[1]);
+    b(u[1].zxy);
+    c(u[1].x);
+    c(u[1].zxy.x);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_fn.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_fn.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..4003a4e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_fn.wgsl.expected.dxc.hlsl

@@ -0,0 +1,45 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+
+void a(matrix<float16_t, 2, 3> m) {
+}
+
+void b(vector<float16_t, 3> v) {
+}
+
+void c(float16_t f_1) {
+}
+
+matrix<float16_t, 2, 3> tint_symbol(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  return matrix<float16_t, 2, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  uint2 ubo_load_4 = u[0].zw;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  b(vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]));
+  uint2 ubo_load_5 = u[0].zw;
+  vector<float16_t, 2> ubo_load_5_xz = vector<float16_t, 2>(f16tof32(ubo_load_5 & 0xFFFF));
+  float16_t ubo_load_5_y = f16tof32(ubo_load_5[0] >> 16);
+  b(vector<float16_t, 3>(ubo_load_5_xz[0], ubo_load_5_y, ubo_load_5_xz[1]).zxy);
+  c(float16_t(f16tof32(((u[0].z) & 0xFFFF))));
+  uint2 ubo_load_6 = u[0].zw;
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  c(vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]).zxy.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_fn.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_fn.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..5aa92fe
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_fn.wgsl.expected.fxc.hlsl

@@ -0,0 +1,52 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+
+void a(matrix<float16_t, 2, 3> m) {
+}
+
+void b(vector<float16_t, 3> v) {
+}
+
+void c(float16_t f_1) {
+}
+
+matrix<float16_t, 2, 3> tint_symbol(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  return matrix<float16_t, 2, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  uint2 ubo_load_4 = u[0].zw;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  b(vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]));
+  uint2 ubo_load_5 = u[0].zw;
+  vector<float16_t, 2> ubo_load_5_xz = vector<float16_t, 2>(f16tof32(ubo_load_5 & 0xFFFF));
+  float16_t ubo_load_5_y = f16tof32(ubo_load_5[0] >> 16);
+  b(vector<float16_t, 3>(ubo_load_5_xz[0], ubo_load_5_y, ubo_load_5_xz[1]).zxy);
+  c(float16_t(f16tof32(((u[0].z) & 0xFFFF))));
+  uint2 ubo_load_6 = u[0].zw;
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  c(vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]).zxy.x);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x00000239ABA8AAE0(5,15-23): error X3000: syntax error: unexpected token 'float16_t'
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x00000239ABA8AAE0(8,15-23): error X3000: syntax error: unexpected token 'float16_t'
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x00000239ABA8AAE0(11,8-16): error X3000: unrecognized identifier 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_fn.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_fn.wgsl.expected.glsl
new file mode 100644
index 0000000..3938a63
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_fn.wgsl.expected.glsl

@@ -0,0 +1,34 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  f16vec3 inner_0;
+  f16vec3 inner_1;
+} u;
+
+void a(f16mat2x3 m) {
+}
+
+void b(f16vec3 v) {
+}
+
+void c(float16_t f_1) {
+}
+
+f16mat2x3 load_u_inner() {
+  return f16mat2x3(u.inner_0, u.inner_1);
+}
+
+void f() {
+  a(load_u_inner());
+  b(u.inner_1);
+  b(u.inner_1.zxy);
+  c(u.inner_1[0u]);
+  c(u.inner_1.zxy[0u]);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_fn.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_fn.wgsl.expected.msl
new file mode 100644
index 0000000..339491e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_fn.wgsl.expected.msl

@@ -0,0 +1,21 @@
+#include <metal_stdlib>
+
+using namespace metal;
+void a(half2x3 m) {
+}
+
+void b(half3 v) {
+}
+
+void c(half f_1) {
+}
+
+kernel void f(const constant half2x3* tint_symbol [[buffer(0)]]) {
+  a(*(tint_symbol));
+  b((*(tint_symbol))[1]);
+  b(half3((*(tint_symbol))[1]).zxy);
+  c((*(tint_symbol))[1][0]);
+  c(half3((*(tint_symbol))[1]).zxy[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_fn.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_fn.wgsl.expected.spvasm
new file mode 100644
index 0000000..c2eb2c2
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_fn.wgsl.expected.spvasm

@@ -0,0 +1,94 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 57
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner_0"
+               OpMemberName %u_block_std140 1 "inner_1"
+               OpName %u "u"
+               OpName %a "a"
+               OpName %m "m"
+               OpName %b "b"
+               OpName %v "v"
+               OpName %c "c"
+               OpName %f_1 "f_1"
+               OpName %load_u_inner "load_u_inner"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %u_block_std140 1 Offset 8
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+%u_block_std140 = OpTypeStruct %v3half %v3half
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+       %void = OpTypeVoid
+ %mat2v3half = OpTypeMatrix %v3half 2
+          %6 = OpTypeFunction %void %mat2v3half
+         %12 = OpTypeFunction %void %v3half
+         %16 = OpTypeFunction %void %half
+         %20 = OpTypeFunction %mat2v3half
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+     %uint_1 = OpConstant %uint 1
+         %35 = OpTypeFunction %void
+         %48 = OpConstantNull %uint
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+          %a = OpFunction %void None %6
+          %m = OpFunctionParameter %mat2v3half
+         %11 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %b = OpFunction %void None %12
+          %v = OpFunctionParameter %v3half
+         %15 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %c = OpFunction %void None %16
+        %f_1 = OpFunctionParameter %half
+         %19 = OpLabel
+               OpReturn
+               OpFunctionEnd
+%load_u_inner = OpFunction %mat2v3half None %20
+         %22 = OpLabel
+         %28 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0
+         %29 = OpLoad %v3half %28
+         %32 = OpAccessChain %_ptr_Uniform_v3half %u %uint_1
+         %33 = OpLoad %v3half %32
+         %34 = OpCompositeConstruct %mat2v3half %29 %33
+               OpReturnValue %34
+               OpFunctionEnd
+          %f = OpFunction %void None %35
+         %37 = OpLabel
+         %39 = OpFunctionCall %mat2v3half %load_u_inner
+         %38 = OpFunctionCall %void %a %39
+         %41 = OpAccessChain %_ptr_Uniform_v3half %u %uint_1
+         %42 = OpLoad %v3half %41
+         %40 = OpFunctionCall %void %b %42
+         %44 = OpAccessChain %_ptr_Uniform_v3half %u %uint_1
+         %45 = OpLoad %v3half %44
+         %46 = OpVectorShuffle %v3half %45 %45 2 0 1
+         %43 = OpFunctionCall %void %b %46
+         %50 = OpAccessChain %_ptr_Uniform_half %u %uint_1 %48
+         %51 = OpLoad %half %50
+         %47 = OpFunctionCall %void %c %51
+         %53 = OpAccessChain %_ptr_Uniform_v3half %u %uint_1
+         %54 = OpLoad %v3half %53
+         %55 = OpVectorShuffle %v3half %54 %54 2 0 1
+         %56 = OpCompositeExtract %half %55 0
+         %52 = OpFunctionCall %void %c %56
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_fn.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_fn.wgsl.expected.wgsl
new file mode 100644
index 0000000..ff22761
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_fn.wgsl.expected.wgsl

@@ -0,0 +1,21 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat2x3<f16>;
+
+fn a(m : mat2x3<f16>) {
+}
+
+fn b(v : vec3<f16>) {
+}
+
+fn c(f : f16) {
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  a(u);
+  b(u[1]);
+  b(u[1].zxy);
+  c(u[1].x);
+  c(u[1].zxy.x);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_private.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_private.wgsl
new file mode 100644
index 0000000..4fa4b5a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_private.wgsl

@@ -0,0 +1,12 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat2x3<f16>;
+var<private> p : mat2x3<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+    p = u;
+    p[1] = u[0];
+    p[1] = u[0].zxy;
+    p[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_private.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_private.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..5cec9ae
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_private.wgsl.expected.dxc.hlsl

@@ -0,0 +1,33 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+static matrix<float16_t, 2, 3> p = matrix<float16_t, 2, 3>(float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h));
+
+matrix<float16_t, 2, 3> tint_symbol(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  return matrix<float16_t, 2, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  uint2 ubo_load_4 = u[0].xy;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  p[1] = vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]);
+  uint2 ubo_load_5 = u[0].xy;
+  vector<float16_t, 2> ubo_load_5_xz = vector<float16_t, 2>(f16tof32(ubo_load_5 & 0xFFFF));
+  float16_t ubo_load_5_y = f16tof32(ubo_load_5[0] >> 16);
+  p[1] = vector<float16_t, 3>(ubo_load_5_xz[0], ubo_load_5_y, ubo_load_5_xz[1]).zxy;
+  p[0][1] = float16_t(f16tof32(((u[0].z) & 0xFFFF)));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_private.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_private.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..9ceeb9d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_private.wgsl.expected.fxc.hlsl

@@ -0,0 +1,38 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+static matrix<float16_t, 2, 3> p = matrix<float16_t, 2, 3>(float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h));
+
+matrix<float16_t, 2, 3> tint_symbol(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  return matrix<float16_t, 2, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  uint2 ubo_load_4 = u[0].xy;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  p[1] = vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]);
+  uint2 ubo_load_5 = u[0].xy;
+  vector<float16_t, 2> ubo_load_5_xz = vector<float16_t, 2>(f16tof32(ubo_load_5 & 0xFFFF));
+  float16_t ubo_load_5_y = f16tof32(ubo_load_5[0] >> 16);
+  p[1] = vector<float16_t, 3>(ubo_load_5_xz[0], ubo_load_5_y, ubo_load_5_xz[1]).zxy;
+  p[0][1] = float16_t(f16tof32(((u[0].z) & 0xFFFF)));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000017EBF6D45E0(4,15-23): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_private.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_private.wgsl.expected.glsl
new file mode 100644
index 0000000..ea309f4
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_private.wgsl.expected.glsl

@@ -0,0 +1,25 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  f16vec3 inner_0;
+  f16vec3 inner_1;
+} u;
+
+f16mat2x3 p = f16mat2x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf);
+f16mat2x3 load_u_inner() {
+  return f16mat2x3(u.inner_0, u.inner_1);
+}
+
+void f() {
+  p = load_u_inner();
+  p[1] = u.inner_0;
+  p[1] = u.inner_0.zxy;
+  p[0][1] = u.inner_1[0u];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_private.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_private.wgsl.expected.msl
new file mode 100644
index 0000000..a233531
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_private.wgsl.expected.msl

@@ -0,0 +1,12 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void f(const constant half2x3* tint_symbol_1 [[buffer(0)]]) {
+  thread half2x3 tint_symbol = half2x3(0.0h);
+  tint_symbol = *(tint_symbol_1);
+  tint_symbol[1] = (*(tint_symbol_1))[0];
+  tint_symbol[1] = half3((*(tint_symbol_1))[0]).zxy;
+  tint_symbol[0][1] = (*(tint_symbol_1))[1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_private.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_private.wgsl.expected.spvasm
new file mode 100644
index 0000000..651b673
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_private.wgsl.expected.spvasm

@@ -0,0 +1,77 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 47
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner_0"
+               OpMemberName %u_block_std140 1 "inner_1"
+               OpName %u "u"
+               OpName %p "p"
+               OpName %load_u_inner "load_u_inner"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %u_block_std140 1 Offset 8
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+%u_block_std140 = OpTypeStruct %v3half %v3half
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat2v3half = OpTypeMatrix %v3half 2
+%_ptr_Private_mat2v3half = OpTypePointer Private %mat2v3half
+          %9 = OpConstantNull %mat2v3half
+          %p = OpVariable %_ptr_Private_mat2v3half Private %9
+         %10 = OpTypeFunction %mat2v3half
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+     %uint_1 = OpConstant %uint 1
+       %void = OpTypeVoid
+         %25 = OpTypeFunction %void
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Private_v3half = OpTypePointer Private %v3half
+         %40 = OpConstantNull %int
+%_ptr_Private_half = OpTypePointer Private %half
+         %43 = OpConstantNull %uint
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+%load_u_inner = OpFunction %mat2v3half None %10
+         %12 = OpLabel
+         %18 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0
+         %19 = OpLoad %v3half %18
+         %22 = OpAccessChain %_ptr_Uniform_v3half %u %uint_1
+         %23 = OpLoad %v3half %22
+         %24 = OpCompositeConstruct %mat2v3half %19 %23
+               OpReturnValue %24
+               OpFunctionEnd
+          %f = OpFunction %void None %25
+         %28 = OpLabel
+         %29 = OpFunctionCall %mat2v3half %load_u_inner
+               OpStore %p %29
+         %33 = OpAccessChain %_ptr_Private_v3half %p %int_1
+         %34 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0
+         %35 = OpLoad %v3half %34
+               OpStore %33 %35
+         %36 = OpAccessChain %_ptr_Private_v3half %p %int_1
+         %37 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0
+         %38 = OpLoad %v3half %37
+         %39 = OpVectorShuffle %v3half %38 %38 2 0 1
+               OpStore %36 %39
+         %42 = OpAccessChain %_ptr_Private_half %p %40 %int_1
+         %45 = OpAccessChain %_ptr_Uniform_half %u %uint_1 %43
+         %46 = OpLoad %half %45
+               OpStore %42 %46
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_private.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_private.wgsl.expected.wgsl
new file mode 100644
index 0000000..0a0d370
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_private.wgsl.expected.wgsl

@@ -0,0 +1,13 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat2x3<f16>;
+
+var<private> p : mat2x3<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+  p = u;
+  p[1] = u[0];
+  p[1] = u[0].zxy;
+  p[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_storage.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_storage.wgsl
new file mode 100644
index 0000000..e14acf2
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_storage.wgsl

@@ -0,0 +1,12 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat2x3<f16>;
+@group(0) @binding(1) var<storage, read_write> s : mat2x3<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+    s = u;
+    s[1] = u[0];
+    s[1] = u[0].zxy;
+    s[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_storage.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_storage.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..4655996
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_storage.wgsl.expected.dxc.hlsl

@@ -0,0 +1,38 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 2, 3> value) {
+  buffer.Store<vector<float16_t, 3> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 8u), value[1u]);
+}
+
+matrix<float16_t, 2, 3> tint_symbol_2(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  return matrix<float16_t, 2, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_2(u, 0u));
+  uint2 ubo_load_4 = u[0].xy;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  s.Store<vector<float16_t, 3> >(8u, vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]));
+  uint2 ubo_load_5 = u[0].xy;
+  vector<float16_t, 2> ubo_load_5_xz = vector<float16_t, 2>(f16tof32(ubo_load_5 & 0xFFFF));
+  float16_t ubo_load_5_y = f16tof32(ubo_load_5[0] >> 16);
+  s.Store<vector<float16_t, 3> >(8u, vector<float16_t, 3>(ubo_load_5_xz[0], ubo_load_5_y, ubo_load_5_xz[1]).zxy);
+  s.Store<float16_t>(2u, float16_t(f16tof32(((u[0].z) & 0xFFFF))));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_storage.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_storage.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..ddbe694
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_storage.wgsl.expected.fxc.hlsl

@@ -0,0 +1,44 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 2, 3> value) {
+  buffer.Store<vector<float16_t, 3> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 8u), value[1u]);
+}
+
+matrix<float16_t, 2, 3> tint_symbol_2(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  return matrix<float16_t, 2, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_2(u, 0u));
+  uint2 ubo_load_4 = u[0].xy;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  s.Store<vector<float16_t, 3> >(8u, vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]));
+  uint2 ubo_load_5 = u[0].xy;
+  vector<float16_t, 2> ubo_load_5_xz = vector<float16_t, 2>(f16tof32(ubo_load_5 & 0xFFFF));
+  float16_t ubo_load_5_y = f16tof32(ubo_load_5[0] >> 16);
+  s.Store<vector<float16_t, 3> >(8u, vector<float16_t, 3>(ubo_load_5_xz[0], ubo_load_5_y, ubo_load_5_xz[1]).zxy);
+  s.Store<float16_t>(2u, float16_t(f16tof32(((u[0].z) & 0xFFFF))));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000015012B07F20(6,66-74): error X3000: syntax error: unexpected token 'float16_t'
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000015012B07F20(7,3-14): error X3018: invalid subscript 'Store'
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_storage.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_storage.wgsl.expected.glsl
new file mode 100644
index 0000000..ba0c55a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_storage.wgsl.expected.glsl

@@ -0,0 +1,28 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  f16vec3 inner_0;
+  f16vec3 inner_1;
+} u;
+
+layout(binding = 1, std430) buffer u_block_ssbo {
+  f16mat2x3 inner;
+} s;
+
+f16mat2x3 load_u_inner() {
+  return f16mat2x3(u.inner_0, u.inner_1);
+}
+
+void f() {
+  s.inner = load_u_inner();
+  s.inner[1] = u.inner_0;
+  s.inner[1] = u.inner_0.zxy;
+  s.inner[0][1] = u.inner_1[0u];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_storage.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_storage.wgsl.expected.msl
new file mode 100644
index 0000000..495bebf
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_storage.wgsl.expected.msl

@@ -0,0 +1,11 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void f(device half2x3* tint_symbol [[buffer(1)]], const constant half2x3* tint_symbol_1 [[buffer(0)]]) {
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[0];
+  (*(tint_symbol))[1] = half3((*(tint_symbol_1))[0]).zxy;
+  (*(tint_symbol))[0][1] = (*(tint_symbol_1))[1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_storage.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_storage.wgsl.expected.spvasm
new file mode 100644
index 0000000..67096f3
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_storage.wgsl.expected.spvasm

@@ -0,0 +1,87 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 49
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner_0"
+               OpMemberName %u_block_std140 1 "inner_1"
+               OpName %u "u"
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %s "s"
+               OpName %load_u_inner "load_u_inner"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %u_block_std140 1 Offset 8
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 8
+               OpDecorate %s DescriptorSet 0
+               OpDecorate %s Binding 1
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+%u_block_std140 = OpTypeStruct %v3half %v3half
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat2v3half = OpTypeMatrix %v3half 2
+    %u_block = OpTypeStruct %mat2v3half
+%_ptr_StorageBuffer_u_block = OpTypePointer StorageBuffer %u_block
+          %s = OpVariable %_ptr_StorageBuffer_u_block StorageBuffer
+         %10 = OpTypeFunction %mat2v3half
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+     %uint_1 = OpConstant %uint 1
+       %void = OpTypeVoid
+         %25 = OpTypeFunction %void
+%_ptr_StorageBuffer_mat2v3half = OpTypePointer StorageBuffer %mat2v3half
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_StorageBuffer_v3half = OpTypePointer StorageBuffer %v3half
+         %42 = OpConstantNull %int
+%_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
+         %45 = OpConstantNull %uint
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+%load_u_inner = OpFunction %mat2v3half None %10
+         %12 = OpLabel
+         %18 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0
+         %19 = OpLoad %v3half %18
+         %22 = OpAccessChain %_ptr_Uniform_v3half %u %uint_1
+         %23 = OpLoad %v3half %22
+         %24 = OpCompositeConstruct %mat2v3half %19 %23
+               OpReturnValue %24
+               OpFunctionEnd
+          %f = OpFunction %void None %25
+         %28 = OpLabel
+         %30 = OpAccessChain %_ptr_StorageBuffer_mat2v3half %s %uint_0
+         %31 = OpFunctionCall %mat2v3half %load_u_inner
+               OpStore %30 %31
+         %35 = OpAccessChain %_ptr_StorageBuffer_v3half %s %uint_0 %int_1
+         %36 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0
+         %37 = OpLoad %v3half %36
+               OpStore %35 %37
+         %38 = OpAccessChain %_ptr_StorageBuffer_v3half %s %uint_0 %int_1
+         %39 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0
+         %40 = OpLoad %v3half %39
+         %41 = OpVectorShuffle %v3half %40 %40 2 0 1
+               OpStore %38 %41
+         %44 = OpAccessChain %_ptr_StorageBuffer_half %s %uint_0 %42 %int_1
+         %47 = OpAccessChain %_ptr_Uniform_half %u %uint_1 %45
+         %48 = OpLoad %half %47
+               OpStore %44 %48
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_storage.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_storage.wgsl.expected.wgsl
new file mode 100644
index 0000000..3e7f84b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_storage.wgsl.expected.wgsl

@@ -0,0 +1,13 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat2x3<f16>;
+
+@group(0) @binding(1) var<storage, read_write> s : mat2x3<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+  s = u;
+  s[1] = u[0];
+  s[1] = u[0].zxy;
+  s[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_workgroup.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_workgroup.wgsl
new file mode 100644
index 0000000..0fec122
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_workgroup.wgsl

@@ -0,0 +1,12 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat2x3<f16>;
+var<workgroup> w : mat2x3<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+    w = u;
+    w[1] = u[0];
+    w[1] = u[0].zxy;
+    w[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_workgroup.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_workgroup.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..5826664
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_workgroup.wgsl.expected.dxc.hlsl

@@ -0,0 +1,45 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+groupshared matrix<float16_t, 2, 3> w;
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+matrix<float16_t, 2, 3> tint_symbol_2(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  return matrix<float16_t, 2, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]));
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    w = matrix<float16_t, 2, 3>((float16_t(0.0h)).xxx, (float16_t(0.0h)).xxx);
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  uint2 ubo_load_4 = u[0].xy;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  w[1] = vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]);
+  uint2 ubo_load_5 = u[0].xy;
+  vector<float16_t, 2> ubo_load_5_xz = vector<float16_t, 2>(f16tof32(ubo_load_5 & 0xFFFF));
+  float16_t ubo_load_5_y = f16tof32(ubo_load_5[0] >> 16);
+  w[1] = vector<float16_t, 3>(ubo_load_5_xz[0], ubo_load_5_y, ubo_load_5_xz[1]).zxy;
+  w[0][1] = float16_t(f16tof32(((u[0].z) & 0xFFFF)));
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_workgroup.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_workgroup.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..2285b54
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_workgroup.wgsl.expected.fxc.hlsl

@@ -0,0 +1,50 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+groupshared matrix<float16_t, 2, 3> w;
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+matrix<float16_t, 2, 3> tint_symbol_2(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  return matrix<float16_t, 2, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]));
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    w = matrix<float16_t, 2, 3>((float16_t(0.0h)).xxx, (float16_t(0.0h)).xxx);
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  uint2 ubo_load_4 = u[0].xy;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  w[1] = vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]);
+  uint2 ubo_load_5 = u[0].xy;
+  vector<float16_t, 2> ubo_load_5_xz = vector<float16_t, 2>(f16tof32(ubo_load_5 & 0xFFFF));
+  float16_t ubo_load_5_y = f16tof32(ubo_load_5[0] >> 16);
+  w[1] = vector<float16_t, 3>(ubo_load_5_xz[0], ubo_load_5_y, ubo_load_5_xz[1]).zxy;
+  w[0][1] = float16_t(f16tof32(((u[0].z) & 0xFFFF)));
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x00000293678839C0(4,20-28): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_workgroup.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_workgroup.wgsl.expected.glsl
new file mode 100644
index 0000000..c99e017
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_workgroup.wgsl.expected.glsl

@@ -0,0 +1,29 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  f16vec3 inner_0;
+  f16vec3 inner_1;
+} u;
+
+shared f16mat2x3 w;
+f16mat2x3 load_u_inner() {
+  return f16mat2x3(u.inner_0, u.inner_1);
+}
+
+void f(uint local_invocation_index) {
+  {
+    w = f16mat2x3(f16vec3(0.0hf), f16vec3(0.0hf));
+  }
+  barrier();
+  w = load_u_inner();
+  w[1] = u.inner_0;
+  w[1] = u.inner_0.zxy;
+  w[0][1] = u.inner_1[0u];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f(gl_LocalInvocationIndex);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_workgroup.wgsl.expected.msl
new file mode 100644
index 0000000..4ccd046
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_workgroup.wgsl.expected.msl

@@ -0,0 +1,24 @@
+#include <metal_stdlib>
+
+using namespace metal;
+struct tint_symbol_5 {
+  half2x3 w;
+};
+
+void f_inner(uint local_invocation_index, threadgroup half2x3* const tint_symbol, const constant half2x3* const tint_symbol_1) {
+  {
+    *(tint_symbol) = half2x3(half3(0.0h), half3(0.0h));
+  }
+  threadgroup_barrier(mem_flags::mem_threadgroup);
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[0];
+  (*(tint_symbol))[1] = half3((*(tint_symbol_1))[0]).zxy;
+  (*(tint_symbol))[0][1] = (*(tint_symbol_1))[1][0];
+}
+
+kernel void f(const constant half2x3* tint_symbol_4 [[buffer(0)]], threadgroup tint_symbol_5* tint_symbol_3 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
+  threadgroup half2x3* const tint_symbol_2 = &((*(tint_symbol_3)).w);
+  f_inner(local_invocation_index, tint_symbol_2, tint_symbol_4);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_workgroup.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_workgroup.wgsl.expected.spvasm
new file mode 100644
index 0000000..167a8c9
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_workgroup.wgsl.expected.spvasm

@@ -0,0 +1,95 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 58
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f" %local_invocation_index_1
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %local_invocation_index_1 "local_invocation_index_1"
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner_0"
+               OpMemberName %u_block_std140 1 "inner_1"
+               OpName %u "u"
+               OpName %w "w"
+               OpName %load_u_inner "load_u_inner"
+               OpName %f_inner "f_inner"
+               OpName %local_invocation_index "local_invocation_index"
+               OpName %f "f"
+               OpDecorate %local_invocation_index_1 BuiltIn LocalInvocationIndex
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %u_block_std140 1 Offset 8
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %uint = OpTypeInt 32 0
+%_ptr_Input_uint = OpTypePointer Input %uint
+%local_invocation_index_1 = OpVariable %_ptr_Input_uint Input
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+%u_block_std140 = OpTypeStruct %v3half %v3half
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat2v3half = OpTypeMatrix %v3half 2
+%_ptr_Workgroup_mat2v3half = OpTypePointer Workgroup %mat2v3half
+          %w = OpVariable %_ptr_Workgroup_mat2v3half Workgroup
+         %12 = OpTypeFunction %mat2v3half
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+     %uint_1 = OpConstant %uint 1
+       %void = OpTypeVoid
+         %26 = OpTypeFunction %void %uint
+         %31 = OpConstantNull %mat2v3half
+     %uint_2 = OpConstant %uint 2
+   %uint_264 = OpConstant %uint 264
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Workgroup_v3half = OpTypePointer Workgroup %v3half
+         %46 = OpConstantNull %int
+%_ptr_Workgroup_half = OpTypePointer Workgroup %half
+         %49 = OpConstantNull %uint
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+         %53 = OpTypeFunction %void
+%load_u_inner = OpFunction %mat2v3half None %12
+         %14 = OpLabel
+         %19 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0
+         %20 = OpLoad %v3half %19
+         %23 = OpAccessChain %_ptr_Uniform_v3half %u %uint_1
+         %24 = OpLoad %v3half %23
+         %25 = OpCompositeConstruct %mat2v3half %20 %24
+               OpReturnValue %25
+               OpFunctionEnd
+    %f_inner = OpFunction %void None %26
+%local_invocation_index = OpFunctionParameter %uint
+         %30 = OpLabel
+               OpStore %w %31
+               OpControlBarrier %uint_2 %uint_2 %uint_264
+         %35 = OpFunctionCall %mat2v3half %load_u_inner
+               OpStore %w %35
+         %39 = OpAccessChain %_ptr_Workgroup_v3half %w %int_1
+         %40 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0
+         %41 = OpLoad %v3half %40
+               OpStore %39 %41
+         %42 = OpAccessChain %_ptr_Workgroup_v3half %w %int_1
+         %43 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0
+         %44 = OpLoad %v3half %43
+         %45 = OpVectorShuffle %v3half %44 %44 2 0 1
+               OpStore %42 %45
+         %48 = OpAccessChain %_ptr_Workgroup_half %w %46 %int_1
+         %51 = OpAccessChain %_ptr_Uniform_half %u %uint_1 %49
+         %52 = OpLoad %half %51
+               OpStore %48 %52
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %53
+         %55 = OpLabel
+         %57 = OpLoad %uint %local_invocation_index_1
+         %56 = OpFunctionCall %void %f_inner %57
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_workgroup.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_workgroup.wgsl.expected.wgsl
new file mode 100644
index 0000000..81f551f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_workgroup.wgsl.expected.wgsl

@@ -0,0 +1,13 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat2x3<f16>;
+
+var<workgroup> w : mat2x3<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+  w = u;
+  w[1] = u[0];
+  w[1] = u[0].zxy;
+  w[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/dynamic_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/dynamic_index_via_ptr.wgsl
new file mode 100644
index 0000000..13510bd
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/dynamic_index_via_ptr.wgsl

@@ -0,0 +1,13 @@
+@group(0) @binding(0) var<uniform> m : mat2x3<f32>;
+
+var<private> counter = 0;
+fn i() -> i32 { counter++; return counter; }
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_m   = &m;
+  let p_m_i = &((*p_m)[i()]);
+
+  let l_m   : mat2x3<f32> = *p_m;
+  let l_m_i : vec3<f32>   = *p_m_i;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..0cf7edc
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,24 @@
+cbuffer cbuffer_m : register(b0, space0) {
+  uint4 m[2];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+float2x3 tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_m_i_save = i();
+  const float2x3 l_m = tint_symbol(m, 0u);
+  const uint scalar_offset_2 = ((16u * uint(p_m_i_save))) / 4;
+  const float3 l_m_i = asfloat(m[scalar_offset_2 / 4].xyz);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..0cf7edc
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,24 @@
+cbuffer cbuffer_m : register(b0, space0) {
+  uint4 m[2];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+float2x3 tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_m_i_save = i();
+  const float2x3 l_m = tint_symbol(m, 0u);
+  const uint scalar_offset_2 = ((16u * uint(p_m_i_save))) / 4;
+  const float3 l_m_i = asfloat(m[scalar_offset_2 / 4].xyz);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/dynamic_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/dynamic_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..8fefc25
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/dynamic_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,24 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform m_block_ubo {
+  mat2x3 inner;
+} m;
+
+int counter = 0;
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+void f() {
+  int tint_symbol = i();
+  int p_m_i_save = tint_symbol;
+  mat2x3 l_m = m.inner;
+  vec3 l_m_i = m.inner[p_m_i_save];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/dynamic_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/dynamic_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..cecbfbb
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/dynamic_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,17 @@
+#include <metal_stdlib>
+
+using namespace metal;
+int i() {
+  thread int tint_symbol_1 = 0;
+  tint_symbol_1 = as_type<int>((as_type<uint>(tint_symbol_1) + as_type<uint>(1)));
+  return tint_symbol_1;
+}
+
+kernel void f(const constant float2x3* tint_symbol_2 [[buffer(0)]]) {
+  int const tint_symbol = i();
+  int const p_m_i_save = tint_symbol;
+  float2x3 const l_m = *(tint_symbol_2);
+  float3 const l_m_i = (*(tint_symbol_2))[p_m_i_save];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/dynamic_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/dynamic_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..8530a81
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/dynamic_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,57 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 31
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %m_block "m_block"
+               OpMemberName %m_block 0 "inner"
+               OpName %m "m"
+               OpName %counter "counter"
+               OpName %i "i"
+               OpName %f "f"
+               OpDecorate %m_block Block
+               OpMemberDecorate %m_block 0 Offset 0
+               OpMemberDecorate %m_block 0 ColMajor
+               OpMemberDecorate %m_block 0 MatrixStride 16
+               OpDecorate %m NonWritable
+               OpDecorate %m DescriptorSet 0
+               OpDecorate %m Binding 0
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat2v3float = OpTypeMatrix %v3float 2
+    %m_block = OpTypeStruct %mat2v3float
+%_ptr_Uniform_m_block = OpTypePointer Uniform %m_block
+          %m = OpVariable %_ptr_Uniform_m_block Uniform
+        %int = OpTypeInt 32 1
+          %8 = OpConstantNull %int
+%_ptr_Private_int = OpTypePointer Private %int
+    %counter = OpVariable %_ptr_Private_int Private %8
+         %11 = OpTypeFunction %int
+      %int_1 = OpConstant %int 1
+       %void = OpTypeVoid
+         %18 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_mat2v3float = OpTypePointer Uniform %mat2v3float
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+          %i = OpFunction %int None %11
+         %13 = OpLabel
+         %14 = OpLoad %int %counter
+         %16 = OpIAdd %int %14 %int_1
+               OpStore %counter %16
+         %17 = OpLoad %int %counter
+               OpReturnValue %17
+               OpFunctionEnd
+          %f = OpFunction %void None %18
+         %21 = OpLabel
+         %22 = OpFunctionCall %int %i
+         %26 = OpAccessChain %_ptr_Uniform_mat2v3float %m %uint_0
+         %27 = OpLoad %mat2v3float %26
+         %29 = OpAccessChain %_ptr_Uniform_v3float %m %uint_0 %22
+         %30 = OpLoad %v3float %29
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/dynamic_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/dynamic_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..54241eb
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/dynamic_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,16 @@
+@group(0) @binding(0) var<uniform> m : mat2x3<f32>;
+
+var<private> counter = 0;
+
+fn i() -> i32 {
+  counter++;
+  return counter;
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_m = &(m);
+  let p_m_i = &((*(p_m))[i()]);
+  let l_m : mat2x3<f32> = *(p_m);
+  let l_m_i : vec3<f32> = *(p_m_i);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/static_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/static_index_via_ptr.wgsl
new file mode 100644
index 0000000..3444b96
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/static_index_via_ptr.wgsl

@@ -0,0 +1,13 @@
+@group(0) @binding(0) var<uniform> m : mat2x3<f32>;
+
+var<private> counter = 0;
+fn i() -> i32 { counter++; return counter; }
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_m   = &m;
+  let p_m_1 = &((*p_m)[1]);
+
+  let l_m   : mat2x3<f32> = *p_m;
+  let l_m_1 : vec3<f32>   = *p_m_1;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..e43ed37
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,22 @@
+cbuffer cbuffer_m : register(b0, space0) {
+  uint4 m[2];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+float2x3 tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float2x3 l_m = tint_symbol(m, 0u);
+  const float3 l_m_1 = asfloat(m[1].xyz);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..e43ed37
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,22 @@
+cbuffer cbuffer_m : register(b0, space0) {
+  uint4 m[2];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+float2x3 tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float2x3 l_m = tint_symbol(m, 0u);
+  const float3 l_m_1 = asfloat(m[1].xyz);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/static_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/static_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..804b6a8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/static_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,16 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform m_block_ubo {
+  mat2x3 inner;
+} m;
+
+void f() {
+  mat2x3 l_m = m.inner;
+  vec3 l_m_1 = m.inner[1];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/static_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/static_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..68130f7
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/static_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,15 @@
+#include <metal_stdlib>
+
+using namespace metal;
+int i() {
+  thread int tint_symbol = 0;
+  tint_symbol = as_type<int>((as_type<uint>(tint_symbol) + as_type<uint>(1)));
+  return tint_symbol;
+}
+
+kernel void f(const constant float2x3* tint_symbol_1 [[buffer(0)]]) {
+  float2x3 const l_m = *(tint_symbol_1);
+  float3 const l_m_1 = (*(tint_symbol_1))[1];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/static_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/static_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..d325f64
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/static_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,56 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 30
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %m_block "m_block"
+               OpMemberName %m_block 0 "inner"
+               OpName %m "m"
+               OpName %counter "counter"
+               OpName %i "i"
+               OpName %f "f"
+               OpDecorate %m_block Block
+               OpMemberDecorate %m_block 0 Offset 0
+               OpMemberDecorate %m_block 0 ColMajor
+               OpMemberDecorate %m_block 0 MatrixStride 16
+               OpDecorate %m NonWritable
+               OpDecorate %m DescriptorSet 0
+               OpDecorate %m Binding 0
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat2v3float = OpTypeMatrix %v3float 2
+    %m_block = OpTypeStruct %mat2v3float
+%_ptr_Uniform_m_block = OpTypePointer Uniform %m_block
+          %m = OpVariable %_ptr_Uniform_m_block Uniform
+        %int = OpTypeInt 32 1
+          %8 = OpConstantNull %int
+%_ptr_Private_int = OpTypePointer Private %int
+    %counter = OpVariable %_ptr_Private_int Private %8
+         %11 = OpTypeFunction %int
+      %int_1 = OpConstant %int 1
+       %void = OpTypeVoid
+         %18 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_mat2v3float = OpTypePointer Uniform %mat2v3float
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+          %i = OpFunction %int None %11
+         %13 = OpLabel
+         %14 = OpLoad %int %counter
+         %16 = OpIAdd %int %14 %int_1
+               OpStore %counter %16
+         %17 = OpLoad %int %counter
+               OpReturnValue %17
+               OpFunctionEnd
+          %f = OpFunction %void None %18
+         %21 = OpLabel
+         %25 = OpAccessChain %_ptr_Uniform_mat2v3float %m %uint_0
+         %26 = OpLoad %mat2v3float %25
+         %28 = OpAccessChain %_ptr_Uniform_v3float %m %uint_0 %int_1
+         %29 = OpLoad %v3float %28
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/static_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/static_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..cb741be
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/static_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,16 @@
+@group(0) @binding(0) var<uniform> m : mat2x3<f32>;
+
+var<private> counter = 0;
+
+fn i() -> i32 {
+  counter++;
+  return counter;
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_m = &(m);
+  let p_m_1 = &((*(p_m))[1]);
+  let l_m : mat2x3<f32> = *(p_m);
+  let l_m_1 : vec3<f32> = *(p_m_1);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_builtin.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_builtin.wgsl
new file mode 100644
index 0000000..358bb94
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_builtin.wgsl

@@ -0,0 +1,8 @@
+@group(0) @binding(0) var<uniform> u : mat2x3<f32>;
+
+@compute @workgroup_size(1)
+fn f() {
+    let t = transpose(u);
+    let l = length(u[1]);
+    let a = abs(u[0].zxy.x);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_builtin.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_builtin.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..8f85f93
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_builtin.wgsl.expected.dxc.hlsl

@@ -0,0 +1,17 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+
+float2x3 tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float3x2 t = transpose(tint_symbol(u, 0u));
+  const float l = length(asfloat(u[1].xyz));
+  const float a = abs(asfloat(u[0].xyz).zxy.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_builtin.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_builtin.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..8f85f93
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_builtin.wgsl.expected.fxc.hlsl

@@ -0,0 +1,17 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+
+float2x3 tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float3x2 t = transpose(tint_symbol(u, 0u));
+  const float l = length(asfloat(u[1].xyz));
+  const float a = abs(asfloat(u[0].xyz).zxy.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_builtin.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_builtin.wgsl.expected.glsl
new file mode 100644
index 0000000..462a2df
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_builtin.wgsl.expected.glsl

@@ -0,0 +1,17 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat2x3 inner;
+} u;
+
+void f() {
+  mat3x2 t = transpose(u.inner);
+  float l = length(u.inner[1]);
+  float a = abs(u.inner[0].zxy.x);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_builtin.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_builtin.wgsl.expected.msl
new file mode 100644
index 0000000..96989c8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_builtin.wgsl.expected.msl

@@ -0,0 +1,10 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void f(const constant float2x3* tint_symbol [[buffer(0)]]) {
+  float3x2 const t = transpose(*(tint_symbol));
+  float const l = length((*(tint_symbol))[1]);
+  float const a = fabs(float3((*(tint_symbol))[0]).zxy[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_builtin.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_builtin.wgsl.expected.spvasm
new file mode 100644
index 0000000..138d236
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_builtin.wgsl.expected.spvasm

@@ -0,0 +1,53 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 32
+; Schema: 0
+               OpCapability Shader
+         %20 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat2v3float = OpTypeMatrix %v3float 2
+    %u_block = OpTypeStruct %mat2v3float
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+       %void = OpTypeVoid
+          %7 = OpTypeFunction %void
+    %v2float = OpTypeVector %float 2
+%mat3v2float = OpTypeMatrix %v2float 3
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_mat2v3float = OpTypePointer Uniform %mat2v3float
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+         %27 = OpConstantNull %int
+          %f = OpFunction %void None %7
+         %10 = OpLabel
+         %17 = OpAccessChain %_ptr_Uniform_mat2v3float %u %uint_0
+         %18 = OpLoad %mat2v3float %17
+         %11 = OpTranspose %mat3v2float %18
+         %24 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %int_1
+         %25 = OpLoad %v3float %24
+         %19 = OpExtInst %float %20 Length %25
+         %28 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %27
+         %29 = OpLoad %v3float %28
+         %30 = OpVectorShuffle %v3float %29 %29 2 0 1
+         %31 = OpCompositeExtract %float %30 0
+         %26 = OpExtInst %float %20 FAbs %31
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_builtin.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_builtin.wgsl.expected.wgsl
new file mode 100644
index 0000000..af3e193
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_builtin.wgsl.expected.wgsl

@@ -0,0 +1,8 @@
+@group(0) @binding(0) var<uniform> u : mat2x3<f32>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let t = transpose(u);
+  let l = length(u[1]);
+  let a = abs(u[0].zxy.x);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_fn.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_fn.wgsl
new file mode 100644
index 0000000..3099888
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_fn.wgsl

@@ -0,0 +1,14 @@
+@group(0) @binding(0) var<uniform> u : mat2x3<f32>;
+
+fn a(m : mat2x3<f32>) {}
+fn b(v : vec3<f32>) {}
+fn c(f : f32) {}
+
+@compute @workgroup_size(1)
+fn f() {
+    a(u);
+    b(u[1]);
+    b(u[1].zxy);
+    c(u[1].x);
+    c(u[1].zxy.x);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_fn.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_fn.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..1fc478d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_fn.wgsl.expected.dxc.hlsl

@@ -0,0 +1,28 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+
+void a(float2x3 m) {
+}
+
+void b(float3 v) {
+}
+
+void c(float f_1) {
+}
+
+float2x3 tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(asfloat(u[1].xyz));
+  b(asfloat(u[1].xyz).zxy);
+  c(asfloat(u[1].x));
+  c(asfloat(u[1].xyz).zxy.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_fn.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_fn.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..1fc478d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_fn.wgsl.expected.fxc.hlsl

@@ -0,0 +1,28 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+
+void a(float2x3 m) {
+}
+
+void b(float3 v) {
+}
+
+void c(float f_1) {
+}
+
+float2x3 tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(asfloat(u[1].xyz));
+  b(asfloat(u[1].xyz).zxy);
+  c(asfloat(u[1].x));
+  c(asfloat(u[1].xyz).zxy.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_fn.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_fn.wgsl.expected.glsl
new file mode 100644
index 0000000..b4d5c5e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_fn.wgsl.expected.glsl

@@ -0,0 +1,28 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat2x3 inner;
+} u;
+
+void a(mat2x3 m) {
+}
+
+void b(vec3 v) {
+}
+
+void c(float f_1) {
+}
+
+void f() {
+  a(u.inner);
+  b(u.inner[1]);
+  b(u.inner[1].zxy);
+  c(u.inner[1].x);
+  c(u.inner[1].zxy.x);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_fn.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_fn.wgsl.expected.msl
new file mode 100644
index 0000000..9d84e31
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_fn.wgsl.expected.msl

@@ -0,0 +1,21 @@
+#include <metal_stdlib>
+
+using namespace metal;
+void a(float2x3 m) {
+}
+
+void b(float3 v) {
+}
+
+void c(float f_1) {
+}
+
+kernel void f(const constant float2x3* tint_symbol [[buffer(0)]]) {
+  a(*(tint_symbol));
+  b((*(tint_symbol))[1]);
+  b(float3((*(tint_symbol))[1]).zxy);
+  c((*(tint_symbol))[1][0]);
+  c(float3((*(tint_symbol))[1]).zxy[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_fn.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_fn.wgsl.expected.spvasm
new file mode 100644
index 0000000..aae65de
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_fn.wgsl.expected.spvasm

@@ -0,0 +1,81 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 48
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %a "a"
+               OpName %m "m"
+               OpName %b "b"
+               OpName %v "v"
+               OpName %c "c"
+               OpName %f_1 "f_1"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat2v3float = OpTypeMatrix %v3float 2
+    %u_block = OpTypeStruct %mat2v3float
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+       %void = OpTypeVoid
+          %7 = OpTypeFunction %void %mat2v3float
+         %12 = OpTypeFunction %void %v3float
+         %16 = OpTypeFunction %void %float
+         %20 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_mat2v3float = OpTypePointer Uniform %mat2v3float
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+          %a = OpFunction %void None %7
+          %m = OpFunctionParameter %mat2v3float
+         %11 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %b = OpFunction %void None %12
+          %v = OpFunctionParameter %v3float
+         %15 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %c = OpFunction %void None %16
+        %f_1 = OpFunctionParameter %float
+         %19 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %20
+         %22 = OpLabel
+         %27 = OpAccessChain %_ptr_Uniform_mat2v3float %u %uint_0
+         %28 = OpLoad %mat2v3float %27
+         %23 = OpFunctionCall %void %a %28
+         %33 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %int_1
+         %34 = OpLoad %v3float %33
+         %29 = OpFunctionCall %void %b %34
+         %36 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %int_1
+         %37 = OpLoad %v3float %36
+         %38 = OpVectorShuffle %v3float %37 %37 2 0 1
+         %35 = OpFunctionCall %void %b %38
+         %41 = OpAccessChain %_ptr_Uniform_float %u %uint_0 %int_1 %uint_0
+         %42 = OpLoad %float %41
+         %39 = OpFunctionCall %void %c %42
+         %44 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %int_1
+         %45 = OpLoad %v3float %44
+         %46 = OpVectorShuffle %v3float %45 %45 2 0 1
+         %47 = OpCompositeExtract %float %46 0
+         %43 = OpFunctionCall %void %c %47
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_fn.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_fn.wgsl.expected.wgsl
new file mode 100644
index 0000000..549cefc
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_fn.wgsl.expected.wgsl

@@ -0,0 +1,19 @@
+@group(0) @binding(0) var<uniform> u : mat2x3<f32>;
+
+fn a(m : mat2x3<f32>) {
+}
+
+fn b(v : vec3<f32>) {
+}
+
+fn c(f : f32) {
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  a(u);
+  b(u[1]);
+  b(u[1].zxy);
+  c(u[1].x);
+  c(u[1].zxy.x);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_private.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_private.wgsl
new file mode 100644
index 0000000..3b29757
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_private.wgsl

@@ -0,0 +1,10 @@
+@group(0) @binding(0) var<uniform> u : mat2x3<f32>;
+var<private> p : mat2x3<f32>;
+
+@compute @workgroup_size(1)
+fn f() {
+    p = u;
+    p[1] = u[0];
+    p[1] = u[0].zxy;
+    p[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_private.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_private.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..568d66f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_private.wgsl.expected.dxc.hlsl

@@ -0,0 +1,19 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+static float2x3 p = float2x3(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f);
+
+float2x3 tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = asfloat(u[0].xyz);
+  p[1] = asfloat(u[0].xyz).zxy;
+  p[0][1] = asfloat(u[1].x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_private.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_private.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..568d66f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_private.wgsl.expected.fxc.hlsl

@@ -0,0 +1,19 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+static float2x3 p = float2x3(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f);
+
+float2x3 tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = asfloat(u[0].xyz);
+  p[1] = asfloat(u[0].xyz).zxy;
+  p[0][1] = asfloat(u[1].x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_private.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_private.wgsl.expected.glsl
new file mode 100644
index 0000000..ba27583
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_private.wgsl.expected.glsl

@@ -0,0 +1,19 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat2x3 inner;
+} u;
+
+mat2x3 p = mat2x3(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f);
+void f() {
+  p = u.inner;
+  p[1] = u.inner[0];
+  p[1] = u.inner[0].zxy;
+  p[0][1] = u.inner[1][0];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_private.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_private.wgsl.expected.msl
new file mode 100644
index 0000000..d2181af
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_private.wgsl.expected.msl

@@ -0,0 +1,12 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void f(const constant float2x3* tint_symbol_1 [[buffer(0)]]) {
+  thread float2x3 tint_symbol = float2x3(0.0f);
+  tint_symbol = *(tint_symbol_1);
+  tint_symbol[1] = (*(tint_symbol_1))[0];
+  tint_symbol[1] = float3((*(tint_symbol_1))[0]).zxy;
+  tint_symbol[0][1] = (*(tint_symbol_1))[1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_private.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_private.wgsl.expected.spvasm
new file mode 100644
index 0000000..ac9477e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_private.wgsl.expected.spvasm

@@ -0,0 +1,62 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 36
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %p "p"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat2v3float = OpTypeMatrix %v3float 2
+    %u_block = OpTypeStruct %mat2v3float
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%_ptr_Private_mat2v3float = OpTypePointer Private %mat2v3float
+          %9 = OpConstantNull %mat2v3float
+          %p = OpVariable %_ptr_Private_mat2v3float Private %9
+       %void = OpTypeVoid
+         %10 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_mat2v3float = OpTypePointer Uniform %mat2v3float
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Private_v3float = OpTypePointer Private %v3float
+         %23 = OpConstantNull %int
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+%_ptr_Private_float = OpTypePointer Private %float
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+          %f = OpFunction %void None %10
+         %13 = OpLabel
+         %17 = OpAccessChain %_ptr_Uniform_mat2v3float %u %uint_0
+         %18 = OpLoad %mat2v3float %17
+               OpStore %p %18
+         %22 = OpAccessChain %_ptr_Private_v3float %p %int_1
+         %25 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %23
+         %26 = OpLoad %v3float %25
+               OpStore %22 %26
+         %27 = OpAccessChain %_ptr_Private_v3float %p %int_1
+         %28 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %23
+         %29 = OpLoad %v3float %28
+         %30 = OpVectorShuffle %v3float %29 %29 2 0 1
+               OpStore %27 %30
+         %32 = OpAccessChain %_ptr_Private_float %p %23 %int_1
+         %34 = OpAccessChain %_ptr_Uniform_float %u %uint_0 %int_1 %23
+         %35 = OpLoad %float %34
+               OpStore %32 %35
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_private.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_private.wgsl.expected.wgsl
new file mode 100644
index 0000000..46b652d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_private.wgsl.expected.wgsl

@@ -0,0 +1,11 @@
+@group(0) @binding(0) var<uniform> u : mat2x3<f32>;
+
+var<private> p : mat2x3<f32>;
+
+@compute @workgroup_size(1)
+fn f() {
+  p = u;
+  p[1] = u[0];
+  p[1] = u[0].zxy;
+  p[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_storage.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_storage.wgsl
new file mode 100644
index 0000000..9db0775
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_storage.wgsl

@@ -0,0 +1,10 @@
+@group(0) @binding(0) var<uniform> u : mat2x3<f32>;
+@group(0) @binding(1) var<storage, read_write> s : mat2x3<f32>;
+
+@compute @workgroup_size(1)
+fn f() {
+    s = u;
+    s[1] = u[0];
+    s[1] = u[0].zxy;
+    s[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_storage.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_storage.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..c3906dd
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_storage.wgsl.expected.dxc.hlsl

@@ -0,0 +1,24 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, float2x3 value) {
+  buffer.Store3((offset + 0u), asuint(value[0u]));
+  buffer.Store3((offset + 16u), asuint(value[1u]));
+}
+
+float2x3 tint_symbol_2(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_2(u, 0u));
+  s.Store3(16u, asuint(asfloat(u[0].xyz)));
+  s.Store3(16u, asuint(asfloat(u[0].xyz).zxy));
+  s.Store(4u, asuint(asfloat(u[1].x)));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_storage.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_storage.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..c3906dd
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_storage.wgsl.expected.fxc.hlsl

@@ -0,0 +1,24 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, float2x3 value) {
+  buffer.Store3((offset + 0u), asuint(value[0u]));
+  buffer.Store3((offset + 16u), asuint(value[1u]));
+}
+
+float2x3 tint_symbol_2(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_2(u, 0u));
+  s.Store3(16u, asuint(asfloat(u[0].xyz)));
+  s.Store3(16u, asuint(asfloat(u[0].xyz).zxy));
+  s.Store(4u, asuint(asfloat(u[1].x)));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_storage.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_storage.wgsl.expected.glsl
new file mode 100644
index 0000000..bd4b73c
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_storage.wgsl.expected.glsl

@@ -0,0 +1,22 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat2x3 inner;
+} u;
+
+layout(binding = 1, std430) buffer u_block_ssbo {
+  mat2x3 inner;
+} s;
+
+void f() {
+  s.inner = u.inner;
+  s.inner[1] = u.inner[0];
+  s.inner[1] = u.inner[0].zxy;
+  s.inner[0][1] = u.inner[1][0];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_storage.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_storage.wgsl.expected.msl
new file mode 100644
index 0000000..59569b4
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_storage.wgsl.expected.msl

@@ -0,0 +1,11 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void f(device float2x3* tint_symbol [[buffer(1)]], const constant float2x3* tint_symbol_1 [[buffer(0)]]) {
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[0];
+  (*(tint_symbol))[1] = float3((*(tint_symbol_1))[0]).zxy;
+  (*(tint_symbol))[0][1] = (*(tint_symbol_1))[1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_storage.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_storage.wgsl.expected.spvasm
new file mode 100644
index 0000000..faf129f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_storage.wgsl.expected.spvasm

@@ -0,0 +1,65 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 37
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %s "s"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpDecorate %s DescriptorSet 0
+               OpDecorate %s Binding 1
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat2v3float = OpTypeMatrix %v3float 2
+    %u_block = OpTypeStruct %mat2v3float
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%_ptr_StorageBuffer_u_block = OpTypePointer StorageBuffer %u_block
+          %s = OpVariable %_ptr_StorageBuffer_u_block StorageBuffer
+       %void = OpTypeVoid
+          %9 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_StorageBuffer_mat2v3float = OpTypePointer StorageBuffer %mat2v3float
+%_ptr_Uniform_mat2v3float = OpTypePointer Uniform %mat2v3float
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_StorageBuffer_v3float = OpTypePointer StorageBuffer %v3float
+         %24 = OpConstantNull %int
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+          %f = OpFunction %void None %9
+         %12 = OpLabel
+         %16 = OpAccessChain %_ptr_StorageBuffer_mat2v3float %s %uint_0
+         %18 = OpAccessChain %_ptr_Uniform_mat2v3float %u %uint_0
+         %19 = OpLoad %mat2v3float %18
+               OpStore %16 %19
+         %23 = OpAccessChain %_ptr_StorageBuffer_v3float %s %uint_0 %int_1
+         %26 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %24
+         %27 = OpLoad %v3float %26
+               OpStore %23 %27
+         %28 = OpAccessChain %_ptr_StorageBuffer_v3float %s %uint_0 %int_1
+         %29 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %24
+         %30 = OpLoad %v3float %29
+         %31 = OpVectorShuffle %v3float %30 %30 2 0 1
+               OpStore %28 %31
+         %33 = OpAccessChain %_ptr_StorageBuffer_float %s %uint_0 %24 %int_1
+         %35 = OpAccessChain %_ptr_Uniform_float %u %uint_0 %int_1 %24
+         %36 = OpLoad %float %35
+               OpStore %33 %36
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_storage.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_storage.wgsl.expected.wgsl
new file mode 100644
index 0000000..198bf58
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_storage.wgsl.expected.wgsl

@@ -0,0 +1,11 @@
+@group(0) @binding(0) var<uniform> u : mat2x3<f32>;
+
+@group(0) @binding(1) var<storage, read_write> s : mat2x3<f32>;
+
+@compute @workgroup_size(1)
+fn f() {
+  s = u;
+  s[1] = u[0];
+  s[1] = u[0].zxy;
+  s[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_workgroup.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_workgroup.wgsl
new file mode 100644
index 0000000..b35fd21
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_workgroup.wgsl

@@ -0,0 +1,10 @@
+@group(0) @binding(0) var<uniform> u : mat2x3<f32>;
+var<workgroup> w : mat2x3<f32>;
+
+@compute @workgroup_size(1)
+fn f() {
+    w = u;
+    w[1] = u[0];
+    w[1] = u[0].zxy;
+    w[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_workgroup.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_workgroup.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..bdc3667
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_workgroup.wgsl.expected.dxc.hlsl

@@ -0,0 +1,31 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+groupshared float2x3 w;
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+float2x3 tint_symbol_2(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz));
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    w = float2x3((0.0f).xxx, (0.0f).xxx);
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = asfloat(u[0].xyz);
+  w[1] = asfloat(u[0].xyz).zxy;
+  w[0][1] = asfloat(u[1].x);
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_workgroup.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_workgroup.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..bdc3667
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_workgroup.wgsl.expected.fxc.hlsl

@@ -0,0 +1,31 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+groupshared float2x3 w;
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+float2x3 tint_symbol_2(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz));
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    w = float2x3((0.0f).xxx, (0.0f).xxx);
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = asfloat(u[0].xyz);
+  w[1] = asfloat(u[0].xyz).zxy;
+  w[0][1] = asfloat(u[1].x);
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_workgroup.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_workgroup.wgsl.expected.glsl
new file mode 100644
index 0000000..22145fa
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_workgroup.wgsl.expected.glsl

@@ -0,0 +1,23 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat2x3 inner;
+} u;
+
+shared mat2x3 w;
+void f(uint local_invocation_index) {
+  {
+    w = mat2x3(vec3(0.0f), vec3(0.0f));
+  }
+  barrier();
+  w = u.inner;
+  w[1] = u.inner[0];
+  w[1] = u.inner[0].zxy;
+  w[0][1] = u.inner[1][0];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f(gl_LocalInvocationIndex);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_workgroup.wgsl.expected.msl
new file mode 100644
index 0000000..9bf471f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_workgroup.wgsl.expected.msl

@@ -0,0 +1,24 @@
+#include <metal_stdlib>
+
+using namespace metal;
+struct tint_symbol_5 {
+  float2x3 w;
+};
+
+void f_inner(uint local_invocation_index, threadgroup float2x3* const tint_symbol, const constant float2x3* const tint_symbol_1) {
+  {
+    *(tint_symbol) = float2x3(float3(0.0f), float3(0.0f));
+  }
+  threadgroup_barrier(mem_flags::mem_threadgroup);
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[0];
+  (*(tint_symbol))[1] = float3((*(tint_symbol_1))[0]).zxy;
+  (*(tint_symbol))[0][1] = (*(tint_symbol_1))[1][0];
+}
+
+kernel void f(const constant float2x3* tint_symbol_4 [[buffer(0)]], threadgroup tint_symbol_5* tint_symbol_3 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
+  threadgroup float2x3* const tint_symbol_2 = &((*(tint_symbol_3)).w);
+  f_inner(local_invocation_index, tint_symbol_2, tint_symbol_4);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_workgroup.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_workgroup.wgsl.expected.spvasm
new file mode 100644
index 0000000..5f4d380
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_workgroup.wgsl.expected.spvasm

@@ -0,0 +1,80 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 47
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f" %local_invocation_index_1
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %local_invocation_index_1 "local_invocation_index_1"
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %w "w"
+               OpName %f_inner "f_inner"
+               OpName %local_invocation_index "local_invocation_index"
+               OpName %f "f"
+               OpDecorate %local_invocation_index_1 BuiltIn LocalInvocationIndex
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %uint = OpTypeInt 32 0
+%_ptr_Input_uint = OpTypePointer Input %uint
+%local_invocation_index_1 = OpVariable %_ptr_Input_uint Input
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat2v3float = OpTypeMatrix %v3float 2
+    %u_block = OpTypeStruct %mat2v3float
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%_ptr_Workgroup_mat2v3float = OpTypePointer Workgroup %mat2v3float
+          %w = OpVariable %_ptr_Workgroup_mat2v3float Workgroup
+       %void = OpTypeVoid
+         %12 = OpTypeFunction %void %uint
+         %17 = OpConstantNull %mat2v3float
+     %uint_2 = OpConstant %uint 2
+   %uint_264 = OpConstant %uint 264
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_mat2v3float = OpTypePointer Uniform %mat2v3float
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Workgroup_v3float = OpTypePointer Workgroup %v3float
+         %29 = OpConstantNull %int
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+%_ptr_Workgroup_float = OpTypePointer Workgroup %float
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+         %42 = OpTypeFunction %void
+    %f_inner = OpFunction %void None %12
+%local_invocation_index = OpFunctionParameter %uint
+         %16 = OpLabel
+               OpStore %w %17
+               OpControlBarrier %uint_2 %uint_2 %uint_264
+         %23 = OpAccessChain %_ptr_Uniform_mat2v3float %u %uint_0
+         %24 = OpLoad %mat2v3float %23
+               OpStore %w %24
+         %28 = OpAccessChain %_ptr_Workgroup_v3float %w %int_1
+         %31 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %29
+         %32 = OpLoad %v3float %31
+               OpStore %28 %32
+         %33 = OpAccessChain %_ptr_Workgroup_v3float %w %int_1
+         %34 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %29
+         %35 = OpLoad %v3float %34
+         %36 = OpVectorShuffle %v3float %35 %35 2 0 1
+               OpStore %33 %36
+         %38 = OpAccessChain %_ptr_Workgroup_float %w %29 %int_1
+         %40 = OpAccessChain %_ptr_Uniform_float %u %uint_0 %int_1 %29
+         %41 = OpLoad %float %40
+               OpStore %38 %41
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %42
+         %44 = OpLabel
+         %46 = OpLoad %uint %local_invocation_index_1
+         %45 = OpFunctionCall %void %f_inner %46
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_workgroup.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_workgroup.wgsl.expected.wgsl
new file mode 100644
index 0000000..5d72743
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_workgroup.wgsl.expected.wgsl

@@ -0,0 +1,11 @@
+@group(0) @binding(0) var<uniform> u : mat2x3<f32>;
+
+var<workgroup> w : mat2x3<f32>;
+
+@compute @workgroup_size(1)
+fn f() {
+  w = u;
+  w[1] = u[0];
+  w[1] = u[0].zxy;
+  w[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/dynamic_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/dynamic_index_via_ptr.wgsl
new file mode 100644
index 0000000..d266aa9
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/dynamic_index_via_ptr.wgsl

@@ -0,0 +1,15 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> m : mat2x4<f16>;
+
+var<private> counter = 0;
+fn i() -> i32 { counter++; return counter; }
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_m   = &m;
+  let p_m_i = &((*p_m)[i()]);
+
+  let l_m   : mat2x4<f16> = *p_m;
+  let l_m_i : vec4<f16>   = *p_m_i;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..aafb4cc
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,36 @@
+cbuffer cbuffer_m : register(b0, space0) {
+  uint4 m[1];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+matrix<float16_t, 2, 4> tint_symbol(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  return matrix<float16_t, 2, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_m_i_save = i();
+  const matrix<float16_t, 2, 4> l_m = tint_symbol(m, 0u);
+  const uint scalar_offset_2 = ((8u * uint(p_m_i_save))) / 4;
+  uint4 ubo_load_5 = m[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  const vector<float16_t, 4> l_m_i = vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..10e8ba9
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,41 @@
+SKIP: FAILED
+
+cbuffer cbuffer_m : register(b0, space0) {
+  uint4 m[1];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+matrix<float16_t, 2, 4> tint_symbol(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  return matrix<float16_t, 2, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_m_i_save = i();
+  const matrix<float16_t, 2, 4> l_m = tint_symbol(m, 0u);
+  const uint scalar_offset_2 = ((8u * uint(p_m_i_save))) / 4;
+  uint4 ubo_load_5 = m[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  const vector<float16_t, 4> l_m_i = vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001F513868710(11,8-16): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/dynamic_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/dynamic_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..90a745a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/dynamic_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,48 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform m_block_std140_ubo {
+  f16vec4 inner_0;
+  f16vec4 inner_1;
+} m;
+
+int counter = 0;
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+f16mat2x4 load_m_inner() {
+  return f16mat2x4(m.inner_0, m.inner_1);
+}
+
+f16vec4 load_m_inner_p0(uint p0) {
+  switch(p0) {
+    case 0u: {
+      return m.inner_0;
+      break;
+    }
+    case 1u: {
+      return m.inner_1;
+      break;
+    }
+    default: {
+      return f16vec4(0.0hf);
+      break;
+    }
+  }
+}
+
+void f() {
+  f16mat2x4 p_m = load_m_inner();
+  int tint_symbol = i();
+  f16vec4 p_m_i = load_m_inner_p0(uint(tint_symbol));
+  f16mat2x4 l_m = load_m_inner();
+  f16vec4 l_m_i = load_m_inner_p0(uint(tint_symbol));
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/dynamic_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/dynamic_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..7ecbd42
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/dynamic_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,17 @@
+#include <metal_stdlib>
+
+using namespace metal;
+int i() {
+  thread int tint_symbol_1 = 0;
+  tint_symbol_1 = as_type<int>((as_type<uint>(tint_symbol_1) + as_type<uint>(1)));
+  return tint_symbol_1;
+}
+
+kernel void f(const constant half2x4* tint_symbol_2 [[buffer(0)]]) {
+  int const tint_symbol = i();
+  int const p_m_i_save = tint_symbol;
+  half2x4 const l_m = *(tint_symbol_2);
+  half4 const l_m_i = (*(tint_symbol_2))[p_m_i_save];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/dynamic_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/dynamic_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..837fa42
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/dynamic_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,93 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 54
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %m_block_std140 "m_block_std140"
+               OpMemberName %m_block_std140 0 "inner_0"
+               OpMemberName %m_block_std140 1 "inner_1"
+               OpName %m "m"
+               OpName %counter "counter"
+               OpName %i "i"
+               OpName %load_m_inner "load_m_inner"
+               OpName %load_m_inner_p0 "load_m_inner_p0"
+               OpName %p0 "p0"
+               OpName %f "f"
+               OpDecorate %m_block_std140 Block
+               OpMemberDecorate %m_block_std140 0 Offset 0
+               OpMemberDecorate %m_block_std140 1 Offset 8
+               OpDecorate %m NonWritable
+               OpDecorate %m DescriptorSet 0
+               OpDecorate %m Binding 0
+       %half = OpTypeFloat 16
+     %v4half = OpTypeVector %half 4
+%m_block_std140 = OpTypeStruct %v4half %v4half
+%_ptr_Uniform_m_block_std140 = OpTypePointer Uniform %m_block_std140
+          %m = OpVariable %_ptr_Uniform_m_block_std140 Uniform
+        %int = OpTypeInt 32 1
+          %7 = OpConstantNull %int
+%_ptr_Private_int = OpTypePointer Private %int
+    %counter = OpVariable %_ptr_Private_int Private %7
+         %10 = OpTypeFunction %int
+      %int_1 = OpConstant %int 1
+ %mat2v4half = OpTypeMatrix %v4half 2
+         %17 = OpTypeFunction %mat2v4half
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half
+     %uint_1 = OpConstant %uint 1
+         %33 = OpTypeFunction %v4half %uint
+         %45 = OpConstantNull %v4half
+       %void = OpTypeVoid
+         %46 = OpTypeFunction %void
+          %i = OpFunction %int None %10
+         %12 = OpLabel
+         %13 = OpLoad %int %counter
+         %15 = OpIAdd %int %13 %int_1
+               OpStore %counter %15
+         %16 = OpLoad %int %counter
+               OpReturnValue %16
+               OpFunctionEnd
+%load_m_inner = OpFunction %mat2v4half None %17
+         %20 = OpLabel
+         %26 = OpAccessChain %_ptr_Uniform_v4half %m %uint_0
+         %27 = OpLoad %v4half %26
+         %30 = OpAccessChain %_ptr_Uniform_v4half %m %uint_1
+         %31 = OpLoad %v4half %30
+         %32 = OpCompositeConstruct %mat2v4half %27 %31
+               OpReturnValue %32
+               OpFunctionEnd
+%load_m_inner_p0 = OpFunction %v4half None %33
+         %p0 = OpFunctionParameter %uint
+         %36 = OpLabel
+               OpSelectionMerge %37 None
+               OpSwitch %p0 %38 0 %39 1 %40
+         %39 = OpLabel
+         %41 = OpAccessChain %_ptr_Uniform_v4half %m %uint_0
+         %42 = OpLoad %v4half %41
+               OpReturnValue %42
+         %40 = OpLabel
+         %43 = OpAccessChain %_ptr_Uniform_v4half %m %uint_1
+         %44 = OpLoad %v4half %43
+               OpReturnValue %44
+         %38 = OpLabel
+               OpReturnValue %45
+         %37 = OpLabel
+               OpReturnValue %45
+               OpFunctionEnd
+          %f = OpFunction %void None %46
+         %49 = OpLabel
+         %50 = OpFunctionCall %int %i
+         %51 = OpFunctionCall %mat2v4half %load_m_inner
+         %53 = OpBitcast %uint %50
+         %52 = OpFunctionCall %v4half %load_m_inner_p0 %53
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/dynamic_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/dynamic_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..d23fb14
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/dynamic_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,18 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> m : mat2x4<f16>;
+
+var<private> counter = 0;
+
+fn i() -> i32 {
+  counter++;
+  return counter;
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_m = &(m);
+  let p_m_i = &((*(p_m))[i()]);
+  let l_m : mat2x4<f16> = *(p_m);
+  let l_m_i : vec4<f16> = *(p_m_i);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/static_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/static_index_via_ptr.wgsl
new file mode 100644
index 0000000..964fb95
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/static_index_via_ptr.wgsl

@@ -0,0 +1,15 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> m : mat2x4<f16>;
+
+var<private> counter = 0;
+fn i() -> i32 { counter++; return counter; }
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_m   = &m;
+  let p_m_1 = &((*p_m)[1]);
+
+  let l_m   : mat2x4<f16> = *p_m;
+  let l_m_1 : vec4<f16>   = *p_m_1;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..26c4533
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,33 @@
+cbuffer cbuffer_m : register(b0, space0) {
+  uint4 m[1];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+matrix<float16_t, 2, 4> tint_symbol(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  return matrix<float16_t, 2, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 2, 4> l_m = tint_symbol(m, 0u);
+  uint2 ubo_load_4 = m[0].zw;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  const vector<float16_t, 4> l_m_1 = vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..fe05023
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,38 @@
+SKIP: FAILED
+
+cbuffer cbuffer_m : register(b0, space0) {
+  uint4 m[1];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+matrix<float16_t, 2, 4> tint_symbol(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  return matrix<float16_t, 2, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 2, 4> l_m = tint_symbol(m, 0u);
+  uint2 ubo_load_4 = m[0].zw;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  const vector<float16_t, 4> l_m_1 = vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000002523DBC4780(11,8-16): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/static_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/static_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..2583326
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/static_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,24 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform m_block_std140_ubo {
+  f16vec4 inner_0;
+  f16vec4 inner_1;
+} m;
+
+f16mat2x4 load_m_inner() {
+  return f16mat2x4(m.inner_0, m.inner_1);
+}
+
+void f() {
+  f16mat2x4 p_m = load_m_inner();
+  f16vec4 p_m_1 = m.inner_1;
+  f16mat2x4 l_m = load_m_inner();
+  f16vec4 l_m_1 = m.inner_1;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/static_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/static_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..760d2d2
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/static_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,15 @@
+#include <metal_stdlib>
+
+using namespace metal;
+int i() {
+  thread int tint_symbol = 0;
+  tint_symbol = as_type<int>((as_type<uint>(tint_symbol) + as_type<uint>(1)));
+  return tint_symbol;
+}
+
+kernel void f(const constant half2x4* tint_symbol_1 [[buffer(0)]]) {
+  half2x4 const l_m = *(tint_symbol_1);
+  half4 const l_m_1 = (*(tint_symbol_1))[1];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/static_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/static_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..435bc26
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/static_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,70 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 40
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %m_block_std140 "m_block_std140"
+               OpMemberName %m_block_std140 0 "inner_0"
+               OpMemberName %m_block_std140 1 "inner_1"
+               OpName %m "m"
+               OpName %counter "counter"
+               OpName %i "i"
+               OpName %load_m_inner "load_m_inner"
+               OpName %f "f"
+               OpDecorate %m_block_std140 Block
+               OpMemberDecorate %m_block_std140 0 Offset 0
+               OpMemberDecorate %m_block_std140 1 Offset 8
+               OpDecorate %m NonWritable
+               OpDecorate %m DescriptorSet 0
+               OpDecorate %m Binding 0
+       %half = OpTypeFloat 16
+     %v4half = OpTypeVector %half 4
+%m_block_std140 = OpTypeStruct %v4half %v4half
+%_ptr_Uniform_m_block_std140 = OpTypePointer Uniform %m_block_std140
+          %m = OpVariable %_ptr_Uniform_m_block_std140 Uniform
+        %int = OpTypeInt 32 1
+          %7 = OpConstantNull %int
+%_ptr_Private_int = OpTypePointer Private %int
+    %counter = OpVariable %_ptr_Private_int Private %7
+         %10 = OpTypeFunction %int
+      %int_1 = OpConstant %int 1
+ %mat2v4half = OpTypeMatrix %v4half 2
+         %17 = OpTypeFunction %mat2v4half
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half
+     %uint_1 = OpConstant %uint 1
+       %void = OpTypeVoid
+         %33 = OpTypeFunction %void
+          %i = OpFunction %int None %10
+         %12 = OpLabel
+         %13 = OpLoad %int %counter
+         %15 = OpIAdd %int %13 %int_1
+               OpStore %counter %15
+         %16 = OpLoad %int %counter
+               OpReturnValue %16
+               OpFunctionEnd
+%load_m_inner = OpFunction %mat2v4half None %17
+         %20 = OpLabel
+         %26 = OpAccessChain %_ptr_Uniform_v4half %m %uint_0
+         %27 = OpLoad %v4half %26
+         %30 = OpAccessChain %_ptr_Uniform_v4half %m %uint_1
+         %31 = OpLoad %v4half %30
+         %32 = OpCompositeConstruct %mat2v4half %27 %31
+               OpReturnValue %32
+               OpFunctionEnd
+          %f = OpFunction %void None %33
+         %36 = OpLabel
+         %37 = OpFunctionCall %mat2v4half %load_m_inner
+         %38 = OpAccessChain %_ptr_Uniform_v4half %m %uint_1
+         %39 = OpLoad %v4half %38
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/static_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/static_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..9bbe2cf
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/static_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,18 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> m : mat2x4<f16>;
+
+var<private> counter = 0;
+
+fn i() -> i32 {
+  counter++;
+  return counter;
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_m = &(m);
+  let p_m_1 = &((*(p_m))[1]);
+  let l_m : mat2x4<f16> = *(p_m);
+  let l_m_1 : vec4<f16> = *(p_m_1);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_builtin.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_builtin.wgsl
new file mode 100644
index 0000000..2897573
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_builtin.wgsl

@@ -0,0 +1,10 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat2x4<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+    let t = transpose(u);
+    let l = length(u[1]);
+    let a = abs(u[0].ywxz.x);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_builtin.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_builtin.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..8fc92b1
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_builtin.wgsl.expected.dxc.hlsl

@@ -0,0 +1,31 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+
+matrix<float16_t, 2, 4> tint_symbol(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  return matrix<float16_t, 2, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 4, 2> t = transpose(tint_symbol(u, 0u));
+  uint2 ubo_load_4 = u[0].zw;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  const float16_t l = length(vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]));
+  uint2 ubo_load_5 = u[0].xy;
+  vector<float16_t, 2> ubo_load_5_xz = vector<float16_t, 2>(f16tof32(ubo_load_5 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_5_yw = vector<float16_t, 2>(f16tof32(ubo_load_5 >> 16));
+  const float16_t a = abs(vector<float16_t, 4>(ubo_load_5_xz[0], ubo_load_5_yw[0], ubo_load_5_xz[1], ubo_load_5_yw[1]).ywxz.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_builtin.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_builtin.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..db1a663
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_builtin.wgsl.expected.fxc.hlsl

@@ -0,0 +1,36 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+
+matrix<float16_t, 2, 4> tint_symbol(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  return matrix<float16_t, 2, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 4, 2> t = transpose(tint_symbol(u, 0u));
+  uint2 ubo_load_4 = u[0].zw;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  const float16_t l = length(vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]));
+  uint2 ubo_load_5 = u[0].xy;
+  vector<float16_t, 2> ubo_load_5_xz = vector<float16_t, 2>(f16tof32(ubo_load_5 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_5_yw = vector<float16_t, 2>(f16tof32(ubo_load_5 >> 16));
+  const float16_t a = abs(vector<float16_t, 4>(ubo_load_5_xz[0], ubo_load_5_yw[0], ubo_load_5_xz[1], ubo_load_5_yw[1]).ywxz.x);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000020C30943680(5,8-16): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_builtin.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_builtin.wgsl.expected.glsl
new file mode 100644
index 0000000..d7f7cc7
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_builtin.wgsl.expected.glsl

@@ -0,0 +1,23 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  f16vec4 inner_0;
+  f16vec4 inner_1;
+} u;
+
+f16mat2x4 load_u_inner() {
+  return f16mat2x4(u.inner_0, u.inner_1);
+}
+
+void f() {
+  f16mat4x2 t = transpose(load_u_inner());
+  float16_t l = length(u.inner_1);
+  float16_t a = abs(u.inner_0.ywxz[0u]);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_builtin.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_builtin.wgsl.expected.msl
new file mode 100644
index 0000000..ab20cc5
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_builtin.wgsl.expected.msl

@@ -0,0 +1,10 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void f(const constant half2x4* tint_symbol [[buffer(0)]]) {
+  half4x2 const t = transpose(*(tint_symbol));
+  half const l = length((*(tint_symbol))[1]);
+  half const a = fabs(half4((*(tint_symbol))[0]).ywxz[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_builtin.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_builtin.wgsl.expected.spvasm
new file mode 100644
index 0000000..3947aef
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_builtin.wgsl.expected.spvasm

@@ -0,0 +1,65 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 40
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+         %31 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner_0"
+               OpMemberName %u_block_std140 1 "inner_1"
+               OpName %u "u"
+               OpName %load_u_inner "load_u_inner"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %u_block_std140 1 Offset 8
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %half = OpTypeFloat 16
+     %v4half = OpTypeVector %half 4
+%u_block_std140 = OpTypeStruct %v4half %v4half
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat2v4half = OpTypeMatrix %v4half 2
+          %6 = OpTypeFunction %mat2v4half
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half
+     %uint_1 = OpConstant %uint 1
+       %void = OpTypeVoid
+         %22 = OpTypeFunction %void
+     %v2half = OpTypeVector %half 2
+ %mat4v2half = OpTypeMatrix %v2half 4
+         %38 = OpConstantNull %uint
+%load_u_inner = OpFunction %mat2v4half None %6
+          %9 = OpLabel
+         %15 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0
+         %16 = OpLoad %v4half %15
+         %19 = OpAccessChain %_ptr_Uniform_v4half %u %uint_1
+         %20 = OpLoad %v4half %19
+         %21 = OpCompositeConstruct %mat2v4half %16 %20
+               OpReturnValue %21
+               OpFunctionEnd
+          %f = OpFunction %void None %22
+         %25 = OpLabel
+         %29 = OpFunctionCall %mat2v4half %load_u_inner
+         %26 = OpTranspose %mat4v2half %29
+         %32 = OpAccessChain %_ptr_Uniform_v4half %u %uint_1
+         %33 = OpLoad %v4half %32
+         %30 = OpExtInst %half %31 Length %33
+         %35 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0
+         %36 = OpLoad %v4half %35
+         %37 = OpVectorShuffle %v4half %36 %36 1 3 0 2
+         %39 = OpCompositeExtract %half %37 0
+         %34 = OpExtInst %half %31 FAbs %39
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_builtin.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_builtin.wgsl.expected.wgsl
new file mode 100644
index 0000000..04c4e6d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_builtin.wgsl.expected.wgsl

@@ -0,0 +1,10 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat2x4<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let t = transpose(u);
+  let l = length(u[1]);
+  let a = abs(u[0].ywxz.x);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_fn.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_fn.wgsl
new file mode 100644
index 0000000..159b73a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_fn.wgsl

@@ -0,0 +1,16 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat2x4<f16>;
+
+fn a(m : mat2x4<f16>) {}
+fn b(v : vec4<f16>) {}
+fn c(f : f16) {}
+
+@compute @workgroup_size(1)
+fn f() {
+    a(u);
+    b(u[1]);
+    b(u[1].ywxz);
+    c(u[1].x);
+    c(u[1].ywxz.x);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_fn.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_fn.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..82bbdcc
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_fn.wgsl.expected.dxc.hlsl

@@ -0,0 +1,45 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+
+void a(matrix<float16_t, 2, 4> m) {
+}
+
+void b(vector<float16_t, 4> v) {
+}
+
+void c(float16_t f_1) {
+}
+
+matrix<float16_t, 2, 4> tint_symbol(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  return matrix<float16_t, 2, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  uint2 ubo_load_4 = u[0].zw;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  b(vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]));
+  uint2 ubo_load_5 = u[0].zw;
+  vector<float16_t, 2> ubo_load_5_xz = vector<float16_t, 2>(f16tof32(ubo_load_5 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_5_yw = vector<float16_t, 2>(f16tof32(ubo_load_5 >> 16));
+  b(vector<float16_t, 4>(ubo_load_5_xz[0], ubo_load_5_yw[0], ubo_load_5_xz[1], ubo_load_5_yw[1]).ywxz);
+  c(float16_t(f16tof32(((u[0].z) & 0xFFFF))));
+  uint2 ubo_load_6 = u[0].zw;
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  c(vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]).ywxz.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_fn.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_fn.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..7305b4e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_fn.wgsl.expected.fxc.hlsl

@@ -0,0 +1,52 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+
+void a(matrix<float16_t, 2, 4> m) {
+}
+
+void b(vector<float16_t, 4> v) {
+}
+
+void c(float16_t f_1) {
+}
+
+matrix<float16_t, 2, 4> tint_symbol(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  return matrix<float16_t, 2, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  uint2 ubo_load_4 = u[0].zw;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  b(vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]));
+  uint2 ubo_load_5 = u[0].zw;
+  vector<float16_t, 2> ubo_load_5_xz = vector<float16_t, 2>(f16tof32(ubo_load_5 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_5_yw = vector<float16_t, 2>(f16tof32(ubo_load_5 >> 16));
+  b(vector<float16_t, 4>(ubo_load_5_xz[0], ubo_load_5_yw[0], ubo_load_5_xz[1], ubo_load_5_yw[1]).ywxz);
+  c(float16_t(f16tof32(((u[0].z) & 0xFFFF))));
+  uint2 ubo_load_6 = u[0].zw;
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  c(vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]).ywxz.x);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x00000273F7AEC5E0(5,15-23): error X3000: syntax error: unexpected token 'float16_t'
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x00000273F7AEC5E0(8,15-23): error X3000: syntax error: unexpected token 'float16_t'
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x00000273F7AEC5E0(11,8-16): error X3000: unrecognized identifier 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_fn.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_fn.wgsl.expected.glsl
new file mode 100644
index 0000000..89087d3
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_fn.wgsl.expected.glsl

@@ -0,0 +1,34 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  f16vec4 inner_0;
+  f16vec4 inner_1;
+} u;
+
+void a(f16mat2x4 m) {
+}
+
+void b(f16vec4 v) {
+}
+
+void c(float16_t f_1) {
+}
+
+f16mat2x4 load_u_inner() {
+  return f16mat2x4(u.inner_0, u.inner_1);
+}
+
+void f() {
+  a(load_u_inner());
+  b(u.inner_1);
+  b(u.inner_1.ywxz);
+  c(u.inner_1[0u]);
+  c(u.inner_1.ywxz[0u]);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_fn.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_fn.wgsl.expected.msl
new file mode 100644
index 0000000..e58cdf3
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_fn.wgsl.expected.msl

@@ -0,0 +1,21 @@
+#include <metal_stdlib>
+
+using namespace metal;
+void a(half2x4 m) {
+}
+
+void b(half4 v) {
+}
+
+void c(half f_1) {
+}
+
+kernel void f(const constant half2x4* tint_symbol [[buffer(0)]]) {
+  a(*(tint_symbol));
+  b((*(tint_symbol))[1]);
+  b(half4((*(tint_symbol))[1]).ywxz);
+  c((*(tint_symbol))[1][0]);
+  c(half4((*(tint_symbol))[1]).ywxz[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_fn.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_fn.wgsl.expected.spvasm
new file mode 100644
index 0000000..d125966
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_fn.wgsl.expected.spvasm

@@ -0,0 +1,94 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 57
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner_0"
+               OpMemberName %u_block_std140 1 "inner_1"
+               OpName %u "u"
+               OpName %a "a"
+               OpName %m "m"
+               OpName %b "b"
+               OpName %v "v"
+               OpName %c "c"
+               OpName %f_1 "f_1"
+               OpName %load_u_inner "load_u_inner"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %u_block_std140 1 Offset 8
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %half = OpTypeFloat 16
+     %v4half = OpTypeVector %half 4
+%u_block_std140 = OpTypeStruct %v4half %v4half
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+       %void = OpTypeVoid
+ %mat2v4half = OpTypeMatrix %v4half 2
+          %6 = OpTypeFunction %void %mat2v4half
+         %12 = OpTypeFunction %void %v4half
+         %16 = OpTypeFunction %void %half
+         %20 = OpTypeFunction %mat2v4half
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half
+     %uint_1 = OpConstant %uint 1
+         %35 = OpTypeFunction %void
+         %48 = OpConstantNull %uint
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+          %a = OpFunction %void None %6
+          %m = OpFunctionParameter %mat2v4half
+         %11 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %b = OpFunction %void None %12
+          %v = OpFunctionParameter %v4half
+         %15 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %c = OpFunction %void None %16
+        %f_1 = OpFunctionParameter %half
+         %19 = OpLabel
+               OpReturn
+               OpFunctionEnd
+%load_u_inner = OpFunction %mat2v4half None %20
+         %22 = OpLabel
+         %28 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0
+         %29 = OpLoad %v4half %28
+         %32 = OpAccessChain %_ptr_Uniform_v4half %u %uint_1
+         %33 = OpLoad %v4half %32
+         %34 = OpCompositeConstruct %mat2v4half %29 %33
+               OpReturnValue %34
+               OpFunctionEnd
+          %f = OpFunction %void None %35
+         %37 = OpLabel
+         %39 = OpFunctionCall %mat2v4half %load_u_inner
+         %38 = OpFunctionCall %void %a %39
+         %41 = OpAccessChain %_ptr_Uniform_v4half %u %uint_1
+         %42 = OpLoad %v4half %41
+         %40 = OpFunctionCall %void %b %42
+         %44 = OpAccessChain %_ptr_Uniform_v4half %u %uint_1
+         %45 = OpLoad %v4half %44
+         %46 = OpVectorShuffle %v4half %45 %45 1 3 0 2
+         %43 = OpFunctionCall %void %b %46
+         %50 = OpAccessChain %_ptr_Uniform_half %u %uint_1 %48
+         %51 = OpLoad %half %50
+         %47 = OpFunctionCall %void %c %51
+         %53 = OpAccessChain %_ptr_Uniform_v4half %u %uint_1
+         %54 = OpLoad %v4half %53
+         %55 = OpVectorShuffle %v4half %54 %54 1 3 0 2
+         %56 = OpCompositeExtract %half %55 0
+         %52 = OpFunctionCall %void %c %56
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_fn.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_fn.wgsl.expected.wgsl
new file mode 100644
index 0000000..a57941a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_fn.wgsl.expected.wgsl

@@ -0,0 +1,21 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat2x4<f16>;
+
+fn a(m : mat2x4<f16>) {
+}
+
+fn b(v : vec4<f16>) {
+}
+
+fn c(f : f16) {
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  a(u);
+  b(u[1]);
+  b(u[1].ywxz);
+  c(u[1].x);
+  c(u[1].ywxz.x);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_private.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_private.wgsl
new file mode 100644
index 0000000..ebc8498
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_private.wgsl

@@ -0,0 +1,12 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat2x4<f16>;
+var<private> p : mat2x4<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+    p = u;
+    p[1] = u[0];
+    p[1] = u[0].ywxz;
+    p[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_private.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_private.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..9781a40
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_private.wgsl.expected.dxc.hlsl

@@ -0,0 +1,33 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+static matrix<float16_t, 2, 4> p = matrix<float16_t, 2, 4>(float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h));
+
+matrix<float16_t, 2, 4> tint_symbol(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  return matrix<float16_t, 2, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  uint2 ubo_load_4 = u[0].xy;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  p[1] = vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]);
+  uint2 ubo_load_5 = u[0].xy;
+  vector<float16_t, 2> ubo_load_5_xz = vector<float16_t, 2>(f16tof32(ubo_load_5 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_5_yw = vector<float16_t, 2>(f16tof32(ubo_load_5 >> 16));
+  p[1] = vector<float16_t, 4>(ubo_load_5_xz[0], ubo_load_5_yw[0], ubo_load_5_xz[1], ubo_load_5_yw[1]).ywxz;
+  p[0][1] = float16_t(f16tof32(((u[0].z) & 0xFFFF)));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_private.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_private.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..94095cb
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_private.wgsl.expected.fxc.hlsl

@@ -0,0 +1,38 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+static matrix<float16_t, 2, 4> p = matrix<float16_t, 2, 4>(float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h));
+
+matrix<float16_t, 2, 4> tint_symbol(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  return matrix<float16_t, 2, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  uint2 ubo_load_4 = u[0].xy;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  p[1] = vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]);
+  uint2 ubo_load_5 = u[0].xy;
+  vector<float16_t, 2> ubo_load_5_xz = vector<float16_t, 2>(f16tof32(ubo_load_5 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_5_yw = vector<float16_t, 2>(f16tof32(ubo_load_5 >> 16));
+  p[1] = vector<float16_t, 4>(ubo_load_5_xz[0], ubo_load_5_yw[0], ubo_load_5_xz[1], ubo_load_5_yw[1]).ywxz;
+  p[0][1] = float16_t(f16tof32(((u[0].z) & 0xFFFF)));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000014E53D58AD0(4,15-23): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_private.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_private.wgsl.expected.glsl
new file mode 100644
index 0000000..19af901
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_private.wgsl.expected.glsl

@@ -0,0 +1,25 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  f16vec4 inner_0;
+  f16vec4 inner_1;
+} u;
+
+f16mat2x4 p = f16mat2x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf);
+f16mat2x4 load_u_inner() {
+  return f16mat2x4(u.inner_0, u.inner_1);
+}
+
+void f() {
+  p = load_u_inner();
+  p[1] = u.inner_0;
+  p[1] = u.inner_0.ywxz;
+  p[0][1] = u.inner_1[0u];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_private.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_private.wgsl.expected.msl
new file mode 100644
index 0000000..5042d02
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_private.wgsl.expected.msl

@@ -0,0 +1,12 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void f(const constant half2x4* tint_symbol_1 [[buffer(0)]]) {
+  thread half2x4 tint_symbol = half2x4(0.0h);
+  tint_symbol = *(tint_symbol_1);
+  tint_symbol[1] = (*(tint_symbol_1))[0];
+  tint_symbol[1] = half4((*(tint_symbol_1))[0]).ywxz;
+  tint_symbol[0][1] = (*(tint_symbol_1))[1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_private.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_private.wgsl.expected.spvasm
new file mode 100644
index 0000000..910985b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_private.wgsl.expected.spvasm

@@ -0,0 +1,77 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 47
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner_0"
+               OpMemberName %u_block_std140 1 "inner_1"
+               OpName %u "u"
+               OpName %p "p"
+               OpName %load_u_inner "load_u_inner"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %u_block_std140 1 Offset 8
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %half = OpTypeFloat 16
+     %v4half = OpTypeVector %half 4
+%u_block_std140 = OpTypeStruct %v4half %v4half
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat2v4half = OpTypeMatrix %v4half 2
+%_ptr_Private_mat2v4half = OpTypePointer Private %mat2v4half
+          %9 = OpConstantNull %mat2v4half
+          %p = OpVariable %_ptr_Private_mat2v4half Private %9
+         %10 = OpTypeFunction %mat2v4half
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half
+     %uint_1 = OpConstant %uint 1
+       %void = OpTypeVoid
+         %25 = OpTypeFunction %void
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Private_v4half = OpTypePointer Private %v4half
+         %40 = OpConstantNull %int
+%_ptr_Private_half = OpTypePointer Private %half
+         %43 = OpConstantNull %uint
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+%load_u_inner = OpFunction %mat2v4half None %10
+         %12 = OpLabel
+         %18 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0
+         %19 = OpLoad %v4half %18
+         %22 = OpAccessChain %_ptr_Uniform_v4half %u %uint_1
+         %23 = OpLoad %v4half %22
+         %24 = OpCompositeConstruct %mat2v4half %19 %23
+               OpReturnValue %24
+               OpFunctionEnd
+          %f = OpFunction %void None %25
+         %28 = OpLabel
+         %29 = OpFunctionCall %mat2v4half %load_u_inner
+               OpStore %p %29
+         %33 = OpAccessChain %_ptr_Private_v4half %p %int_1
+         %34 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0
+         %35 = OpLoad %v4half %34
+               OpStore %33 %35
+         %36 = OpAccessChain %_ptr_Private_v4half %p %int_1
+         %37 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0
+         %38 = OpLoad %v4half %37
+         %39 = OpVectorShuffle %v4half %38 %38 1 3 0 2
+               OpStore %36 %39
+         %42 = OpAccessChain %_ptr_Private_half %p %40 %int_1
+         %45 = OpAccessChain %_ptr_Uniform_half %u %uint_1 %43
+         %46 = OpLoad %half %45
+               OpStore %42 %46
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_private.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_private.wgsl.expected.wgsl
new file mode 100644
index 0000000..105af55
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_private.wgsl.expected.wgsl

@@ -0,0 +1,13 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat2x4<f16>;
+
+var<private> p : mat2x4<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+  p = u;
+  p[1] = u[0];
+  p[1] = u[0].ywxz;
+  p[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_storage.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_storage.wgsl
new file mode 100644
index 0000000..0a74180
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_storage.wgsl

@@ -0,0 +1,12 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat2x4<f16>;
+@group(0) @binding(1) var<storage, read_write> s : mat2x4<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+    s = u;
+    s[1] = u[0];
+    s[1] = u[0].ywxz;
+    s[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_storage.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_storage.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..bd96423
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_storage.wgsl.expected.dxc.hlsl

@@ -0,0 +1,38 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 2, 4> value) {
+  buffer.Store<vector<float16_t, 4> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 8u), value[1u]);
+}
+
+matrix<float16_t, 2, 4> tint_symbol_2(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  return matrix<float16_t, 2, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_2(u, 0u));
+  uint2 ubo_load_4 = u[0].xy;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  s.Store<vector<float16_t, 4> >(8u, vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]));
+  uint2 ubo_load_5 = u[0].xy;
+  vector<float16_t, 2> ubo_load_5_xz = vector<float16_t, 2>(f16tof32(ubo_load_5 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_5_yw = vector<float16_t, 2>(f16tof32(ubo_load_5 >> 16));
+  s.Store<vector<float16_t, 4> >(8u, vector<float16_t, 4>(ubo_load_5_xz[0], ubo_load_5_yw[0], ubo_load_5_xz[1], ubo_load_5_yw[1]).ywxz);
+  s.Store<float16_t>(2u, float16_t(f16tof32(((u[0].z) & 0xFFFF))));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_storage.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_storage.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..8723863
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_storage.wgsl.expected.fxc.hlsl

@@ -0,0 +1,44 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 2, 4> value) {
+  buffer.Store<vector<float16_t, 4> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 8u), value[1u]);
+}
+
+matrix<float16_t, 2, 4> tint_symbol_2(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  return matrix<float16_t, 2, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_2(u, 0u));
+  uint2 ubo_load_4 = u[0].xy;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  s.Store<vector<float16_t, 4> >(8u, vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]));
+  uint2 ubo_load_5 = u[0].xy;
+  vector<float16_t, 2> ubo_load_5_xz = vector<float16_t, 2>(f16tof32(ubo_load_5 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_5_yw = vector<float16_t, 2>(f16tof32(ubo_load_5 >> 16));
+  s.Store<vector<float16_t, 4> >(8u, vector<float16_t, 4>(ubo_load_5_xz[0], ubo_load_5_yw[0], ubo_load_5_xz[1], ubo_load_5_yw[1]).ywxz);
+  s.Store<float16_t>(2u, float16_t(f16tof32(((u[0].z) & 0xFFFF))));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000015E10277FD0(6,66-74): error X3000: syntax error: unexpected token 'float16_t'
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000015E10277FD0(7,3-14): error X3018: invalid subscript 'Store'
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_storage.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_storage.wgsl.expected.glsl
new file mode 100644
index 0000000..00bc62e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_storage.wgsl.expected.glsl

@@ -0,0 +1,28 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  f16vec4 inner_0;
+  f16vec4 inner_1;
+} u;
+
+layout(binding = 1, std430) buffer u_block_ssbo {
+  f16mat2x4 inner;
+} s;
+
+f16mat2x4 load_u_inner() {
+  return f16mat2x4(u.inner_0, u.inner_1);
+}
+
+void f() {
+  s.inner = load_u_inner();
+  s.inner[1] = u.inner_0;
+  s.inner[1] = u.inner_0.ywxz;
+  s.inner[0][1] = u.inner_1[0u];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_storage.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_storage.wgsl.expected.msl
new file mode 100644
index 0000000..44e2bbb
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_storage.wgsl.expected.msl

@@ -0,0 +1,11 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void f(device half2x4* tint_symbol [[buffer(1)]], const constant half2x4* tint_symbol_1 [[buffer(0)]]) {
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[0];
+  (*(tint_symbol))[1] = half4((*(tint_symbol_1))[0]).ywxz;
+  (*(tint_symbol))[0][1] = (*(tint_symbol_1))[1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_storage.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_storage.wgsl.expected.spvasm
new file mode 100644
index 0000000..0350c58
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_storage.wgsl.expected.spvasm

@@ -0,0 +1,87 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 49
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner_0"
+               OpMemberName %u_block_std140 1 "inner_1"
+               OpName %u "u"
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %s "s"
+               OpName %load_u_inner "load_u_inner"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %u_block_std140 1 Offset 8
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 8
+               OpDecorate %s DescriptorSet 0
+               OpDecorate %s Binding 1
+       %half = OpTypeFloat 16
+     %v4half = OpTypeVector %half 4
+%u_block_std140 = OpTypeStruct %v4half %v4half
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat2v4half = OpTypeMatrix %v4half 2
+    %u_block = OpTypeStruct %mat2v4half
+%_ptr_StorageBuffer_u_block = OpTypePointer StorageBuffer %u_block
+          %s = OpVariable %_ptr_StorageBuffer_u_block StorageBuffer
+         %10 = OpTypeFunction %mat2v4half
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half
+     %uint_1 = OpConstant %uint 1
+       %void = OpTypeVoid
+         %25 = OpTypeFunction %void
+%_ptr_StorageBuffer_mat2v4half = OpTypePointer StorageBuffer %mat2v4half
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_StorageBuffer_v4half = OpTypePointer StorageBuffer %v4half
+         %42 = OpConstantNull %int
+%_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
+         %45 = OpConstantNull %uint
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+%load_u_inner = OpFunction %mat2v4half None %10
+         %12 = OpLabel
+         %18 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0
+         %19 = OpLoad %v4half %18
+         %22 = OpAccessChain %_ptr_Uniform_v4half %u %uint_1
+         %23 = OpLoad %v4half %22
+         %24 = OpCompositeConstruct %mat2v4half %19 %23
+               OpReturnValue %24
+               OpFunctionEnd
+          %f = OpFunction %void None %25
+         %28 = OpLabel
+         %30 = OpAccessChain %_ptr_StorageBuffer_mat2v4half %s %uint_0
+         %31 = OpFunctionCall %mat2v4half %load_u_inner
+               OpStore %30 %31
+         %35 = OpAccessChain %_ptr_StorageBuffer_v4half %s %uint_0 %int_1
+         %36 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0
+         %37 = OpLoad %v4half %36
+               OpStore %35 %37
+         %38 = OpAccessChain %_ptr_StorageBuffer_v4half %s %uint_0 %int_1
+         %39 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0
+         %40 = OpLoad %v4half %39
+         %41 = OpVectorShuffle %v4half %40 %40 1 3 0 2
+               OpStore %38 %41
+         %44 = OpAccessChain %_ptr_StorageBuffer_half %s %uint_0 %42 %int_1
+         %47 = OpAccessChain %_ptr_Uniform_half %u %uint_1 %45
+         %48 = OpLoad %half %47
+               OpStore %44 %48
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_storage.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_storage.wgsl.expected.wgsl
new file mode 100644
index 0000000..261f1c3
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_storage.wgsl.expected.wgsl

@@ -0,0 +1,13 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat2x4<f16>;
+
+@group(0) @binding(1) var<storage, read_write> s : mat2x4<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+  s = u;
+  s[1] = u[0];
+  s[1] = u[0].ywxz;
+  s[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_workgroup.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_workgroup.wgsl
new file mode 100644
index 0000000..b74dcd9
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_workgroup.wgsl

@@ -0,0 +1,12 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat2x4<f16>;
+var<workgroup> w : mat2x4<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+    w = u;
+    w[1] = u[0];
+    w[1] = u[0].ywxz;
+    w[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_workgroup.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_workgroup.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..dc8da4d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_workgroup.wgsl.expected.dxc.hlsl

@@ -0,0 +1,45 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+groupshared matrix<float16_t, 2, 4> w;
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+matrix<float16_t, 2, 4> tint_symbol_2(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  return matrix<float16_t, 2, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]));
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    w = matrix<float16_t, 2, 4>((float16_t(0.0h)).xxxx, (float16_t(0.0h)).xxxx);
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  uint2 ubo_load_4 = u[0].xy;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  w[1] = vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]);
+  uint2 ubo_load_5 = u[0].xy;
+  vector<float16_t, 2> ubo_load_5_xz = vector<float16_t, 2>(f16tof32(ubo_load_5 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_5_yw = vector<float16_t, 2>(f16tof32(ubo_load_5 >> 16));
+  w[1] = vector<float16_t, 4>(ubo_load_5_xz[0], ubo_load_5_yw[0], ubo_load_5_xz[1], ubo_load_5_yw[1]).ywxz;
+  w[0][1] = float16_t(f16tof32(((u[0].z) & 0xFFFF)));
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_workgroup.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_workgroup.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..b2c263b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_workgroup.wgsl.expected.fxc.hlsl

@@ -0,0 +1,50 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+groupshared matrix<float16_t, 2, 4> w;
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+matrix<float16_t, 2, 4> tint_symbol_2(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  return matrix<float16_t, 2, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]));
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    w = matrix<float16_t, 2, 4>((float16_t(0.0h)).xxxx, (float16_t(0.0h)).xxxx);
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  uint2 ubo_load_4 = u[0].xy;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  w[1] = vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]);
+  uint2 ubo_load_5 = u[0].xy;
+  vector<float16_t, 2> ubo_load_5_xz = vector<float16_t, 2>(f16tof32(ubo_load_5 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_5_yw = vector<float16_t, 2>(f16tof32(ubo_load_5 >> 16));
+  w[1] = vector<float16_t, 4>(ubo_load_5_xz[0], ubo_load_5_yw[0], ubo_load_5_xz[1], ubo_load_5_yw[1]).ywxz;
+  w[0][1] = float16_t(f16tof32(((u[0].z) & 0xFFFF)));
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000018E62EDA940(4,20-28): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_workgroup.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_workgroup.wgsl.expected.glsl
new file mode 100644
index 0000000..03193cd
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_workgroup.wgsl.expected.glsl

@@ -0,0 +1,29 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  f16vec4 inner_0;
+  f16vec4 inner_1;
+} u;
+
+shared f16mat2x4 w;
+f16mat2x4 load_u_inner() {
+  return f16mat2x4(u.inner_0, u.inner_1);
+}
+
+void f(uint local_invocation_index) {
+  {
+    w = f16mat2x4(f16vec4(0.0hf), f16vec4(0.0hf));
+  }
+  barrier();
+  w = load_u_inner();
+  w[1] = u.inner_0;
+  w[1] = u.inner_0.ywxz;
+  w[0][1] = u.inner_1[0u];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f(gl_LocalInvocationIndex);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_workgroup.wgsl.expected.msl
new file mode 100644
index 0000000..879dc14
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_workgroup.wgsl.expected.msl

@@ -0,0 +1,24 @@
+#include <metal_stdlib>
+
+using namespace metal;
+struct tint_symbol_5 {
+  half2x4 w;
+};
+
+void f_inner(uint local_invocation_index, threadgroup half2x4* const tint_symbol, const constant half2x4* const tint_symbol_1) {
+  {
+    *(tint_symbol) = half2x4(half4(0.0h), half4(0.0h));
+  }
+  threadgroup_barrier(mem_flags::mem_threadgroup);
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[0];
+  (*(tint_symbol))[1] = half4((*(tint_symbol_1))[0]).ywxz;
+  (*(tint_symbol))[0][1] = (*(tint_symbol_1))[1][0];
+}
+
+kernel void f(const constant half2x4* tint_symbol_4 [[buffer(0)]], threadgroup tint_symbol_5* tint_symbol_3 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
+  threadgroup half2x4* const tint_symbol_2 = &((*(tint_symbol_3)).w);
+  f_inner(local_invocation_index, tint_symbol_2, tint_symbol_4);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_workgroup.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_workgroup.wgsl.expected.spvasm
new file mode 100644
index 0000000..007777b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_workgroup.wgsl.expected.spvasm

@@ -0,0 +1,95 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 58
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f" %local_invocation_index_1
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %local_invocation_index_1 "local_invocation_index_1"
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner_0"
+               OpMemberName %u_block_std140 1 "inner_1"
+               OpName %u "u"
+               OpName %w "w"
+               OpName %load_u_inner "load_u_inner"
+               OpName %f_inner "f_inner"
+               OpName %local_invocation_index "local_invocation_index"
+               OpName %f "f"
+               OpDecorate %local_invocation_index_1 BuiltIn LocalInvocationIndex
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %u_block_std140 1 Offset 8
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %uint = OpTypeInt 32 0
+%_ptr_Input_uint = OpTypePointer Input %uint
+%local_invocation_index_1 = OpVariable %_ptr_Input_uint Input
+       %half = OpTypeFloat 16
+     %v4half = OpTypeVector %half 4
+%u_block_std140 = OpTypeStruct %v4half %v4half
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat2v4half = OpTypeMatrix %v4half 2
+%_ptr_Workgroup_mat2v4half = OpTypePointer Workgroup %mat2v4half
+          %w = OpVariable %_ptr_Workgroup_mat2v4half Workgroup
+         %12 = OpTypeFunction %mat2v4half
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half
+     %uint_1 = OpConstant %uint 1
+       %void = OpTypeVoid
+         %26 = OpTypeFunction %void %uint
+         %31 = OpConstantNull %mat2v4half
+     %uint_2 = OpConstant %uint 2
+   %uint_264 = OpConstant %uint 264
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Workgroup_v4half = OpTypePointer Workgroup %v4half
+         %46 = OpConstantNull %int
+%_ptr_Workgroup_half = OpTypePointer Workgroup %half
+         %49 = OpConstantNull %uint
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+         %53 = OpTypeFunction %void
+%load_u_inner = OpFunction %mat2v4half None %12
+         %14 = OpLabel
+         %19 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0
+         %20 = OpLoad %v4half %19
+         %23 = OpAccessChain %_ptr_Uniform_v4half %u %uint_1
+         %24 = OpLoad %v4half %23
+         %25 = OpCompositeConstruct %mat2v4half %20 %24
+               OpReturnValue %25
+               OpFunctionEnd
+    %f_inner = OpFunction %void None %26
+%local_invocation_index = OpFunctionParameter %uint
+         %30 = OpLabel
+               OpStore %w %31
+               OpControlBarrier %uint_2 %uint_2 %uint_264
+         %35 = OpFunctionCall %mat2v4half %load_u_inner
+               OpStore %w %35
+         %39 = OpAccessChain %_ptr_Workgroup_v4half %w %int_1
+         %40 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0
+         %41 = OpLoad %v4half %40
+               OpStore %39 %41
+         %42 = OpAccessChain %_ptr_Workgroup_v4half %w %int_1
+         %43 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0
+         %44 = OpLoad %v4half %43
+         %45 = OpVectorShuffle %v4half %44 %44 1 3 0 2
+               OpStore %42 %45
+         %48 = OpAccessChain %_ptr_Workgroup_half %w %46 %int_1
+         %51 = OpAccessChain %_ptr_Uniform_half %u %uint_1 %49
+         %52 = OpLoad %half %51
+               OpStore %48 %52
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %53
+         %55 = OpLabel
+         %57 = OpLoad %uint %local_invocation_index_1
+         %56 = OpFunctionCall %void %f_inner %57
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_workgroup.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_workgroup.wgsl.expected.wgsl
new file mode 100644
index 0000000..1ba1243
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f16/to_workgroup.wgsl.expected.wgsl

@@ -0,0 +1,13 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat2x4<f16>;
+
+var<workgroup> w : mat2x4<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+  w = u;
+  w[1] = u[0];
+  w[1] = u[0].ywxz;
+  w[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/dynamic_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/dynamic_index_via_ptr.wgsl
new file mode 100644
index 0000000..cae6dfd
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/dynamic_index_via_ptr.wgsl

@@ -0,0 +1,13 @@
+@group(0) @binding(0) var<uniform> m : mat2x4<f32>;
+
+var<private> counter = 0;
+fn i() -> i32 { counter++; return counter; }
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_m   = &m;
+  let p_m_i = &((*p_m)[i()]);
+
+  let l_m   : mat2x4<f32> = *p_m;
+  let l_m_i : vec4<f32>   = *p_m_i;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..c31f9b9
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,24 @@
+cbuffer cbuffer_m : register(b0, space0) {
+  uint4 m[2];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+float2x4 tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_m_i_save = i();
+  const float2x4 l_m = tint_symbol(m, 0u);
+  const uint scalar_offset_2 = ((16u * uint(p_m_i_save))) / 4;
+  const float4 l_m_i = asfloat(m[scalar_offset_2 / 4]);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..c31f9b9
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,24 @@
+cbuffer cbuffer_m : register(b0, space0) {
+  uint4 m[2];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+float2x4 tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_m_i_save = i();
+  const float2x4 l_m = tint_symbol(m, 0u);
+  const uint scalar_offset_2 = ((16u * uint(p_m_i_save))) / 4;
+  const float4 l_m_i = asfloat(m[scalar_offset_2 / 4]);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/dynamic_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/dynamic_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..f993ff6
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/dynamic_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,24 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform m_block_ubo {
+  mat2x4 inner;
+} m;
+
+int counter = 0;
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+void f() {
+  int tint_symbol = i();
+  int p_m_i_save = tint_symbol;
+  mat2x4 l_m = m.inner;
+  vec4 l_m_i = m.inner[p_m_i_save];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/dynamic_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/dynamic_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..5c7026b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/dynamic_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,17 @@
+#include <metal_stdlib>
+
+using namespace metal;
+int i() {
+  thread int tint_symbol_1 = 0;
+  tint_symbol_1 = as_type<int>((as_type<uint>(tint_symbol_1) + as_type<uint>(1)));
+  return tint_symbol_1;
+}
+
+kernel void f(const constant float2x4* tint_symbol_2 [[buffer(0)]]) {
+  int const tint_symbol = i();
+  int const p_m_i_save = tint_symbol;
+  float2x4 const l_m = *(tint_symbol_2);
+  float4 const l_m_i = (*(tint_symbol_2))[p_m_i_save];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/dynamic_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/dynamic_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..bce9daa
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/dynamic_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,57 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 31
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %m_block "m_block"
+               OpMemberName %m_block 0 "inner"
+               OpName %m "m"
+               OpName %counter "counter"
+               OpName %i "i"
+               OpName %f "f"
+               OpDecorate %m_block Block
+               OpMemberDecorate %m_block 0 Offset 0
+               OpMemberDecorate %m_block 0 ColMajor
+               OpMemberDecorate %m_block 0 MatrixStride 16
+               OpDecorate %m NonWritable
+               OpDecorate %m DescriptorSet 0
+               OpDecorate %m Binding 0
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%mat2v4float = OpTypeMatrix %v4float 2
+    %m_block = OpTypeStruct %mat2v4float
+%_ptr_Uniform_m_block = OpTypePointer Uniform %m_block
+          %m = OpVariable %_ptr_Uniform_m_block Uniform
+        %int = OpTypeInt 32 1
+          %8 = OpConstantNull %int
+%_ptr_Private_int = OpTypePointer Private %int
+    %counter = OpVariable %_ptr_Private_int Private %8
+         %11 = OpTypeFunction %int
+      %int_1 = OpConstant %int 1
+       %void = OpTypeVoid
+         %18 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_mat2v4float = OpTypePointer Uniform %mat2v4float
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+          %i = OpFunction %int None %11
+         %13 = OpLabel
+         %14 = OpLoad %int %counter
+         %16 = OpIAdd %int %14 %int_1
+               OpStore %counter %16
+         %17 = OpLoad %int %counter
+               OpReturnValue %17
+               OpFunctionEnd
+          %f = OpFunction %void None %18
+         %21 = OpLabel
+         %22 = OpFunctionCall %int %i
+         %26 = OpAccessChain %_ptr_Uniform_mat2v4float %m %uint_0
+         %27 = OpLoad %mat2v4float %26
+         %29 = OpAccessChain %_ptr_Uniform_v4float %m %uint_0 %22
+         %30 = OpLoad %v4float %29
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/dynamic_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/dynamic_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..fa24961
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/dynamic_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,16 @@
+@group(0) @binding(0) var<uniform> m : mat2x4<f32>;
+
+var<private> counter = 0;
+
+fn i() -> i32 {
+  counter++;
+  return counter;
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_m = &(m);
+  let p_m_i = &((*(p_m))[i()]);
+  let l_m : mat2x4<f32> = *(p_m);
+  let l_m_i : vec4<f32> = *(p_m_i);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/static_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/static_index_via_ptr.wgsl
new file mode 100644
index 0000000..8f94035
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/static_index_via_ptr.wgsl

@@ -0,0 +1,13 @@
+@group(0) @binding(0) var<uniform> m : mat2x4<f32>;
+
+var<private> counter = 0;
+fn i() -> i32 { counter++; return counter; }
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_m   = &m;
+  let p_m_1 = &((*p_m)[1]);
+
+  let l_m   : mat2x4<f32> = *p_m;
+  let l_m_1 : vec4<f32>   = *p_m_1;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..5f5a358
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,22 @@
+cbuffer cbuffer_m : register(b0, space0) {
+  uint4 m[2];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+float2x4 tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float2x4 l_m = tint_symbol(m, 0u);
+  const float4 l_m_1 = asfloat(m[1]);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..5f5a358
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,22 @@
+cbuffer cbuffer_m : register(b0, space0) {
+  uint4 m[2];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+float2x4 tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float2x4 l_m = tint_symbol(m, 0u);
+  const float4 l_m_1 = asfloat(m[1]);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/static_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/static_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..3c1a06f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/static_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,16 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform m_block_ubo {
+  mat2x4 inner;
+} m;
+
+void f() {
+  mat2x4 l_m = m.inner;
+  vec4 l_m_1 = m.inner[1];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/static_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/static_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..51566cb
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/static_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,15 @@
+#include <metal_stdlib>
+
+using namespace metal;
+int i() {
+  thread int tint_symbol = 0;
+  tint_symbol = as_type<int>((as_type<uint>(tint_symbol) + as_type<uint>(1)));
+  return tint_symbol;
+}
+
+kernel void f(const constant float2x4* tint_symbol_1 [[buffer(0)]]) {
+  float2x4 const l_m = *(tint_symbol_1);
+  float4 const l_m_1 = (*(tint_symbol_1))[1];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/static_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/static_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..5405654
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/static_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,56 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 30
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %m_block "m_block"
+               OpMemberName %m_block 0 "inner"
+               OpName %m "m"
+               OpName %counter "counter"
+               OpName %i "i"
+               OpName %f "f"
+               OpDecorate %m_block Block
+               OpMemberDecorate %m_block 0 Offset 0
+               OpMemberDecorate %m_block 0 ColMajor
+               OpMemberDecorate %m_block 0 MatrixStride 16
+               OpDecorate %m NonWritable
+               OpDecorate %m DescriptorSet 0
+               OpDecorate %m Binding 0
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%mat2v4float = OpTypeMatrix %v4float 2
+    %m_block = OpTypeStruct %mat2v4float
+%_ptr_Uniform_m_block = OpTypePointer Uniform %m_block
+          %m = OpVariable %_ptr_Uniform_m_block Uniform
+        %int = OpTypeInt 32 1
+          %8 = OpConstantNull %int
+%_ptr_Private_int = OpTypePointer Private %int
+    %counter = OpVariable %_ptr_Private_int Private %8
+         %11 = OpTypeFunction %int
+      %int_1 = OpConstant %int 1
+       %void = OpTypeVoid
+         %18 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_mat2v4float = OpTypePointer Uniform %mat2v4float
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+          %i = OpFunction %int None %11
+         %13 = OpLabel
+         %14 = OpLoad %int %counter
+         %16 = OpIAdd %int %14 %int_1
+               OpStore %counter %16
+         %17 = OpLoad %int %counter
+               OpReturnValue %17
+               OpFunctionEnd
+          %f = OpFunction %void None %18
+         %21 = OpLabel
+         %25 = OpAccessChain %_ptr_Uniform_mat2v4float %m %uint_0
+         %26 = OpLoad %mat2v4float %25
+         %28 = OpAccessChain %_ptr_Uniform_v4float %m %uint_0 %int_1
+         %29 = OpLoad %v4float %28
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/static_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/static_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..5129b61
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/static_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,16 @@
+@group(0) @binding(0) var<uniform> m : mat2x4<f32>;
+
+var<private> counter = 0;
+
+fn i() -> i32 {
+  counter++;
+  return counter;
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_m = &(m);
+  let p_m_1 = &((*(p_m))[1]);
+  let l_m : mat2x4<f32> = *(p_m);
+  let l_m_1 : vec4<f32> = *(p_m_1);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_builtin.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_builtin.wgsl
new file mode 100644
index 0000000..81d1c25
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_builtin.wgsl

@@ -0,0 +1,8 @@
+@group(0) @binding(0) var<uniform> u : mat2x4<f32>;
+
+@compute @workgroup_size(1)
+fn f() {
+    let t = transpose(u);
+    let l = length(u[1]);
+    let a = abs(u[0].ywxz.x);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_builtin.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_builtin.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..77df7cf
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_builtin.wgsl.expected.dxc.hlsl

@@ -0,0 +1,17 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+
+float2x4 tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float4x2 t = transpose(tint_symbol(u, 0u));
+  const float l = length(asfloat(u[1]));
+  const float a = abs(asfloat(u[0]).ywxz.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_builtin.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_builtin.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..77df7cf
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_builtin.wgsl.expected.fxc.hlsl

@@ -0,0 +1,17 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+
+float2x4 tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float4x2 t = transpose(tint_symbol(u, 0u));
+  const float l = length(asfloat(u[1]));
+  const float a = abs(asfloat(u[0]).ywxz.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_builtin.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_builtin.wgsl.expected.glsl
new file mode 100644
index 0000000..6d9a4df
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_builtin.wgsl.expected.glsl

@@ -0,0 +1,17 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat2x4 inner;
+} u;
+
+void f() {
+  mat4x2 t = transpose(u.inner);
+  float l = length(u.inner[1]);
+  float a = abs(u.inner[0].ywxz.x);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_builtin.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_builtin.wgsl.expected.msl
new file mode 100644
index 0000000..e4ca0da
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_builtin.wgsl.expected.msl

@@ -0,0 +1,10 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void f(const constant float2x4* tint_symbol [[buffer(0)]]) {
+  float4x2 const t = transpose(*(tint_symbol));
+  float const l = length((*(tint_symbol))[1]);
+  float const a = fabs(float4((*(tint_symbol))[0]).ywxz[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_builtin.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_builtin.wgsl.expected.spvasm
new file mode 100644
index 0000000..f1231b5
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_builtin.wgsl.expected.spvasm

@@ -0,0 +1,53 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 32
+; Schema: 0
+               OpCapability Shader
+         %20 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%mat2v4float = OpTypeMatrix %v4float 2
+    %u_block = OpTypeStruct %mat2v4float
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+       %void = OpTypeVoid
+          %7 = OpTypeFunction %void
+    %v2float = OpTypeVector %float 2
+%mat4v2float = OpTypeMatrix %v2float 4
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_mat2v4float = OpTypePointer Uniform %mat2v4float
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+         %27 = OpConstantNull %int
+          %f = OpFunction %void None %7
+         %10 = OpLabel
+         %17 = OpAccessChain %_ptr_Uniform_mat2v4float %u %uint_0
+         %18 = OpLoad %mat2v4float %17
+         %11 = OpTranspose %mat4v2float %18
+         %24 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %int_1
+         %25 = OpLoad %v4float %24
+         %19 = OpExtInst %float %20 Length %25
+         %28 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %27
+         %29 = OpLoad %v4float %28
+         %30 = OpVectorShuffle %v4float %29 %29 1 3 0 2
+         %31 = OpCompositeExtract %float %30 0
+         %26 = OpExtInst %float %20 FAbs %31
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_builtin.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_builtin.wgsl.expected.wgsl
new file mode 100644
index 0000000..58e7bdd
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_builtin.wgsl.expected.wgsl

@@ -0,0 +1,8 @@
+@group(0) @binding(0) var<uniform> u : mat2x4<f32>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let t = transpose(u);
+  let l = length(u[1]);
+  let a = abs(u[0].ywxz.x);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_fn.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_fn.wgsl
new file mode 100644
index 0000000..227b46c
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_fn.wgsl

@@ -0,0 +1,14 @@
+@group(0) @binding(0) var<uniform> u : mat2x4<f32>;
+
+fn a(m : mat2x4<f32>) {}
+fn b(v : vec4<f32>) {}
+fn c(f : f32) {}
+
+@compute @workgroup_size(1)
+fn f() {
+    a(u);
+    b(u[1]);
+    b(u[1].ywxz);
+    c(u[1].x);
+    c(u[1].ywxz.x);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_fn.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_fn.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..5f64389
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_fn.wgsl.expected.dxc.hlsl

@@ -0,0 +1,28 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+
+void a(float2x4 m) {
+}
+
+void b(float4 v) {
+}
+
+void c(float f_1) {
+}
+
+float2x4 tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(asfloat(u[1]));
+  b(asfloat(u[1]).ywxz);
+  c(asfloat(u[1].x));
+  c(asfloat(u[1]).ywxz.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_fn.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_fn.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..5f64389
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_fn.wgsl.expected.fxc.hlsl

@@ -0,0 +1,28 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+
+void a(float2x4 m) {
+}
+
+void b(float4 v) {
+}
+
+void c(float f_1) {
+}
+
+float2x4 tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(asfloat(u[1]));
+  b(asfloat(u[1]).ywxz);
+  c(asfloat(u[1].x));
+  c(asfloat(u[1]).ywxz.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_fn.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_fn.wgsl.expected.glsl
new file mode 100644
index 0000000..0abb3cd
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_fn.wgsl.expected.glsl

@@ -0,0 +1,28 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat2x4 inner;
+} u;
+
+void a(mat2x4 m) {
+}
+
+void b(vec4 v) {
+}
+
+void c(float f_1) {
+}
+
+void f() {
+  a(u.inner);
+  b(u.inner[1]);
+  b(u.inner[1].ywxz);
+  c(u.inner[1].x);
+  c(u.inner[1].ywxz.x);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_fn.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_fn.wgsl.expected.msl
new file mode 100644
index 0000000..c0abb91
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_fn.wgsl.expected.msl

@@ -0,0 +1,21 @@
+#include <metal_stdlib>
+
+using namespace metal;
+void a(float2x4 m) {
+}
+
+void b(float4 v) {
+}
+
+void c(float f_1) {
+}
+
+kernel void f(const constant float2x4* tint_symbol [[buffer(0)]]) {
+  a(*(tint_symbol));
+  b((*(tint_symbol))[1]);
+  b(float4((*(tint_symbol))[1]).ywxz);
+  c((*(tint_symbol))[1][0]);
+  c(float4((*(tint_symbol))[1]).ywxz[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_fn.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_fn.wgsl.expected.spvasm
new file mode 100644
index 0000000..12cb8ec
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_fn.wgsl.expected.spvasm

@@ -0,0 +1,81 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 48
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %a "a"
+               OpName %m "m"
+               OpName %b "b"
+               OpName %v "v"
+               OpName %c "c"
+               OpName %f_1 "f_1"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%mat2v4float = OpTypeMatrix %v4float 2
+    %u_block = OpTypeStruct %mat2v4float
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+       %void = OpTypeVoid
+          %7 = OpTypeFunction %void %mat2v4float
+         %12 = OpTypeFunction %void %v4float
+         %16 = OpTypeFunction %void %float
+         %20 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_mat2v4float = OpTypePointer Uniform %mat2v4float
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+          %a = OpFunction %void None %7
+          %m = OpFunctionParameter %mat2v4float
+         %11 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %b = OpFunction %void None %12
+          %v = OpFunctionParameter %v4float
+         %15 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %c = OpFunction %void None %16
+        %f_1 = OpFunctionParameter %float
+         %19 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %20
+         %22 = OpLabel
+         %27 = OpAccessChain %_ptr_Uniform_mat2v4float %u %uint_0
+         %28 = OpLoad %mat2v4float %27
+         %23 = OpFunctionCall %void %a %28
+         %33 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %int_1
+         %34 = OpLoad %v4float %33
+         %29 = OpFunctionCall %void %b %34
+         %36 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %int_1
+         %37 = OpLoad %v4float %36
+         %38 = OpVectorShuffle %v4float %37 %37 1 3 0 2
+         %35 = OpFunctionCall %void %b %38
+         %41 = OpAccessChain %_ptr_Uniform_float %u %uint_0 %int_1 %uint_0
+         %42 = OpLoad %float %41
+         %39 = OpFunctionCall %void %c %42
+         %44 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %int_1
+         %45 = OpLoad %v4float %44
+         %46 = OpVectorShuffle %v4float %45 %45 1 3 0 2
+         %47 = OpCompositeExtract %float %46 0
+         %43 = OpFunctionCall %void %c %47
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_fn.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_fn.wgsl.expected.wgsl
new file mode 100644
index 0000000..4983317
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_fn.wgsl.expected.wgsl

@@ -0,0 +1,19 @@
+@group(0) @binding(0) var<uniform> u : mat2x4<f32>;
+
+fn a(m : mat2x4<f32>) {
+}
+
+fn b(v : vec4<f32>) {
+}
+
+fn c(f : f32) {
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  a(u);
+  b(u[1]);
+  b(u[1].ywxz);
+  c(u[1].x);
+  c(u[1].ywxz.x);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_private.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_private.wgsl
new file mode 100644
index 0000000..0f35977
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_private.wgsl

@@ -0,0 +1,10 @@
+@group(0) @binding(0) var<uniform> u : mat2x4<f32>;
+var<private> p : mat2x4<f32>;
+
+@compute @workgroup_size(1)
+fn f() {
+    p = u;
+    p[1] = u[0];
+    p[1] = u[0].ywxz;
+    p[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_private.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_private.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..c81f694
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_private.wgsl.expected.dxc.hlsl

@@ -0,0 +1,19 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+static float2x4 p = float2x4(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f);
+
+float2x4 tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = asfloat(u[0]);
+  p[1] = asfloat(u[0]).ywxz;
+  p[0][1] = asfloat(u[1].x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_private.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_private.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..c81f694
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_private.wgsl.expected.fxc.hlsl

@@ -0,0 +1,19 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+static float2x4 p = float2x4(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f);
+
+float2x4 tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = asfloat(u[0]);
+  p[1] = asfloat(u[0]).ywxz;
+  p[0][1] = asfloat(u[1].x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_private.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_private.wgsl.expected.glsl
new file mode 100644
index 0000000..d3e2387
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_private.wgsl.expected.glsl

@@ -0,0 +1,19 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat2x4 inner;
+} u;
+
+mat2x4 p = mat2x4(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f);
+void f() {
+  p = u.inner;
+  p[1] = u.inner[0];
+  p[1] = u.inner[0].ywxz;
+  p[0][1] = u.inner[1][0];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_private.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_private.wgsl.expected.msl
new file mode 100644
index 0000000..8405e42
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_private.wgsl.expected.msl

@@ -0,0 +1,12 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void f(const constant float2x4* tint_symbol_1 [[buffer(0)]]) {
+  thread float2x4 tint_symbol = float2x4(0.0f);
+  tint_symbol = *(tint_symbol_1);
+  tint_symbol[1] = (*(tint_symbol_1))[0];
+  tint_symbol[1] = float4((*(tint_symbol_1))[0]).ywxz;
+  tint_symbol[0][1] = (*(tint_symbol_1))[1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_private.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_private.wgsl.expected.spvasm
new file mode 100644
index 0000000..733c620
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_private.wgsl.expected.spvasm

@@ -0,0 +1,62 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 36
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %p "p"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%mat2v4float = OpTypeMatrix %v4float 2
+    %u_block = OpTypeStruct %mat2v4float
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%_ptr_Private_mat2v4float = OpTypePointer Private %mat2v4float
+          %9 = OpConstantNull %mat2v4float
+          %p = OpVariable %_ptr_Private_mat2v4float Private %9
+       %void = OpTypeVoid
+         %10 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_mat2v4float = OpTypePointer Uniform %mat2v4float
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Private_v4float = OpTypePointer Private %v4float
+         %23 = OpConstantNull %int
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+%_ptr_Private_float = OpTypePointer Private %float
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+          %f = OpFunction %void None %10
+         %13 = OpLabel
+         %17 = OpAccessChain %_ptr_Uniform_mat2v4float %u %uint_0
+         %18 = OpLoad %mat2v4float %17
+               OpStore %p %18
+         %22 = OpAccessChain %_ptr_Private_v4float %p %int_1
+         %25 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %23
+         %26 = OpLoad %v4float %25
+               OpStore %22 %26
+         %27 = OpAccessChain %_ptr_Private_v4float %p %int_1
+         %28 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %23
+         %29 = OpLoad %v4float %28
+         %30 = OpVectorShuffle %v4float %29 %29 1 3 0 2
+               OpStore %27 %30
+         %32 = OpAccessChain %_ptr_Private_float %p %23 %int_1
+         %34 = OpAccessChain %_ptr_Uniform_float %u %uint_0 %int_1 %23
+         %35 = OpLoad %float %34
+               OpStore %32 %35
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_private.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_private.wgsl.expected.wgsl
new file mode 100644
index 0000000..76ce420
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_private.wgsl.expected.wgsl

@@ -0,0 +1,11 @@
+@group(0) @binding(0) var<uniform> u : mat2x4<f32>;
+
+var<private> p : mat2x4<f32>;
+
+@compute @workgroup_size(1)
+fn f() {
+  p = u;
+  p[1] = u[0];
+  p[1] = u[0].ywxz;
+  p[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_storage.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_storage.wgsl
new file mode 100644
index 0000000..abb20b6
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_storage.wgsl

@@ -0,0 +1,10 @@
+@group(0) @binding(0) var<uniform> u : mat2x4<f32>;
+@group(0) @binding(1) var<storage, read_write> s : mat2x4<f32>;
+
+@compute @workgroup_size(1)
+fn f() {
+    s = u;
+    s[1] = u[0];
+    s[1] = u[0].ywxz;
+    s[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_storage.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_storage.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..8bc830a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_storage.wgsl.expected.dxc.hlsl

@@ -0,0 +1,24 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, float2x4 value) {
+  buffer.Store4((offset + 0u), asuint(value[0u]));
+  buffer.Store4((offset + 16u), asuint(value[1u]));
+}
+
+float2x4 tint_symbol_2(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_2(u, 0u));
+  s.Store4(16u, asuint(asfloat(u[0])));
+  s.Store4(16u, asuint(asfloat(u[0]).ywxz));
+  s.Store(4u, asuint(asfloat(u[1].x)));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_storage.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_storage.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..8bc830a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_storage.wgsl.expected.fxc.hlsl

@@ -0,0 +1,24 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, float2x4 value) {
+  buffer.Store4((offset + 0u), asuint(value[0u]));
+  buffer.Store4((offset + 16u), asuint(value[1u]));
+}
+
+float2x4 tint_symbol_2(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_2(u, 0u));
+  s.Store4(16u, asuint(asfloat(u[0])));
+  s.Store4(16u, asuint(asfloat(u[0]).ywxz));
+  s.Store(4u, asuint(asfloat(u[1].x)));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_storage.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_storage.wgsl.expected.glsl
new file mode 100644
index 0000000..01c2122
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_storage.wgsl.expected.glsl

@@ -0,0 +1,22 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat2x4 inner;
+} u;
+
+layout(binding = 1, std430) buffer u_block_ssbo {
+  mat2x4 inner;
+} s;
+
+void f() {
+  s.inner = u.inner;
+  s.inner[1] = u.inner[0];
+  s.inner[1] = u.inner[0].ywxz;
+  s.inner[0][1] = u.inner[1][0];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_storage.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_storage.wgsl.expected.msl
new file mode 100644
index 0000000..a99b798
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_storage.wgsl.expected.msl

@@ -0,0 +1,11 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void f(device float2x4* tint_symbol [[buffer(1)]], const constant float2x4* tint_symbol_1 [[buffer(0)]]) {
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[0];
+  (*(tint_symbol))[1] = float4((*(tint_symbol_1))[0]).ywxz;
+  (*(tint_symbol))[0][1] = (*(tint_symbol_1))[1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_storage.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_storage.wgsl.expected.spvasm
new file mode 100644
index 0000000..59d48a0
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_storage.wgsl.expected.spvasm

@@ -0,0 +1,65 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 37
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %s "s"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpDecorate %s DescriptorSet 0
+               OpDecorate %s Binding 1
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%mat2v4float = OpTypeMatrix %v4float 2
+    %u_block = OpTypeStruct %mat2v4float
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%_ptr_StorageBuffer_u_block = OpTypePointer StorageBuffer %u_block
+          %s = OpVariable %_ptr_StorageBuffer_u_block StorageBuffer
+       %void = OpTypeVoid
+          %9 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_StorageBuffer_mat2v4float = OpTypePointer StorageBuffer %mat2v4float
+%_ptr_Uniform_mat2v4float = OpTypePointer Uniform %mat2v4float
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_StorageBuffer_v4float = OpTypePointer StorageBuffer %v4float
+         %24 = OpConstantNull %int
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+          %f = OpFunction %void None %9
+         %12 = OpLabel
+         %16 = OpAccessChain %_ptr_StorageBuffer_mat2v4float %s %uint_0
+         %18 = OpAccessChain %_ptr_Uniform_mat2v4float %u %uint_0
+         %19 = OpLoad %mat2v4float %18
+               OpStore %16 %19
+         %23 = OpAccessChain %_ptr_StorageBuffer_v4float %s %uint_0 %int_1
+         %26 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %24
+         %27 = OpLoad %v4float %26
+               OpStore %23 %27
+         %28 = OpAccessChain %_ptr_StorageBuffer_v4float %s %uint_0 %int_1
+         %29 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %24
+         %30 = OpLoad %v4float %29
+         %31 = OpVectorShuffle %v4float %30 %30 1 3 0 2
+               OpStore %28 %31
+         %33 = OpAccessChain %_ptr_StorageBuffer_float %s %uint_0 %24 %int_1
+         %35 = OpAccessChain %_ptr_Uniform_float %u %uint_0 %int_1 %24
+         %36 = OpLoad %float %35
+               OpStore %33 %36
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_storage.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_storage.wgsl.expected.wgsl
new file mode 100644
index 0000000..79cb0f5
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_storage.wgsl.expected.wgsl

@@ -0,0 +1,11 @@
+@group(0) @binding(0) var<uniform> u : mat2x4<f32>;
+
+@group(0) @binding(1) var<storage, read_write> s : mat2x4<f32>;
+
+@compute @workgroup_size(1)
+fn f() {
+  s = u;
+  s[1] = u[0];
+  s[1] = u[0].ywxz;
+  s[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_workgroup.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_workgroup.wgsl
new file mode 100644
index 0000000..6d92e5a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_workgroup.wgsl

@@ -0,0 +1,10 @@
+@group(0) @binding(0) var<uniform> u : mat2x4<f32>;
+var<workgroup> w : mat2x4<f32>;
+
+@compute @workgroup_size(1)
+fn f() {
+    w = u;
+    w[1] = u[0];
+    w[1] = u[0].ywxz;
+    w[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_workgroup.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_workgroup.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..23c91dd
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_workgroup.wgsl.expected.dxc.hlsl

@@ -0,0 +1,31 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+groupshared float2x4 w;
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+float2x4 tint_symbol_2(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]));
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    w = float2x4((0.0f).xxxx, (0.0f).xxxx);
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = asfloat(u[0]);
+  w[1] = asfloat(u[0]).ywxz;
+  w[0][1] = asfloat(u[1].x);
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_workgroup.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_workgroup.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..23c91dd
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_workgroup.wgsl.expected.fxc.hlsl

@@ -0,0 +1,31 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+groupshared float2x4 w;
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+float2x4 tint_symbol_2(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]));
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    w = float2x4((0.0f).xxxx, (0.0f).xxxx);
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = asfloat(u[0]);
+  w[1] = asfloat(u[0]).ywxz;
+  w[0][1] = asfloat(u[1].x);
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_workgroup.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_workgroup.wgsl.expected.glsl
new file mode 100644
index 0000000..b021a5c
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_workgroup.wgsl.expected.glsl

@@ -0,0 +1,23 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat2x4 inner;
+} u;
+
+shared mat2x4 w;
+void f(uint local_invocation_index) {
+  {
+    w = mat2x4(vec4(0.0f), vec4(0.0f));
+  }
+  barrier();
+  w = u.inner;
+  w[1] = u.inner[0];
+  w[1] = u.inner[0].ywxz;
+  w[0][1] = u.inner[1][0];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f(gl_LocalInvocationIndex);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_workgroup.wgsl.expected.msl
new file mode 100644
index 0000000..35d7ce5
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_workgroup.wgsl.expected.msl

@@ -0,0 +1,24 @@
+#include <metal_stdlib>
+
+using namespace metal;
+struct tint_symbol_5 {
+  float2x4 w;
+};
+
+void f_inner(uint local_invocation_index, threadgroup float2x4* const tint_symbol, const constant float2x4* const tint_symbol_1) {
+  {
+    *(tint_symbol) = float2x4(float4(0.0f), float4(0.0f));
+  }
+  threadgroup_barrier(mem_flags::mem_threadgroup);
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[0];
+  (*(tint_symbol))[1] = float4((*(tint_symbol_1))[0]).ywxz;
+  (*(tint_symbol))[0][1] = (*(tint_symbol_1))[1][0];
+}
+
+kernel void f(const constant float2x4* tint_symbol_4 [[buffer(0)]], threadgroup tint_symbol_5* tint_symbol_3 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
+  threadgroup float2x4* const tint_symbol_2 = &((*(tint_symbol_3)).w);
+  f_inner(local_invocation_index, tint_symbol_2, tint_symbol_4);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_workgroup.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_workgroup.wgsl.expected.spvasm
new file mode 100644
index 0000000..e1add4d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_workgroup.wgsl.expected.spvasm

@@ -0,0 +1,80 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 47
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f" %local_invocation_index_1
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %local_invocation_index_1 "local_invocation_index_1"
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %w "w"
+               OpName %f_inner "f_inner"
+               OpName %local_invocation_index "local_invocation_index"
+               OpName %f "f"
+               OpDecorate %local_invocation_index_1 BuiltIn LocalInvocationIndex
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %uint = OpTypeInt 32 0
+%_ptr_Input_uint = OpTypePointer Input %uint
+%local_invocation_index_1 = OpVariable %_ptr_Input_uint Input
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%mat2v4float = OpTypeMatrix %v4float 2
+    %u_block = OpTypeStruct %mat2v4float
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%_ptr_Workgroup_mat2v4float = OpTypePointer Workgroup %mat2v4float
+          %w = OpVariable %_ptr_Workgroup_mat2v4float Workgroup
+       %void = OpTypeVoid
+         %12 = OpTypeFunction %void %uint
+         %17 = OpConstantNull %mat2v4float
+     %uint_2 = OpConstant %uint 2
+   %uint_264 = OpConstant %uint 264
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_mat2v4float = OpTypePointer Uniform %mat2v4float
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Workgroup_v4float = OpTypePointer Workgroup %v4float
+         %29 = OpConstantNull %int
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+%_ptr_Workgroup_float = OpTypePointer Workgroup %float
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+         %42 = OpTypeFunction %void
+    %f_inner = OpFunction %void None %12
+%local_invocation_index = OpFunctionParameter %uint
+         %16 = OpLabel
+               OpStore %w %17
+               OpControlBarrier %uint_2 %uint_2 %uint_264
+         %23 = OpAccessChain %_ptr_Uniform_mat2v4float %u %uint_0
+         %24 = OpLoad %mat2v4float %23
+               OpStore %w %24
+         %28 = OpAccessChain %_ptr_Workgroup_v4float %w %int_1
+         %31 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %29
+         %32 = OpLoad %v4float %31
+               OpStore %28 %32
+         %33 = OpAccessChain %_ptr_Workgroup_v4float %w %int_1
+         %34 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %29
+         %35 = OpLoad %v4float %34
+         %36 = OpVectorShuffle %v4float %35 %35 1 3 0 2
+               OpStore %33 %36
+         %38 = OpAccessChain %_ptr_Workgroup_float %w %29 %int_1
+         %40 = OpAccessChain %_ptr_Uniform_float %u %uint_0 %int_1 %29
+         %41 = OpLoad %float %40
+               OpStore %38 %41
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %42
+         %44 = OpLabel
+         %46 = OpLoad %uint %local_invocation_index_1
+         %45 = OpFunctionCall %void %f_inner %46
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_workgroup.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_workgroup.wgsl.expected.wgsl
new file mode 100644
index 0000000..974dc1f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat2x4_f32/to_workgroup.wgsl.expected.wgsl

@@ -0,0 +1,11 @@
+@group(0) @binding(0) var<uniform> u : mat2x4<f32>;
+
+var<workgroup> w : mat2x4<f32>;
+
+@compute @workgroup_size(1)
+fn f() {
+  w = u;
+  w[1] = u[0];
+  w[1] = u[0].ywxz;
+  w[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/dynamic_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/dynamic_index_via_ptr.wgsl
new file mode 100644
index 0000000..ae5cc00
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/dynamic_index_via_ptr.wgsl

@@ -0,0 +1,15 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> m : mat3x2<f16>;
+
+var<private> counter = 0;
+fn i() -> i32 { counter++; return counter; }
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_m   = &m;
+  let p_m_i = &((*p_m)[i()]);
+
+  let l_m   : mat3x2<f16> = *p_m;
+  let l_m_i : vec2<f16>   = *p_m_i;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..710277f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,29 @@
+cbuffer cbuffer_m : register(b0, space0) {
+  uint4 m[1];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+matrix<float16_t, 3, 2> tint_symbol(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  return matrix<float16_t, 3, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_m_i_save = i();
+  const matrix<float16_t, 3, 2> l_m = tint_symbol(m, 0u);
+  const uint scalar_offset_3 = ((4u * uint(p_m_i_save))) / 4;
+  uint ubo_load_3 = m[scalar_offset_3 / 4][scalar_offset_3 % 4];
+  const vector<float16_t, 2> l_m_i = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16)));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..aa9b9a5
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,34 @@
+SKIP: FAILED
+
+cbuffer cbuffer_m : register(b0, space0) {
+  uint4 m[1];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+matrix<float16_t, 3, 2> tint_symbol(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  return matrix<float16_t, 3, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_m_i_save = i();
+  const matrix<float16_t, 3, 2> l_m = tint_symbol(m, 0u);
+  const uint scalar_offset_3 = ((4u * uint(p_m_i_save))) / 4;
+  uint ubo_load_3 = m[scalar_offset_3 / 4][scalar_offset_3 % 4];
+  const vector<float16_t, 2> l_m_i = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16)));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000022586D0B760(11,8-16): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/dynamic_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/dynamic_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..6b87658
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/dynamic_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,53 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform m_block_std140_ubo {
+  f16vec2 inner_0;
+  f16vec2 inner_1;
+  f16vec2 inner_2;
+} m;
+
+int counter = 0;
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+f16mat3x2 load_m_inner() {
+  return f16mat3x2(m.inner_0, m.inner_1, m.inner_2);
+}
+
+f16vec2 load_m_inner_p0(uint p0) {
+  switch(p0) {
+    case 0u: {
+      return m.inner_0;
+      break;
+    }
+    case 1u: {
+      return m.inner_1;
+      break;
+    }
+    case 2u: {
+      return m.inner_2;
+      break;
+    }
+    default: {
+      return f16vec2(0.0hf);
+      break;
+    }
+  }
+}
+
+void f() {
+  f16mat3x2 p_m = load_m_inner();
+  int tint_symbol = i();
+  f16vec2 p_m_i = load_m_inner_p0(uint(tint_symbol));
+  f16mat3x2 l_m = load_m_inner();
+  f16vec2 l_m_i = load_m_inner_p0(uint(tint_symbol));
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/dynamic_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/dynamic_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..fec7c16
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/dynamic_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,17 @@
+#include <metal_stdlib>
+
+using namespace metal;
+int i() {
+  thread int tint_symbol_1 = 0;
+  tint_symbol_1 = as_type<int>((as_type<uint>(tint_symbol_1) + as_type<uint>(1)));
+  return tint_symbol_1;
+}
+
+kernel void f(const constant half3x2* tint_symbol_2 [[buffer(0)]]) {
+  int const tint_symbol = i();
+  int const p_m_i_save = tint_symbol;
+  half3x2 const l_m = *(tint_symbol_2);
+  half2 const l_m_i = (*(tint_symbol_2))[p_m_i_save];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/dynamic_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/dynamic_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..e9d22a9
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/dynamic_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,102 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 61
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %m_block_std140 "m_block_std140"
+               OpMemberName %m_block_std140 0 "inner_0"
+               OpMemberName %m_block_std140 1 "inner_1"
+               OpMemberName %m_block_std140 2 "inner_2"
+               OpName %m "m"
+               OpName %counter "counter"
+               OpName %i "i"
+               OpName %load_m_inner "load_m_inner"
+               OpName %load_m_inner_p0 "load_m_inner_p0"
+               OpName %p0 "p0"
+               OpName %f "f"
+               OpDecorate %m_block_std140 Block
+               OpMemberDecorate %m_block_std140 0 Offset 0
+               OpMemberDecorate %m_block_std140 1 Offset 4
+               OpMemberDecorate %m_block_std140 2 Offset 8
+               OpDecorate %m NonWritable
+               OpDecorate %m DescriptorSet 0
+               OpDecorate %m Binding 0
+       %half = OpTypeFloat 16
+     %v2half = OpTypeVector %half 2
+%m_block_std140 = OpTypeStruct %v2half %v2half %v2half
+%_ptr_Uniform_m_block_std140 = OpTypePointer Uniform %m_block_std140
+          %m = OpVariable %_ptr_Uniform_m_block_std140 Uniform
+        %int = OpTypeInt 32 1
+          %7 = OpConstantNull %int
+%_ptr_Private_int = OpTypePointer Private %int
+    %counter = OpVariable %_ptr_Private_int Private %7
+         %10 = OpTypeFunction %int
+      %int_1 = OpConstant %int 1
+ %mat3v2half = OpTypeMatrix %v2half 3
+         %17 = OpTypeFunction %mat3v2half
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+         %37 = OpTypeFunction %v2half %uint
+         %52 = OpConstantNull %v2half
+       %void = OpTypeVoid
+         %53 = OpTypeFunction %void
+          %i = OpFunction %int None %10
+         %12 = OpLabel
+         %13 = OpLoad %int %counter
+         %15 = OpIAdd %int %13 %int_1
+               OpStore %counter %15
+         %16 = OpLoad %int %counter
+               OpReturnValue %16
+               OpFunctionEnd
+%load_m_inner = OpFunction %mat3v2half None %17
+         %20 = OpLabel
+         %26 = OpAccessChain %_ptr_Uniform_v2half %m %uint_0
+         %27 = OpLoad %v2half %26
+         %30 = OpAccessChain %_ptr_Uniform_v2half %m %uint_1
+         %31 = OpLoad %v2half %30
+         %34 = OpAccessChain %_ptr_Uniform_v2half %m %uint_2
+         %35 = OpLoad %v2half %34
+         %36 = OpCompositeConstruct %mat3v2half %27 %31 %35
+               OpReturnValue %36
+               OpFunctionEnd
+%load_m_inner_p0 = OpFunction %v2half None %37
+         %p0 = OpFunctionParameter %uint
+         %40 = OpLabel
+               OpSelectionMerge %41 None
+               OpSwitch %p0 %42 0 %43 1 %44 2 %45
+         %43 = OpLabel
+         %46 = OpAccessChain %_ptr_Uniform_v2half %m %uint_0
+         %47 = OpLoad %v2half %46
+               OpReturnValue %47
+         %44 = OpLabel
+         %48 = OpAccessChain %_ptr_Uniform_v2half %m %uint_1
+         %49 = OpLoad %v2half %48
+               OpReturnValue %49
+         %45 = OpLabel
+         %50 = OpAccessChain %_ptr_Uniform_v2half %m %uint_2
+         %51 = OpLoad %v2half %50
+               OpReturnValue %51
+         %42 = OpLabel
+               OpReturnValue %52
+         %41 = OpLabel
+               OpReturnValue %52
+               OpFunctionEnd
+          %f = OpFunction %void None %53
+         %56 = OpLabel
+         %57 = OpFunctionCall %int %i
+         %58 = OpFunctionCall %mat3v2half %load_m_inner
+         %60 = OpBitcast %uint %57
+         %59 = OpFunctionCall %v2half %load_m_inner_p0 %60
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/dynamic_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/dynamic_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..e4bb940
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/dynamic_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,18 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> m : mat3x2<f16>;
+
+var<private> counter = 0;
+
+fn i() -> i32 {
+  counter++;
+  return counter;
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_m = &(m);
+  let p_m_i = &((*(p_m))[i()]);
+  let l_m : mat3x2<f16> = *(p_m);
+  let l_m_i : vec2<f16> = *(p_m_i);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/static_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/static_index_via_ptr.wgsl
new file mode 100644
index 0000000..2d2c4a2
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/static_index_via_ptr.wgsl

@@ -0,0 +1,15 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> m : mat3x2<f16>;
+
+var<private> counter = 0;
+fn i() -> i32 { counter++; return counter; }
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_m   = &m;
+  let p_m_1 = &((*p_m)[1]);
+
+  let l_m   : mat3x2<f16> = *p_m;
+  let l_m_1 : vec2<f16>   = *p_m_1;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..c5bbdca
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,27 @@
+cbuffer cbuffer_m : register(b0, space0) {
+  uint4 m[1];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+matrix<float16_t, 3, 2> tint_symbol(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  return matrix<float16_t, 3, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 3, 2> l_m = tint_symbol(m, 0u);
+  uint ubo_load_3 = m[0].y;
+  const vector<float16_t, 2> l_m_1 = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16)));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..09304d6
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,32 @@
+SKIP: FAILED
+
+cbuffer cbuffer_m : register(b0, space0) {
+  uint4 m[1];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+matrix<float16_t, 3, 2> tint_symbol(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  return matrix<float16_t, 3, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 3, 2> l_m = tint_symbol(m, 0u);
+  uint ubo_load_3 = m[0].y;
+  const vector<float16_t, 2> l_m_1 = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16)));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x00000213B52029A0(11,8-16): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/static_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/static_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..2ed83ea
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/static_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,25 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform m_block_std140_ubo {
+  f16vec2 inner_0;
+  f16vec2 inner_1;
+  f16vec2 inner_2;
+} m;
+
+f16mat3x2 load_m_inner() {
+  return f16mat3x2(m.inner_0, m.inner_1, m.inner_2);
+}
+
+void f() {
+  f16mat3x2 p_m = load_m_inner();
+  f16vec2 p_m_1 = m.inner_1;
+  f16mat3x2 l_m = load_m_inner();
+  f16vec2 l_m_1 = m.inner_1;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/static_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/static_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..20f3c6a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/static_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,15 @@
+#include <metal_stdlib>
+
+using namespace metal;
+int i() {
+  thread int tint_symbol = 0;
+  tint_symbol = as_type<int>((as_type<uint>(tint_symbol) + as_type<uint>(1)));
+  return tint_symbol;
+}
+
+kernel void f(const constant half3x2* tint_symbol_1 [[buffer(0)]]) {
+  half3x2 const l_m = *(tint_symbol_1);
+  half2 const l_m_1 = (*(tint_symbol_1))[1];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/static_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/static_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..1741d5b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/static_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,75 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 44
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %m_block_std140 "m_block_std140"
+               OpMemberName %m_block_std140 0 "inner_0"
+               OpMemberName %m_block_std140 1 "inner_1"
+               OpMemberName %m_block_std140 2 "inner_2"
+               OpName %m "m"
+               OpName %counter "counter"
+               OpName %i "i"
+               OpName %load_m_inner "load_m_inner"
+               OpName %f "f"
+               OpDecorate %m_block_std140 Block
+               OpMemberDecorate %m_block_std140 0 Offset 0
+               OpMemberDecorate %m_block_std140 1 Offset 4
+               OpMemberDecorate %m_block_std140 2 Offset 8
+               OpDecorate %m NonWritable
+               OpDecorate %m DescriptorSet 0
+               OpDecorate %m Binding 0
+       %half = OpTypeFloat 16
+     %v2half = OpTypeVector %half 2
+%m_block_std140 = OpTypeStruct %v2half %v2half %v2half
+%_ptr_Uniform_m_block_std140 = OpTypePointer Uniform %m_block_std140
+          %m = OpVariable %_ptr_Uniform_m_block_std140 Uniform
+        %int = OpTypeInt 32 1
+          %7 = OpConstantNull %int
+%_ptr_Private_int = OpTypePointer Private %int
+    %counter = OpVariable %_ptr_Private_int Private %7
+         %10 = OpTypeFunction %int
+      %int_1 = OpConstant %int 1
+ %mat3v2half = OpTypeMatrix %v2half 3
+         %17 = OpTypeFunction %mat3v2half
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+       %void = OpTypeVoid
+         %37 = OpTypeFunction %void
+          %i = OpFunction %int None %10
+         %12 = OpLabel
+         %13 = OpLoad %int %counter
+         %15 = OpIAdd %int %13 %int_1
+               OpStore %counter %15
+         %16 = OpLoad %int %counter
+               OpReturnValue %16
+               OpFunctionEnd
+%load_m_inner = OpFunction %mat3v2half None %17
+         %20 = OpLabel
+         %26 = OpAccessChain %_ptr_Uniform_v2half %m %uint_0
+         %27 = OpLoad %v2half %26
+         %30 = OpAccessChain %_ptr_Uniform_v2half %m %uint_1
+         %31 = OpLoad %v2half %30
+         %34 = OpAccessChain %_ptr_Uniform_v2half %m %uint_2
+         %35 = OpLoad %v2half %34
+         %36 = OpCompositeConstruct %mat3v2half %27 %31 %35
+               OpReturnValue %36
+               OpFunctionEnd
+          %f = OpFunction %void None %37
+         %40 = OpLabel
+         %41 = OpFunctionCall %mat3v2half %load_m_inner
+         %42 = OpAccessChain %_ptr_Uniform_v2half %m %uint_1
+         %43 = OpLoad %v2half %42
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/static_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/static_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..5717427
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/static_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,18 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> m : mat3x2<f16>;
+
+var<private> counter = 0;
+
+fn i() -> i32 {
+  counter++;
+  return counter;
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_m = &(m);
+  let p_m_1 = &((*(p_m))[1]);
+  let l_m : mat3x2<f16> = *(p_m);
+  let l_m_1 : vec2<f16> = *(p_m_1);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_builtin.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_builtin.wgsl
new file mode 100644
index 0000000..8f9ab07
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_builtin.wgsl

@@ -0,0 +1,10 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat3x2<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+    let t = transpose(u);
+    let l = length(u[1]);
+    let a = abs(u[0].yx.x);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_builtin.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_builtin.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..c099203
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_builtin.wgsl.expected.dxc.hlsl

@@ -0,0 +1,23 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+
+matrix<float16_t, 3, 2> tint_symbol(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  return matrix<float16_t, 3, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 2, 3> t = transpose(tint_symbol(u, 0u));
+  uint ubo_load_3 = u[0].y;
+  const float16_t l = length(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))));
+  uint ubo_load_4 = u[0].x;
+  const float16_t a = abs(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_4 & 0xFFFF)), float16_t(f16tof32(ubo_load_4 >> 16))).yx.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_builtin.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_builtin.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..6906207
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_builtin.wgsl.expected.fxc.hlsl

@@ -0,0 +1,28 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+
+matrix<float16_t, 3, 2> tint_symbol(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  return matrix<float16_t, 3, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 2, 3> t = transpose(tint_symbol(u, 0u));
+  uint ubo_load_3 = u[0].y;
+  const float16_t l = length(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))));
+  uint ubo_load_4 = u[0].x;
+  const float16_t a = abs(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_4 & 0xFFFF)), float16_t(f16tof32(ubo_load_4 >> 16))).yx.x);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000002BC2002A7B0(5,8-16): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_builtin.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_builtin.wgsl.expected.glsl
new file mode 100644
index 0000000..44171a8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_builtin.wgsl.expected.glsl

@@ -0,0 +1,24 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  f16vec2 inner_0;
+  f16vec2 inner_1;
+  f16vec2 inner_2;
+} u;
+
+f16mat3x2 load_u_inner() {
+  return f16mat3x2(u.inner_0, u.inner_1, u.inner_2);
+}
+
+void f() {
+  f16mat2x3 t = transpose(load_u_inner());
+  float16_t l = length(u.inner_1);
+  float16_t a = abs(u.inner_0.yx[0u]);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_builtin.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_builtin.wgsl.expected.msl
new file mode 100644
index 0000000..5a86f0d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_builtin.wgsl.expected.msl

@@ -0,0 +1,10 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void f(const constant half3x2* tint_symbol [[buffer(0)]]) {
+  half2x3 const t = transpose(*(tint_symbol));
+  half const l = length((*(tint_symbol))[1]);
+  half const a = fabs(half2((*(tint_symbol))[0]).yx[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_builtin.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_builtin.wgsl.expected.spvasm
new file mode 100644
index 0000000..7eec8a6
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_builtin.wgsl.expected.spvasm

@@ -0,0 +1,70 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 44
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+         %35 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner_0"
+               OpMemberName %u_block_std140 1 "inner_1"
+               OpMemberName %u_block_std140 2 "inner_2"
+               OpName %u "u"
+               OpName %load_u_inner "load_u_inner"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %u_block_std140 1 Offset 4
+               OpMemberDecorate %u_block_std140 2 Offset 8
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %half = OpTypeFloat 16
+     %v2half = OpTypeVector %half 2
+%u_block_std140 = OpTypeStruct %v2half %v2half %v2half
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat3v2half = OpTypeMatrix %v2half 3
+          %6 = OpTypeFunction %mat3v2half
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+       %void = OpTypeVoid
+         %26 = OpTypeFunction %void
+     %v3half = OpTypeVector %half 3
+ %mat2v3half = OpTypeMatrix %v3half 2
+         %42 = OpConstantNull %uint
+%load_u_inner = OpFunction %mat3v2half None %6
+          %9 = OpLabel
+         %15 = OpAccessChain %_ptr_Uniform_v2half %u %uint_0
+         %16 = OpLoad %v2half %15
+         %19 = OpAccessChain %_ptr_Uniform_v2half %u %uint_1
+         %20 = OpLoad %v2half %19
+         %23 = OpAccessChain %_ptr_Uniform_v2half %u %uint_2
+         %24 = OpLoad %v2half %23
+         %25 = OpCompositeConstruct %mat3v2half %16 %20 %24
+               OpReturnValue %25
+               OpFunctionEnd
+          %f = OpFunction %void None %26
+         %29 = OpLabel
+         %33 = OpFunctionCall %mat3v2half %load_u_inner
+         %30 = OpTranspose %mat2v3half %33
+         %36 = OpAccessChain %_ptr_Uniform_v2half %u %uint_1
+         %37 = OpLoad %v2half %36
+         %34 = OpExtInst %half %35 Length %37
+         %39 = OpAccessChain %_ptr_Uniform_v2half %u %uint_0
+         %40 = OpLoad %v2half %39
+         %41 = OpVectorShuffle %v2half %40 %40 1 0
+         %43 = OpCompositeExtract %half %41 0
+         %38 = OpExtInst %half %35 FAbs %43
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_builtin.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_builtin.wgsl.expected.wgsl
new file mode 100644
index 0000000..1746747
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_builtin.wgsl.expected.wgsl

@@ -0,0 +1,10 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat3x2<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let t = transpose(u);
+  let l = length(u[1]);
+  let a = abs(u[0].yx.x);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_fn.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_fn.wgsl
new file mode 100644
index 0000000..7fc7da5
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_fn.wgsl

@@ -0,0 +1,16 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat3x2<f16>;
+
+fn a(m : mat3x2<f16>) {}
+fn b(v : vec2<f16>) {}
+fn c(f : f16) {}
+
+@compute @workgroup_size(1)
+fn f() {
+    a(u);
+    b(u[1]);
+    b(u[1].yx);
+    c(u[1].x);
+    c(u[1].yx.x);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_fn.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_fn.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..0de6e6d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_fn.wgsl.expected.dxc.hlsl

@@ -0,0 +1,35 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+
+void a(matrix<float16_t, 3, 2> m) {
+}
+
+void b(vector<float16_t, 2> v) {
+}
+
+void c(float16_t f_1) {
+}
+
+matrix<float16_t, 3, 2> tint_symbol(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  return matrix<float16_t, 3, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  uint ubo_load_3 = u[0].y;
+  b(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))));
+  uint ubo_load_4 = u[0].y;
+  b(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_4 & 0xFFFF)), float16_t(f16tof32(ubo_load_4 >> 16))).yx);
+  c(float16_t(f16tof32(((u[0].y) & 0xFFFF))));
+  uint ubo_load_5 = u[0].y;
+  c(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_5 & 0xFFFF)), float16_t(f16tof32(ubo_load_5 >> 16))).yx.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_fn.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_fn.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..2f2e1d5
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_fn.wgsl.expected.fxc.hlsl

@@ -0,0 +1,42 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+
+void a(matrix<float16_t, 3, 2> m) {
+}
+
+void b(vector<float16_t, 2> v) {
+}
+
+void c(float16_t f_1) {
+}
+
+matrix<float16_t, 3, 2> tint_symbol(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  return matrix<float16_t, 3, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  uint ubo_load_3 = u[0].y;
+  b(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))));
+  uint ubo_load_4 = u[0].y;
+  b(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_4 & 0xFFFF)), float16_t(f16tof32(ubo_load_4 >> 16))).yx);
+  c(float16_t(f16tof32(((u[0].y) & 0xFFFF))));
+  uint ubo_load_5 = u[0].y;
+  c(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_5 & 0xFFFF)), float16_t(f16tof32(ubo_load_5 >> 16))).yx.x);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000014896F24370(5,15-23): error X3000: syntax error: unexpected token 'float16_t'
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000014896F24370(8,15-23): error X3000: syntax error: unexpected token 'float16_t'
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000014896F24370(11,8-16): error X3000: unrecognized identifier 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_fn.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_fn.wgsl.expected.glsl
new file mode 100644
index 0000000..2775f5e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_fn.wgsl.expected.glsl

@@ -0,0 +1,35 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  f16vec2 inner_0;
+  f16vec2 inner_1;
+  f16vec2 inner_2;
+} u;
+
+void a(f16mat3x2 m) {
+}
+
+void b(f16vec2 v) {
+}
+
+void c(float16_t f_1) {
+}
+
+f16mat3x2 load_u_inner() {
+  return f16mat3x2(u.inner_0, u.inner_1, u.inner_2);
+}
+
+void f() {
+  a(load_u_inner());
+  b(u.inner_1);
+  b(u.inner_1.yx);
+  c(u.inner_1[0u]);
+  c(u.inner_1.yx[0u]);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_fn.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_fn.wgsl.expected.msl
new file mode 100644
index 0000000..04006cd
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_fn.wgsl.expected.msl

@@ -0,0 +1,21 @@
+#include <metal_stdlib>
+
+using namespace metal;
+void a(half3x2 m) {
+}
+
+void b(half2 v) {
+}
+
+void c(half f_1) {
+}
+
+kernel void f(const constant half3x2* tint_symbol [[buffer(0)]]) {
+  a(*(tint_symbol));
+  b((*(tint_symbol))[1]);
+  b(half2((*(tint_symbol))[1]).yx);
+  c((*(tint_symbol))[1][0]);
+  c(half2((*(tint_symbol))[1]).yx[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_fn.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_fn.wgsl.expected.spvasm
new file mode 100644
index 0000000..96d6933
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_fn.wgsl.expected.spvasm

@@ -0,0 +1,99 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 61
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner_0"
+               OpMemberName %u_block_std140 1 "inner_1"
+               OpMemberName %u_block_std140 2 "inner_2"
+               OpName %u "u"
+               OpName %a "a"
+               OpName %m "m"
+               OpName %b "b"
+               OpName %v "v"
+               OpName %c "c"
+               OpName %f_1 "f_1"
+               OpName %load_u_inner "load_u_inner"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %u_block_std140 1 Offset 4
+               OpMemberDecorate %u_block_std140 2 Offset 8
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %half = OpTypeFloat 16
+     %v2half = OpTypeVector %half 2
+%u_block_std140 = OpTypeStruct %v2half %v2half %v2half
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+       %void = OpTypeVoid
+ %mat3v2half = OpTypeMatrix %v2half 3
+          %6 = OpTypeFunction %void %mat3v2half
+         %12 = OpTypeFunction %void %v2half
+         %16 = OpTypeFunction %void %half
+         %20 = OpTypeFunction %mat3v2half
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+         %39 = OpTypeFunction %void
+         %52 = OpConstantNull %uint
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+          %a = OpFunction %void None %6
+          %m = OpFunctionParameter %mat3v2half
+         %11 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %b = OpFunction %void None %12
+          %v = OpFunctionParameter %v2half
+         %15 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %c = OpFunction %void None %16
+        %f_1 = OpFunctionParameter %half
+         %19 = OpLabel
+               OpReturn
+               OpFunctionEnd
+%load_u_inner = OpFunction %mat3v2half None %20
+         %22 = OpLabel
+         %28 = OpAccessChain %_ptr_Uniform_v2half %u %uint_0
+         %29 = OpLoad %v2half %28
+         %32 = OpAccessChain %_ptr_Uniform_v2half %u %uint_1
+         %33 = OpLoad %v2half %32
+         %36 = OpAccessChain %_ptr_Uniform_v2half %u %uint_2
+         %37 = OpLoad %v2half %36
+         %38 = OpCompositeConstruct %mat3v2half %29 %33 %37
+               OpReturnValue %38
+               OpFunctionEnd
+          %f = OpFunction %void None %39
+         %41 = OpLabel
+         %43 = OpFunctionCall %mat3v2half %load_u_inner
+         %42 = OpFunctionCall %void %a %43
+         %45 = OpAccessChain %_ptr_Uniform_v2half %u %uint_1
+         %46 = OpLoad %v2half %45
+         %44 = OpFunctionCall %void %b %46
+         %48 = OpAccessChain %_ptr_Uniform_v2half %u %uint_1
+         %49 = OpLoad %v2half %48
+         %50 = OpVectorShuffle %v2half %49 %49 1 0
+         %47 = OpFunctionCall %void %b %50
+         %54 = OpAccessChain %_ptr_Uniform_half %u %uint_1 %52
+         %55 = OpLoad %half %54
+         %51 = OpFunctionCall %void %c %55
+         %57 = OpAccessChain %_ptr_Uniform_v2half %u %uint_1
+         %58 = OpLoad %v2half %57
+         %59 = OpVectorShuffle %v2half %58 %58 1 0
+         %60 = OpCompositeExtract %half %59 0
+         %56 = OpFunctionCall %void %c %60
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_fn.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_fn.wgsl.expected.wgsl
new file mode 100644
index 0000000..ef40b52
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_fn.wgsl.expected.wgsl

@@ -0,0 +1,21 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat3x2<f16>;
+
+fn a(m : mat3x2<f16>) {
+}
+
+fn b(v : vec2<f16>) {
+}
+
+fn c(f : f16) {
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  a(u);
+  b(u[1]);
+  b(u[1].yx);
+  c(u[1].x);
+  c(u[1].yx.x);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_private.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_private.wgsl
new file mode 100644
index 0000000..d1ad4f5
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_private.wgsl

@@ -0,0 +1,12 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat3x2<f16>;
+var<private> p : mat3x2<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+    p = u;
+    p[1] = u[0];
+    p[1] = u[0].yx;
+    p[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_private.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_private.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..ae2fee4
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_private.wgsl.expected.dxc.hlsl

@@ -0,0 +1,25 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+static matrix<float16_t, 3, 2> p = matrix<float16_t, 3, 2>(float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h));
+
+matrix<float16_t, 3, 2> tint_symbol(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  return matrix<float16_t, 3, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  uint ubo_load_3 = u[0].x;
+  p[1] = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16)));
+  uint ubo_load_4 = u[0].x;
+  p[1] = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_4 & 0xFFFF)), float16_t(f16tof32(ubo_load_4 >> 16))).yx;
+  p[0][1] = float16_t(f16tof32(((u[0].y) & 0xFFFF)));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_private.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_private.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..d8d5d22
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_private.wgsl.expected.fxc.hlsl

@@ -0,0 +1,30 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+static matrix<float16_t, 3, 2> p = matrix<float16_t, 3, 2>(float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h));
+
+matrix<float16_t, 3, 2> tint_symbol(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  return matrix<float16_t, 3, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  uint ubo_load_3 = u[0].x;
+  p[1] = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16)));
+  uint ubo_load_4 = u[0].x;
+  p[1] = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_4 & 0xFFFF)), float16_t(f16tof32(ubo_load_4 >> 16))).yx;
+  p[0][1] = float16_t(f16tof32(((u[0].y) & 0xFFFF)));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001D559EA82A0(4,15-23): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_private.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_private.wgsl.expected.glsl
new file mode 100644
index 0000000..deb133c
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_private.wgsl.expected.glsl

@@ -0,0 +1,26 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  f16vec2 inner_0;
+  f16vec2 inner_1;
+  f16vec2 inner_2;
+} u;
+
+f16mat3x2 p = f16mat3x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf);
+f16mat3x2 load_u_inner() {
+  return f16mat3x2(u.inner_0, u.inner_1, u.inner_2);
+}
+
+void f() {
+  p = load_u_inner();
+  p[1] = u.inner_0;
+  p[1] = u.inner_0.yx;
+  p[0][1] = u.inner_1[0u];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_private.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_private.wgsl.expected.msl
new file mode 100644
index 0000000..85a9952
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_private.wgsl.expected.msl

@@ -0,0 +1,12 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void f(const constant half3x2* tint_symbol_1 [[buffer(0)]]) {
+  thread half3x2 tint_symbol = half3x2(0.0h);
+  tint_symbol = *(tint_symbol_1);
+  tint_symbol[1] = (*(tint_symbol_1))[0];
+  tint_symbol[1] = half2((*(tint_symbol_1))[0]).yx;
+  tint_symbol[0][1] = (*(tint_symbol_1))[1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_private.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_private.wgsl.expected.spvasm
new file mode 100644
index 0000000..0f299f0
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_private.wgsl.expected.spvasm

@@ -0,0 +1,82 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 51
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner_0"
+               OpMemberName %u_block_std140 1 "inner_1"
+               OpMemberName %u_block_std140 2 "inner_2"
+               OpName %u "u"
+               OpName %p "p"
+               OpName %load_u_inner "load_u_inner"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %u_block_std140 1 Offset 4
+               OpMemberDecorate %u_block_std140 2 Offset 8
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %half = OpTypeFloat 16
+     %v2half = OpTypeVector %half 2
+%u_block_std140 = OpTypeStruct %v2half %v2half %v2half
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat3v2half = OpTypeMatrix %v2half 3
+%_ptr_Private_mat3v2half = OpTypePointer Private %mat3v2half
+          %9 = OpConstantNull %mat3v2half
+          %p = OpVariable %_ptr_Private_mat3v2half Private %9
+         %10 = OpTypeFunction %mat3v2half
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+       %void = OpTypeVoid
+         %29 = OpTypeFunction %void
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Private_v2half = OpTypePointer Private %v2half
+         %44 = OpConstantNull %int
+%_ptr_Private_half = OpTypePointer Private %half
+         %47 = OpConstantNull %uint
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+%load_u_inner = OpFunction %mat3v2half None %10
+         %12 = OpLabel
+         %18 = OpAccessChain %_ptr_Uniform_v2half %u %uint_0
+         %19 = OpLoad %v2half %18
+         %22 = OpAccessChain %_ptr_Uniform_v2half %u %uint_1
+         %23 = OpLoad %v2half %22
+         %26 = OpAccessChain %_ptr_Uniform_v2half %u %uint_2
+         %27 = OpLoad %v2half %26
+         %28 = OpCompositeConstruct %mat3v2half %19 %23 %27
+               OpReturnValue %28
+               OpFunctionEnd
+          %f = OpFunction %void None %29
+         %32 = OpLabel
+         %33 = OpFunctionCall %mat3v2half %load_u_inner
+               OpStore %p %33
+         %37 = OpAccessChain %_ptr_Private_v2half %p %int_1
+         %38 = OpAccessChain %_ptr_Uniform_v2half %u %uint_0
+         %39 = OpLoad %v2half %38
+               OpStore %37 %39
+         %40 = OpAccessChain %_ptr_Private_v2half %p %int_1
+         %41 = OpAccessChain %_ptr_Uniform_v2half %u %uint_0
+         %42 = OpLoad %v2half %41
+         %43 = OpVectorShuffle %v2half %42 %42 1 0
+               OpStore %40 %43
+         %46 = OpAccessChain %_ptr_Private_half %p %44 %int_1
+         %49 = OpAccessChain %_ptr_Uniform_half %u %uint_1 %47
+         %50 = OpLoad %half %49
+               OpStore %46 %50
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_private.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_private.wgsl.expected.wgsl
new file mode 100644
index 0000000..2966c25
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_private.wgsl.expected.wgsl

@@ -0,0 +1,13 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat3x2<f16>;
+
+var<private> p : mat3x2<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+  p = u;
+  p[1] = u[0];
+  p[1] = u[0].yx;
+  p[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_storage.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_storage.wgsl
new file mode 100644
index 0000000..73f77ef
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_storage.wgsl

@@ -0,0 +1,12 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat3x2<f16>;
+@group(0) @binding(1) var<storage, read_write> s : mat3x2<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+    s = u;
+    s[1] = u[0];
+    s[1] = u[0].yx;
+    s[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_storage.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_storage.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..ad86e04
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_storage.wgsl.expected.dxc.hlsl

@@ -0,0 +1,31 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 3, 2> value) {
+  buffer.Store<vector<float16_t, 2> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 4u), value[1u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 8u), value[2u]);
+}
+
+matrix<float16_t, 3, 2> tint_symbol_2(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  return matrix<float16_t, 3, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_2(u, 0u));
+  uint ubo_load_3 = u[0].x;
+  s.Store<vector<float16_t, 2> >(4u, vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))));
+  uint ubo_load_4 = u[0].x;
+  s.Store<vector<float16_t, 2> >(4u, vector<float16_t, 2>(float16_t(f16tof32(ubo_load_4 & 0xFFFF)), float16_t(f16tof32(ubo_load_4 >> 16))).yx);
+  s.Store<float16_t>(2u, float16_t(f16tof32(((u[0].y) & 0xFFFF))));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_storage.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_storage.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..3e6bbdf
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_storage.wgsl.expected.fxc.hlsl

@@ -0,0 +1,37 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 3, 2> value) {
+  buffer.Store<vector<float16_t, 2> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 4u), value[1u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 8u), value[2u]);
+}
+
+matrix<float16_t, 3, 2> tint_symbol_2(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  return matrix<float16_t, 3, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_2(u, 0u));
+  uint ubo_load_3 = u[0].x;
+  s.Store<vector<float16_t, 2> >(4u, vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))));
+  uint ubo_load_4 = u[0].x;
+  s.Store<vector<float16_t, 2> >(4u, vector<float16_t, 2>(float16_t(f16tof32(ubo_load_4 & 0xFFFF)), float16_t(f16tof32(ubo_load_4 >> 16))).yx);
+  s.Store<float16_t>(2u, float16_t(f16tof32(((u[0].y) & 0xFFFF))));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000027290DF7F00(6,66-74): error X3000: syntax error: unexpected token 'float16_t'
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000027290DF7F00(7,3-14): error X3018: invalid subscript 'Store'
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_storage.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_storage.wgsl.expected.glsl
new file mode 100644
index 0000000..3e01638
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_storage.wgsl.expected.glsl

@@ -0,0 +1,29 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  f16vec2 inner_0;
+  f16vec2 inner_1;
+  f16vec2 inner_2;
+} u;
+
+layout(binding = 1, std430) buffer u_block_ssbo {
+  f16mat3x2 inner;
+} s;
+
+f16mat3x2 load_u_inner() {
+  return f16mat3x2(u.inner_0, u.inner_1, u.inner_2);
+}
+
+void f() {
+  s.inner = load_u_inner();
+  s.inner[1] = u.inner_0;
+  s.inner[1] = u.inner_0.yx;
+  s.inner[0][1] = u.inner_1[0u];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_storage.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_storage.wgsl.expected.msl
new file mode 100644
index 0000000..2109a16
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_storage.wgsl.expected.msl

@@ -0,0 +1,11 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void f(device half3x2* tint_symbol [[buffer(1)]], const constant half3x2* tint_symbol_1 [[buffer(0)]]) {
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[0];
+  (*(tint_symbol))[1] = half2((*(tint_symbol_1))[0]).yx;
+  (*(tint_symbol))[0][1] = (*(tint_symbol_1))[1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_storage.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_storage.wgsl.expected.spvasm
new file mode 100644
index 0000000..efdff11
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_storage.wgsl.expected.spvasm

@@ -0,0 +1,92 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 53
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner_0"
+               OpMemberName %u_block_std140 1 "inner_1"
+               OpMemberName %u_block_std140 2 "inner_2"
+               OpName %u "u"
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %s "s"
+               OpName %load_u_inner "load_u_inner"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %u_block_std140 1 Offset 4
+               OpMemberDecorate %u_block_std140 2 Offset 8
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 4
+               OpDecorate %s DescriptorSet 0
+               OpDecorate %s Binding 1
+       %half = OpTypeFloat 16
+     %v2half = OpTypeVector %half 2
+%u_block_std140 = OpTypeStruct %v2half %v2half %v2half
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat3v2half = OpTypeMatrix %v2half 3
+    %u_block = OpTypeStruct %mat3v2half
+%_ptr_StorageBuffer_u_block = OpTypePointer StorageBuffer %u_block
+          %s = OpVariable %_ptr_StorageBuffer_u_block StorageBuffer
+         %10 = OpTypeFunction %mat3v2half
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+       %void = OpTypeVoid
+         %29 = OpTypeFunction %void
+%_ptr_StorageBuffer_mat3v2half = OpTypePointer StorageBuffer %mat3v2half
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_StorageBuffer_v2half = OpTypePointer StorageBuffer %v2half
+         %46 = OpConstantNull %int
+%_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
+         %49 = OpConstantNull %uint
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+%load_u_inner = OpFunction %mat3v2half None %10
+         %12 = OpLabel
+         %18 = OpAccessChain %_ptr_Uniform_v2half %u %uint_0
+         %19 = OpLoad %v2half %18
+         %22 = OpAccessChain %_ptr_Uniform_v2half %u %uint_1
+         %23 = OpLoad %v2half %22
+         %26 = OpAccessChain %_ptr_Uniform_v2half %u %uint_2
+         %27 = OpLoad %v2half %26
+         %28 = OpCompositeConstruct %mat3v2half %19 %23 %27
+               OpReturnValue %28
+               OpFunctionEnd
+          %f = OpFunction %void None %29
+         %32 = OpLabel
+         %34 = OpAccessChain %_ptr_StorageBuffer_mat3v2half %s %uint_0
+         %35 = OpFunctionCall %mat3v2half %load_u_inner
+               OpStore %34 %35
+         %39 = OpAccessChain %_ptr_StorageBuffer_v2half %s %uint_0 %int_1
+         %40 = OpAccessChain %_ptr_Uniform_v2half %u %uint_0
+         %41 = OpLoad %v2half %40
+               OpStore %39 %41
+         %42 = OpAccessChain %_ptr_StorageBuffer_v2half %s %uint_0 %int_1
+         %43 = OpAccessChain %_ptr_Uniform_v2half %u %uint_0
+         %44 = OpLoad %v2half %43
+         %45 = OpVectorShuffle %v2half %44 %44 1 0
+               OpStore %42 %45
+         %48 = OpAccessChain %_ptr_StorageBuffer_half %s %uint_0 %46 %int_1
+         %51 = OpAccessChain %_ptr_Uniform_half %u %uint_1 %49
+         %52 = OpLoad %half %51
+               OpStore %48 %52
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_storage.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_storage.wgsl.expected.wgsl
new file mode 100644
index 0000000..74831c6
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_storage.wgsl.expected.wgsl

@@ -0,0 +1,13 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat3x2<f16>;
+
+@group(0) @binding(1) var<storage, read_write> s : mat3x2<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+  s = u;
+  s[1] = u[0];
+  s[1] = u[0].yx;
+  s[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_workgroup.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_workgroup.wgsl
new file mode 100644
index 0000000..6128796
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_workgroup.wgsl

@@ -0,0 +1,12 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat3x2<f16>;
+var<workgroup> w : mat3x2<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+    w = u;
+    w[1] = u[0];
+    w[1] = u[0].yx;
+    w[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_workgroup.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_workgroup.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..2b66c9a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_workgroup.wgsl.expected.dxc.hlsl

@@ -0,0 +1,37 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+groupshared matrix<float16_t, 3, 2> w;
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+matrix<float16_t, 3, 2> tint_symbol_2(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  return matrix<float16_t, 3, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))));
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    w = matrix<float16_t, 3, 2>((float16_t(0.0h)).xx, (float16_t(0.0h)).xx, (float16_t(0.0h)).xx);
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  uint ubo_load_3 = u[0].x;
+  w[1] = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16)));
+  uint ubo_load_4 = u[0].x;
+  w[1] = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_4 & 0xFFFF)), float16_t(f16tof32(ubo_load_4 >> 16))).yx;
+  w[0][1] = float16_t(f16tof32(((u[0].y) & 0xFFFF)));
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_workgroup.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_workgroup.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..ed15e19
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_workgroup.wgsl.expected.fxc.hlsl

@@ -0,0 +1,42 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+groupshared matrix<float16_t, 3, 2> w;
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+matrix<float16_t, 3, 2> tint_symbol_2(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  return matrix<float16_t, 3, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))));
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    w = matrix<float16_t, 3, 2>((float16_t(0.0h)).xx, (float16_t(0.0h)).xx, (float16_t(0.0h)).xx);
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  uint ubo_load_3 = u[0].x;
+  w[1] = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16)));
+  uint ubo_load_4 = u[0].x;
+  w[1] = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_4 & 0xFFFF)), float16_t(f16tof32(ubo_load_4 >> 16))).yx;
+  w[0][1] = float16_t(f16tof32(((u[0].y) & 0xFFFF)));
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000020735C921C0(4,20-28): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_workgroup.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_workgroup.wgsl.expected.glsl
new file mode 100644
index 0000000..aafa15e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_workgroup.wgsl.expected.glsl

@@ -0,0 +1,30 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  f16vec2 inner_0;
+  f16vec2 inner_1;
+  f16vec2 inner_2;
+} u;
+
+shared f16mat3x2 w;
+f16mat3x2 load_u_inner() {
+  return f16mat3x2(u.inner_0, u.inner_1, u.inner_2);
+}
+
+void f(uint local_invocation_index) {
+  {
+    w = f16mat3x2(f16vec2(0.0hf), f16vec2(0.0hf), f16vec2(0.0hf));
+  }
+  barrier();
+  w = load_u_inner();
+  w[1] = u.inner_0;
+  w[1] = u.inner_0.yx;
+  w[0][1] = u.inner_1[0u];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f(gl_LocalInvocationIndex);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_workgroup.wgsl.expected.msl
new file mode 100644
index 0000000..bfd1bb3
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_workgroup.wgsl.expected.msl

@@ -0,0 +1,24 @@
+#include <metal_stdlib>
+
+using namespace metal;
+struct tint_symbol_5 {
+  half3x2 w;
+};
+
+void f_inner(uint local_invocation_index, threadgroup half3x2* const tint_symbol, const constant half3x2* const tint_symbol_1) {
+  {
+    *(tint_symbol) = half3x2(half2(0.0h), half2(0.0h), half2(0.0h));
+  }
+  threadgroup_barrier(mem_flags::mem_threadgroup);
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[0];
+  (*(tint_symbol))[1] = half2((*(tint_symbol_1))[0]).yx;
+  (*(tint_symbol))[0][1] = (*(tint_symbol_1))[1][0];
+}
+
+kernel void f(const constant half3x2* tint_symbol_4 [[buffer(0)]], threadgroup tint_symbol_5* tint_symbol_3 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
+  threadgroup half3x2* const tint_symbol_2 = &((*(tint_symbol_3)).w);
+  f_inner(local_invocation_index, tint_symbol_2, tint_symbol_4);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_workgroup.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_workgroup.wgsl.expected.spvasm
new file mode 100644
index 0000000..036e270
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_workgroup.wgsl.expected.spvasm

@@ -0,0 +1,99 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 61
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f" %local_invocation_index_1
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %local_invocation_index_1 "local_invocation_index_1"
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner_0"
+               OpMemberName %u_block_std140 1 "inner_1"
+               OpMemberName %u_block_std140 2 "inner_2"
+               OpName %u "u"
+               OpName %w "w"
+               OpName %load_u_inner "load_u_inner"
+               OpName %f_inner "f_inner"
+               OpName %local_invocation_index "local_invocation_index"
+               OpName %f "f"
+               OpDecorate %local_invocation_index_1 BuiltIn LocalInvocationIndex
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %u_block_std140 1 Offset 4
+               OpMemberDecorate %u_block_std140 2 Offset 8
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %uint = OpTypeInt 32 0
+%_ptr_Input_uint = OpTypePointer Input %uint
+%local_invocation_index_1 = OpVariable %_ptr_Input_uint Input
+       %half = OpTypeFloat 16
+     %v2half = OpTypeVector %half 2
+%u_block_std140 = OpTypeStruct %v2half %v2half %v2half
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat3v2half = OpTypeMatrix %v2half 3
+%_ptr_Workgroup_mat3v2half = OpTypePointer Workgroup %mat3v2half
+          %w = OpVariable %_ptr_Workgroup_mat3v2half Workgroup
+         %12 = OpTypeFunction %mat3v2half
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+       %void = OpTypeVoid
+         %30 = OpTypeFunction %void %uint
+         %35 = OpConstantNull %mat3v2half
+   %uint_264 = OpConstant %uint 264
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Workgroup_v2half = OpTypePointer Workgroup %v2half
+         %49 = OpConstantNull %int
+%_ptr_Workgroup_half = OpTypePointer Workgroup %half
+         %52 = OpConstantNull %uint
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+         %56 = OpTypeFunction %void
+%load_u_inner = OpFunction %mat3v2half None %12
+         %14 = OpLabel
+         %19 = OpAccessChain %_ptr_Uniform_v2half %u %uint_0
+         %20 = OpLoad %v2half %19
+         %23 = OpAccessChain %_ptr_Uniform_v2half %u %uint_1
+         %24 = OpLoad %v2half %23
+         %27 = OpAccessChain %_ptr_Uniform_v2half %u %uint_2
+         %28 = OpLoad %v2half %27
+         %29 = OpCompositeConstruct %mat3v2half %20 %24 %28
+               OpReturnValue %29
+               OpFunctionEnd
+    %f_inner = OpFunction %void None %30
+%local_invocation_index = OpFunctionParameter %uint
+         %34 = OpLabel
+               OpStore %w %35
+               OpControlBarrier %uint_2 %uint_2 %uint_264
+         %38 = OpFunctionCall %mat3v2half %load_u_inner
+               OpStore %w %38
+         %42 = OpAccessChain %_ptr_Workgroup_v2half %w %int_1
+         %43 = OpAccessChain %_ptr_Uniform_v2half %u %uint_0
+         %44 = OpLoad %v2half %43
+               OpStore %42 %44
+         %45 = OpAccessChain %_ptr_Workgroup_v2half %w %int_1
+         %46 = OpAccessChain %_ptr_Uniform_v2half %u %uint_0
+         %47 = OpLoad %v2half %46
+         %48 = OpVectorShuffle %v2half %47 %47 1 0
+               OpStore %45 %48
+         %51 = OpAccessChain %_ptr_Workgroup_half %w %49 %int_1
+         %54 = OpAccessChain %_ptr_Uniform_half %u %uint_1 %52
+         %55 = OpLoad %half %54
+               OpStore %51 %55
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %56
+         %58 = OpLabel
+         %60 = OpLoad %uint %local_invocation_index_1
+         %59 = OpFunctionCall %void %f_inner %60
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_workgroup.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_workgroup.wgsl.expected.wgsl
new file mode 100644
index 0000000..b7f4955
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x2_f16/to_workgroup.wgsl.expected.wgsl

@@ -0,0 +1,13 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat3x2<f16>;
+
+var<workgroup> w : mat3x2<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+  w = u;
+  w[1] = u[0];
+  w[1] = u[0].yx;
+  w[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2/dynamic_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f32/dynamic_index_via_ptr.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat3x2/dynamic_index_via_ptr.wgsl
rename to test/tint/buffer/uniform/std140/unnested/mat3x2_f32/dynamic_index_via_ptr.wgsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat3x2/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
rename to test/tint/buffer/uniform/std140/unnested/mat3x2_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat3x2/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
rename to test/tint/buffer/uniform/std140/unnested/mat3x2_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2/dynamic_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f32/dynamic_index_via_ptr.wgsl.expected.glsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat3x2/dynamic_index_via_ptr.wgsl.expected.glsl
rename to test/tint/buffer/uniform/std140/unnested/mat3x2_f32/dynamic_index_via_ptr.wgsl.expected.glsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2/dynamic_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f32/dynamic_index_via_ptr.wgsl.expected.msl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat3x2/dynamic_index_via_ptr.wgsl.expected.msl
rename to test/tint/buffer/uniform/std140/unnested/mat3x2_f32/dynamic_index_via_ptr.wgsl.expected.msl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2/dynamic_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat3x2_f32/dynamic_index_via_ptr.wgsl.expected.spvasm
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat3x2/dynamic_index_via_ptr.wgsl.expected.spvasm
rename to test/tint/buffer/uniform/std140/unnested/mat3x2_f32/dynamic_index_via_ptr.wgsl.expected.spvasm


diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2/dynamic_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f32/dynamic_index_via_ptr.wgsl.expected.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat3x2/dynamic_index_via_ptr.wgsl.expected.wgsl
rename to test/tint/buffer/uniform/std140/unnested/mat3x2_f32/dynamic_index_via_ptr.wgsl.expected.wgsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2/static_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f32/static_index_via_ptr.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat3x2/static_index_via_ptr.wgsl
rename to test/tint/buffer/uniform/std140/unnested/mat3x2_f32/static_index_via_ptr.wgsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2/static_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat3x2/static_index_via_ptr.wgsl.expected.dxc.hlsl
rename to test/tint/buffer/uniform/std140/unnested/mat3x2_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2/static_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat3x2/static_index_via_ptr.wgsl.expected.fxc.hlsl
rename to test/tint/buffer/uniform/std140/unnested/mat3x2_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2/static_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f32/static_index_via_ptr.wgsl.expected.glsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat3x2/static_index_via_ptr.wgsl.expected.glsl
rename to test/tint/buffer/uniform/std140/unnested/mat3x2_f32/static_index_via_ptr.wgsl.expected.glsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2/static_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f32/static_index_via_ptr.wgsl.expected.msl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat3x2/static_index_via_ptr.wgsl.expected.msl
rename to test/tint/buffer/uniform/std140/unnested/mat3x2_f32/static_index_via_ptr.wgsl.expected.msl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2/static_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat3x2_f32/static_index_via_ptr.wgsl.expected.spvasm
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat3x2/static_index_via_ptr.wgsl.expected.spvasm
rename to test/tint/buffer/uniform/std140/unnested/mat3x2_f32/static_index_via_ptr.wgsl.expected.spvasm


diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2/static_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f32/static_index_via_ptr.wgsl.expected.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat3x2/static_index_via_ptr.wgsl.expected.wgsl
rename to test/tint/buffer/uniform/std140/unnested/mat3x2_f32/static_index_via_ptr.wgsl.expected.wgsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2/to_builtin.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_builtin.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat3x2/to_builtin.wgsl
rename to test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_builtin.wgsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2/to_builtin.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_builtin.wgsl.expected.dxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat3x2/to_builtin.wgsl.expected.dxc.hlsl
rename to test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_builtin.wgsl.expected.dxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2/to_builtin.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_builtin.wgsl.expected.fxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat3x2/to_builtin.wgsl.expected.fxc.hlsl
rename to test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_builtin.wgsl.expected.fxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2/to_builtin.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_builtin.wgsl.expected.glsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat3x2/to_builtin.wgsl.expected.glsl
rename to test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_builtin.wgsl.expected.glsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2/to_builtin.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_builtin.wgsl.expected.msl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat3x2/to_builtin.wgsl.expected.msl
rename to test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_builtin.wgsl.expected.msl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2/to_builtin.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_builtin.wgsl.expected.spvasm
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat3x2/to_builtin.wgsl.expected.spvasm
rename to test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_builtin.wgsl.expected.spvasm


diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2/to_builtin.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_builtin.wgsl.expected.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat3x2/to_builtin.wgsl.expected.wgsl
rename to test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_builtin.wgsl.expected.wgsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2/to_fn.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_fn.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat3x2/to_fn.wgsl
rename to test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_fn.wgsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2/to_fn.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_fn.wgsl.expected.dxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat3x2/to_fn.wgsl.expected.dxc.hlsl
rename to test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_fn.wgsl.expected.dxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2/to_fn.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_fn.wgsl.expected.fxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat3x2/to_fn.wgsl.expected.fxc.hlsl
rename to test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_fn.wgsl.expected.fxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2/to_fn.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_fn.wgsl.expected.glsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat3x2/to_fn.wgsl.expected.glsl
rename to test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_fn.wgsl.expected.glsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2/to_fn.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_fn.wgsl.expected.msl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat3x2/to_fn.wgsl.expected.msl
rename to test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_fn.wgsl.expected.msl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2/to_fn.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_fn.wgsl.expected.spvasm
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat3x2/to_fn.wgsl.expected.spvasm
rename to test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_fn.wgsl.expected.spvasm


diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2/to_fn.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_fn.wgsl.expected.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat3x2/to_fn.wgsl.expected.wgsl
rename to test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_fn.wgsl.expected.wgsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2/to_private.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_private.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat3x2/to_private.wgsl
rename to test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_private.wgsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2/to_private.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_private.wgsl.expected.dxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat3x2/to_private.wgsl.expected.dxc.hlsl
rename to test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_private.wgsl.expected.dxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2/to_private.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_private.wgsl.expected.fxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat3x2/to_private.wgsl.expected.fxc.hlsl
rename to test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_private.wgsl.expected.fxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2/to_private.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_private.wgsl.expected.glsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat3x2/to_private.wgsl.expected.glsl
rename to test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_private.wgsl.expected.glsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2/to_private.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_private.wgsl.expected.msl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat3x2/to_private.wgsl.expected.msl
rename to test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_private.wgsl.expected.msl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2/to_private.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_private.wgsl.expected.spvasm
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat3x2/to_private.wgsl.expected.spvasm
rename to test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_private.wgsl.expected.spvasm


diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2/to_private.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_private.wgsl.expected.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat3x2/to_private.wgsl.expected.wgsl
rename to test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_private.wgsl.expected.wgsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2/to_storage.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_storage.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat3x2/to_storage.wgsl
rename to test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_storage.wgsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2/to_storage.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_storage.wgsl.expected.dxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat3x2/to_storage.wgsl.expected.dxc.hlsl
rename to test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_storage.wgsl.expected.dxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2/to_storage.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_storage.wgsl.expected.fxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat3x2/to_storage.wgsl.expected.fxc.hlsl
rename to test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_storage.wgsl.expected.fxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2/to_storage.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_storage.wgsl.expected.glsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat3x2/to_storage.wgsl.expected.glsl
rename to test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_storage.wgsl.expected.glsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2/to_storage.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_storage.wgsl.expected.msl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat3x2/to_storage.wgsl.expected.msl
rename to test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_storage.wgsl.expected.msl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2/to_storage.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_storage.wgsl.expected.spvasm
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat3x2/to_storage.wgsl.expected.spvasm
rename to test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_storage.wgsl.expected.spvasm


diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2/to_storage.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_storage.wgsl.expected.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat3x2/to_storage.wgsl.expected.wgsl
rename to test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_storage.wgsl.expected.wgsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2/to_workgroup.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_workgroup.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat3x2/to_workgroup.wgsl
rename to test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_workgroup.wgsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2/to_workgroup.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_workgroup.wgsl.expected.dxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat3x2/to_workgroup.wgsl.expected.dxc.hlsl
rename to test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_workgroup.wgsl.expected.dxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2/to_workgroup.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_workgroup.wgsl.expected.fxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat3x2/to_workgroup.wgsl.expected.fxc.hlsl
rename to test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_workgroup.wgsl.expected.fxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2/to_workgroup.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_workgroup.wgsl.expected.glsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat3x2/to_workgroup.wgsl.expected.glsl
rename to test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_workgroup.wgsl.expected.glsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_workgroup.wgsl.expected.msl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat3x2/to_workgroup.wgsl.expected.msl
rename to test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_workgroup.wgsl.expected.msl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2/to_workgroup.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_workgroup.wgsl.expected.spvasm
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat3x2/to_workgroup.wgsl.expected.spvasm
rename to test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_workgroup.wgsl.expected.spvasm


diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x2/to_workgroup.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_workgroup.wgsl.expected.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat3x2/to_workgroup.wgsl.expected.wgsl
rename to test/tint/buffer/uniform/std140/unnested/mat3x2_f32/to_workgroup.wgsl.expected.wgsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/dynamic_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/dynamic_index_via_ptr.wgsl
new file mode 100644
index 0000000..470248a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/dynamic_index_via_ptr.wgsl

@@ -0,0 +1,15 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> m : mat3x3<f16>;
+
+var<private> counter = 0;
+fn i() -> i32 { counter++; return counter; }
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_m   = &m;
+  let p_m_i = &((*p_m)[i()]);
+
+  let l_m   : mat3x3<f16> = *p_m;
+  let l_m_i : vec3<f16>   = *p_m_i;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..9e97415
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,41 @@
+cbuffer cbuffer_m : register(b0, space0) {
+  uint4 m[2];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+matrix<float16_t, 3, 3> tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  return matrix<float16_t, 3, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_m_i_save = i();
+  const matrix<float16_t, 3, 3> l_m = tint_symbol(m, 0u);
+  const uint scalar_offset_3 = ((8u * uint(p_m_i_save))) / 4;
+  uint4 ubo_load_7 = m[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  const vector<float16_t, 3> l_m_i = vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..dd2a0ab
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,46 @@
+SKIP: FAILED
+
+cbuffer cbuffer_m : register(b0, space0) {
+  uint4 m[2];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+matrix<float16_t, 3, 3> tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  return matrix<float16_t, 3, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_m_i_save = i();
+  const matrix<float16_t, 3, 3> l_m = tint_symbol(m, 0u);
+  const uint scalar_offset_3 = ((8u * uint(p_m_i_save))) / 4;
+  uint4 ubo_load_7 = m[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  const vector<float16_t, 3> l_m_i = vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001F17CCCC790(11,8-16): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/dynamic_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/dynamic_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..eecfed8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/dynamic_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,53 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform m_block_std140_ubo {
+  f16vec3 inner_0;
+  f16vec3 inner_1;
+  f16vec3 inner_2;
+} m;
+
+int counter = 0;
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+f16mat3 load_m_inner() {
+  return f16mat3(m.inner_0, m.inner_1, m.inner_2);
+}
+
+f16vec3 load_m_inner_p0(uint p0) {
+  switch(p0) {
+    case 0u: {
+      return m.inner_0;
+      break;
+    }
+    case 1u: {
+      return m.inner_1;
+      break;
+    }
+    case 2u: {
+      return m.inner_2;
+      break;
+    }
+    default: {
+      return f16vec3(0.0hf);
+      break;
+    }
+  }
+}
+
+void f() {
+  f16mat3 p_m = load_m_inner();
+  int tint_symbol = i();
+  f16vec3 p_m_i = load_m_inner_p0(uint(tint_symbol));
+  f16mat3 l_m = load_m_inner();
+  f16vec3 l_m_i = load_m_inner_p0(uint(tint_symbol));
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/dynamic_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/dynamic_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..7d5c27c
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/dynamic_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,17 @@
+#include <metal_stdlib>
+
+using namespace metal;
+int i() {
+  thread int tint_symbol_1 = 0;
+  tint_symbol_1 = as_type<int>((as_type<uint>(tint_symbol_1) + as_type<uint>(1)));
+  return tint_symbol_1;
+}
+
+kernel void f(const constant half3x3* tint_symbol_2 [[buffer(0)]]) {
+  int const tint_symbol = i();
+  int const p_m_i_save = tint_symbol;
+  half3x3 const l_m = *(tint_symbol_2);
+  half3 const l_m_i = (*(tint_symbol_2))[p_m_i_save];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/dynamic_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/dynamic_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..e158133
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/dynamic_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,102 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 61
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %m_block_std140 "m_block_std140"
+               OpMemberName %m_block_std140 0 "inner_0"
+               OpMemberName %m_block_std140 1 "inner_1"
+               OpMemberName %m_block_std140 2 "inner_2"
+               OpName %m "m"
+               OpName %counter "counter"
+               OpName %i "i"
+               OpName %load_m_inner "load_m_inner"
+               OpName %load_m_inner_p0 "load_m_inner_p0"
+               OpName %p0 "p0"
+               OpName %f "f"
+               OpDecorate %m_block_std140 Block
+               OpMemberDecorate %m_block_std140 0 Offset 0
+               OpMemberDecorate %m_block_std140 1 Offset 8
+               OpMemberDecorate %m_block_std140 2 Offset 16
+               OpDecorate %m NonWritable
+               OpDecorate %m DescriptorSet 0
+               OpDecorate %m Binding 0
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+%m_block_std140 = OpTypeStruct %v3half %v3half %v3half
+%_ptr_Uniform_m_block_std140 = OpTypePointer Uniform %m_block_std140
+          %m = OpVariable %_ptr_Uniform_m_block_std140 Uniform
+        %int = OpTypeInt 32 1
+          %7 = OpConstantNull %int
+%_ptr_Private_int = OpTypePointer Private %int
+    %counter = OpVariable %_ptr_Private_int Private %7
+         %10 = OpTypeFunction %int
+      %int_1 = OpConstant %int 1
+ %mat3v3half = OpTypeMatrix %v3half 3
+         %17 = OpTypeFunction %mat3v3half
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+         %37 = OpTypeFunction %v3half %uint
+         %52 = OpConstantNull %v3half
+       %void = OpTypeVoid
+         %53 = OpTypeFunction %void
+          %i = OpFunction %int None %10
+         %12 = OpLabel
+         %13 = OpLoad %int %counter
+         %15 = OpIAdd %int %13 %int_1
+               OpStore %counter %15
+         %16 = OpLoad %int %counter
+               OpReturnValue %16
+               OpFunctionEnd
+%load_m_inner = OpFunction %mat3v3half None %17
+         %20 = OpLabel
+         %26 = OpAccessChain %_ptr_Uniform_v3half %m %uint_0
+         %27 = OpLoad %v3half %26
+         %30 = OpAccessChain %_ptr_Uniform_v3half %m %uint_1
+         %31 = OpLoad %v3half %30
+         %34 = OpAccessChain %_ptr_Uniform_v3half %m %uint_2
+         %35 = OpLoad %v3half %34
+         %36 = OpCompositeConstruct %mat3v3half %27 %31 %35
+               OpReturnValue %36
+               OpFunctionEnd
+%load_m_inner_p0 = OpFunction %v3half None %37
+         %p0 = OpFunctionParameter %uint
+         %40 = OpLabel
+               OpSelectionMerge %41 None
+               OpSwitch %p0 %42 0 %43 1 %44 2 %45
+         %43 = OpLabel
+         %46 = OpAccessChain %_ptr_Uniform_v3half %m %uint_0
+         %47 = OpLoad %v3half %46
+               OpReturnValue %47
+         %44 = OpLabel
+         %48 = OpAccessChain %_ptr_Uniform_v3half %m %uint_1
+         %49 = OpLoad %v3half %48
+               OpReturnValue %49
+         %45 = OpLabel
+         %50 = OpAccessChain %_ptr_Uniform_v3half %m %uint_2
+         %51 = OpLoad %v3half %50
+               OpReturnValue %51
+         %42 = OpLabel
+               OpReturnValue %52
+         %41 = OpLabel
+               OpReturnValue %52
+               OpFunctionEnd
+          %f = OpFunction %void None %53
+         %56 = OpLabel
+         %57 = OpFunctionCall %int %i
+         %58 = OpFunctionCall %mat3v3half %load_m_inner
+         %60 = OpBitcast %uint %57
+         %59 = OpFunctionCall %v3half %load_m_inner_p0 %60
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/dynamic_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/dynamic_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..df87b72
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/dynamic_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,18 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> m : mat3x3<f16>;
+
+var<private> counter = 0;
+
+fn i() -> i32 {
+  counter++;
+  return counter;
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_m = &(m);
+  let p_m_i = &((*(p_m))[i()]);
+  let l_m : mat3x3<f16> = *(p_m);
+  let l_m_i : vec3<f16> = *(p_m_i);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/static_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/static_index_via_ptr.wgsl
new file mode 100644
index 0000000..81de27a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/static_index_via_ptr.wgsl

@@ -0,0 +1,15 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> m : mat3x3<f16>;
+
+var<private> counter = 0;
+fn i() -> i32 { counter++; return counter; }
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_m   = &m;
+  let p_m_1 = &((*p_m)[1]);
+
+  let l_m   : mat3x3<f16> = *p_m;
+  let l_m_1 : vec3<f16>   = *p_m_1;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..f1b227c
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,38 @@
+cbuffer cbuffer_m : register(b0, space0) {
+  uint4 m[2];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+matrix<float16_t, 3, 3> tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  return matrix<float16_t, 3, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 3, 3> l_m = tint_symbol(m, 0u);
+  uint2 ubo_load_6 = m[0].zw;
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  const vector<float16_t, 3> l_m_1 = vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..fa7dc22
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,43 @@
+SKIP: FAILED
+
+cbuffer cbuffer_m : register(b0, space0) {
+  uint4 m[2];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+matrix<float16_t, 3, 3> tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  return matrix<float16_t, 3, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 3, 3> l_m = tint_symbol(m, 0u);
+  uint2 ubo_load_6 = m[0].zw;
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  const vector<float16_t, 3> l_m_1 = vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001DA8C8147C0(11,8-16): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/static_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/static_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..2104f3b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/static_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,25 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform m_block_std140_ubo {
+  f16vec3 inner_0;
+  f16vec3 inner_1;
+  f16vec3 inner_2;
+} m;
+
+f16mat3 load_m_inner() {
+  return f16mat3(m.inner_0, m.inner_1, m.inner_2);
+}
+
+void f() {
+  f16mat3 p_m = load_m_inner();
+  f16vec3 p_m_1 = m.inner_1;
+  f16mat3 l_m = load_m_inner();
+  f16vec3 l_m_1 = m.inner_1;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/static_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/static_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..8950017
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/static_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,15 @@
+#include <metal_stdlib>
+
+using namespace metal;
+int i() {
+  thread int tint_symbol = 0;
+  tint_symbol = as_type<int>((as_type<uint>(tint_symbol) + as_type<uint>(1)));
+  return tint_symbol;
+}
+
+kernel void f(const constant half3x3* tint_symbol_1 [[buffer(0)]]) {
+  half3x3 const l_m = *(tint_symbol_1);
+  half3 const l_m_1 = (*(tint_symbol_1))[1];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/static_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/static_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..2a58a57
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/static_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,75 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 44
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %m_block_std140 "m_block_std140"
+               OpMemberName %m_block_std140 0 "inner_0"
+               OpMemberName %m_block_std140 1 "inner_1"
+               OpMemberName %m_block_std140 2 "inner_2"
+               OpName %m "m"
+               OpName %counter "counter"
+               OpName %i "i"
+               OpName %load_m_inner "load_m_inner"
+               OpName %f "f"
+               OpDecorate %m_block_std140 Block
+               OpMemberDecorate %m_block_std140 0 Offset 0
+               OpMemberDecorate %m_block_std140 1 Offset 8
+               OpMemberDecorate %m_block_std140 2 Offset 16
+               OpDecorate %m NonWritable
+               OpDecorate %m DescriptorSet 0
+               OpDecorate %m Binding 0
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+%m_block_std140 = OpTypeStruct %v3half %v3half %v3half
+%_ptr_Uniform_m_block_std140 = OpTypePointer Uniform %m_block_std140
+          %m = OpVariable %_ptr_Uniform_m_block_std140 Uniform
+        %int = OpTypeInt 32 1
+          %7 = OpConstantNull %int
+%_ptr_Private_int = OpTypePointer Private %int
+    %counter = OpVariable %_ptr_Private_int Private %7
+         %10 = OpTypeFunction %int
+      %int_1 = OpConstant %int 1
+ %mat3v3half = OpTypeMatrix %v3half 3
+         %17 = OpTypeFunction %mat3v3half
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+       %void = OpTypeVoid
+         %37 = OpTypeFunction %void
+          %i = OpFunction %int None %10
+         %12 = OpLabel
+         %13 = OpLoad %int %counter
+         %15 = OpIAdd %int %13 %int_1
+               OpStore %counter %15
+         %16 = OpLoad %int %counter
+               OpReturnValue %16
+               OpFunctionEnd
+%load_m_inner = OpFunction %mat3v3half None %17
+         %20 = OpLabel
+         %26 = OpAccessChain %_ptr_Uniform_v3half %m %uint_0
+         %27 = OpLoad %v3half %26
+         %30 = OpAccessChain %_ptr_Uniform_v3half %m %uint_1
+         %31 = OpLoad %v3half %30
+         %34 = OpAccessChain %_ptr_Uniform_v3half %m %uint_2
+         %35 = OpLoad %v3half %34
+         %36 = OpCompositeConstruct %mat3v3half %27 %31 %35
+               OpReturnValue %36
+               OpFunctionEnd
+          %f = OpFunction %void None %37
+         %40 = OpLabel
+         %41 = OpFunctionCall %mat3v3half %load_m_inner
+         %42 = OpAccessChain %_ptr_Uniform_v3half %m %uint_1
+         %43 = OpLoad %v3half %42
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/static_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/static_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..25749f7
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/static_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,18 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> m : mat3x3<f16>;
+
+var<private> counter = 0;
+
+fn i() -> i32 {
+  counter++;
+  return counter;
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_m = &(m);
+  let p_m_1 = &((*(p_m))[1]);
+  let l_m : mat3x3<f16> = *(p_m);
+  let l_m_1 : vec3<f16> = *(p_m_1);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_builtin.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_builtin.wgsl
new file mode 100644
index 0000000..9650ea6
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_builtin.wgsl

@@ -0,0 +1,10 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat3x3<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+    let t = transpose(u);
+    let l = length(u[1]);
+    let a = abs(u[0].zxy.x);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_builtin.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_builtin.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..437c9aa
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_builtin.wgsl.expected.dxc.hlsl

@@ -0,0 +1,36 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+
+matrix<float16_t, 3, 3> tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  return matrix<float16_t, 3, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 3, 3> t = transpose(tint_symbol(u, 0u));
+  uint2 ubo_load_6 = u[0].zw;
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  const float16_t l = length(vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));
+  uint2 ubo_load_7 = u[0].xy;
+  vector<float16_t, 2> ubo_load_7_xz = vector<float16_t, 2>(f16tof32(ubo_load_7 & 0xFFFF));
+  float16_t ubo_load_7_y = f16tof32(ubo_load_7[0] >> 16);
+  const float16_t a = abs(vector<float16_t, 3>(ubo_load_7_xz[0], ubo_load_7_y, ubo_load_7_xz[1]).zxy.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_builtin.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_builtin.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..b0bfbfc
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_builtin.wgsl.expected.fxc.hlsl

@@ -0,0 +1,41 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+
+matrix<float16_t, 3, 3> tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  return matrix<float16_t, 3, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 3, 3> t = transpose(tint_symbol(u, 0u));
+  uint2 ubo_load_6 = u[0].zw;
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  const float16_t l = length(vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));
+  uint2 ubo_load_7 = u[0].xy;
+  vector<float16_t, 2> ubo_load_7_xz = vector<float16_t, 2>(f16tof32(ubo_load_7 & 0xFFFF));
+  float16_t ubo_load_7_y = f16tof32(ubo_load_7[0] >> 16);
+  const float16_t a = abs(vector<float16_t, 3>(ubo_load_7_xz[0], ubo_load_7_y, ubo_load_7_xz[1]).zxy.x);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x00000233E8349550(5,8-16): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_builtin.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_builtin.wgsl.expected.glsl
new file mode 100644
index 0000000..fddaa3c
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_builtin.wgsl.expected.glsl

@@ -0,0 +1,24 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  f16vec3 inner_0;
+  f16vec3 inner_1;
+  f16vec3 inner_2;
+} u;
+
+f16mat3 load_u_inner() {
+  return f16mat3(u.inner_0, u.inner_1, u.inner_2);
+}
+
+void f() {
+  f16mat3 t = transpose(load_u_inner());
+  float16_t l = length(u.inner_1);
+  float16_t a = abs(u.inner_0.zxy[0u]);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_builtin.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_builtin.wgsl.expected.msl
new file mode 100644
index 0000000..3b7e954
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_builtin.wgsl.expected.msl

@@ -0,0 +1,10 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void f(const constant half3x3* tint_symbol [[buffer(0)]]) {
+  half3x3 const t = transpose(*(tint_symbol));
+  half const l = length((*(tint_symbol))[1]);
+  half const a = fabs(half3((*(tint_symbol))[0]).zxy[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_builtin.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_builtin.wgsl.expected.spvasm
new file mode 100644
index 0000000..663829b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_builtin.wgsl.expected.spvasm

@@ -0,0 +1,68 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 42
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+         %33 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner_0"
+               OpMemberName %u_block_std140 1 "inner_1"
+               OpMemberName %u_block_std140 2 "inner_2"
+               OpName %u "u"
+               OpName %load_u_inner "load_u_inner"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %u_block_std140 1 Offset 8
+               OpMemberDecorate %u_block_std140 2 Offset 16
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+%u_block_std140 = OpTypeStruct %v3half %v3half %v3half
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat3v3half = OpTypeMatrix %v3half 3
+          %6 = OpTypeFunction %mat3v3half
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+       %void = OpTypeVoid
+         %26 = OpTypeFunction %void
+         %40 = OpConstantNull %uint
+%load_u_inner = OpFunction %mat3v3half None %6
+          %9 = OpLabel
+         %15 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0
+         %16 = OpLoad %v3half %15
+         %19 = OpAccessChain %_ptr_Uniform_v3half %u %uint_1
+         %20 = OpLoad %v3half %19
+         %23 = OpAccessChain %_ptr_Uniform_v3half %u %uint_2
+         %24 = OpLoad %v3half %23
+         %25 = OpCompositeConstruct %mat3v3half %16 %20 %24
+               OpReturnValue %25
+               OpFunctionEnd
+          %f = OpFunction %void None %26
+         %29 = OpLabel
+         %31 = OpFunctionCall %mat3v3half %load_u_inner
+         %30 = OpTranspose %mat3v3half %31
+         %34 = OpAccessChain %_ptr_Uniform_v3half %u %uint_1
+         %35 = OpLoad %v3half %34
+         %32 = OpExtInst %half %33 Length %35
+         %37 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0
+         %38 = OpLoad %v3half %37
+         %39 = OpVectorShuffle %v3half %38 %38 2 0 1
+         %41 = OpCompositeExtract %half %39 0
+         %36 = OpExtInst %half %33 FAbs %41
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_builtin.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_builtin.wgsl.expected.wgsl
new file mode 100644
index 0000000..e0a64f6
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_builtin.wgsl.expected.wgsl

@@ -0,0 +1,10 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat3x3<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let t = transpose(u);
+  let l = length(u[1]);
+  let a = abs(u[0].zxy.x);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_fn.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_fn.wgsl
new file mode 100644
index 0000000..37f0a59
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_fn.wgsl

@@ -0,0 +1,16 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat3x3<f16>;
+
+fn a(m : mat3x3<f16>) {}
+fn b(v : vec3<f16>) {}
+fn c(f : f16) {}
+
+@compute @workgroup_size(1)
+fn f() {
+    a(u);
+    b(u[1]);
+    b(u[1].zxy);
+    c(u[1].x);
+    c(u[1].zxy.x);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_fn.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_fn.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..a6528bd
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_fn.wgsl.expected.dxc.hlsl

@@ -0,0 +1,50 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+
+void a(matrix<float16_t, 3, 3> m) {
+}
+
+void b(vector<float16_t, 3> v) {
+}
+
+void c(float16_t f_1) {
+}
+
+matrix<float16_t, 3, 3> tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  return matrix<float16_t, 3, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  uint2 ubo_load_6 = u[0].zw;
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  b(vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));
+  uint2 ubo_load_7 = u[0].zw;
+  vector<float16_t, 2> ubo_load_7_xz = vector<float16_t, 2>(f16tof32(ubo_load_7 & 0xFFFF));
+  float16_t ubo_load_7_y = f16tof32(ubo_load_7[0] >> 16);
+  b(vector<float16_t, 3>(ubo_load_7_xz[0], ubo_load_7_y, ubo_load_7_xz[1]).zxy);
+  c(float16_t(f16tof32(((u[0].z) & 0xFFFF))));
+  uint2 ubo_load_8 = u[0].zw;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  float16_t ubo_load_8_y = f16tof32(ubo_load_8[0] >> 16);
+  c(vector<float16_t, 3>(ubo_load_8_xz[0], ubo_load_8_y, ubo_load_8_xz[1]).zxy.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_fn.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_fn.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..88bb888
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_fn.wgsl.expected.fxc.hlsl

@@ -0,0 +1,57 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+
+void a(matrix<float16_t, 3, 3> m) {
+}
+
+void b(vector<float16_t, 3> v) {
+}
+
+void c(float16_t f_1) {
+}
+
+matrix<float16_t, 3, 3> tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  return matrix<float16_t, 3, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  uint2 ubo_load_6 = u[0].zw;
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  b(vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));
+  uint2 ubo_load_7 = u[0].zw;
+  vector<float16_t, 2> ubo_load_7_xz = vector<float16_t, 2>(f16tof32(ubo_load_7 & 0xFFFF));
+  float16_t ubo_load_7_y = f16tof32(ubo_load_7[0] >> 16);
+  b(vector<float16_t, 3>(ubo_load_7_xz[0], ubo_load_7_y, ubo_load_7_xz[1]).zxy);
+  c(float16_t(f16tof32(((u[0].z) & 0xFFFF))));
+  uint2 ubo_load_8 = u[0].zw;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  float16_t ubo_load_8_y = f16tof32(ubo_load_8[0] >> 16);
+  c(vector<float16_t, 3>(ubo_load_8_xz[0], ubo_load_8_y, ubo_load_8_xz[1]).zxy.x);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000024400654510(5,15-23): error X3000: syntax error: unexpected token 'float16_t'
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000024400654510(8,15-23): error X3000: syntax error: unexpected token 'float16_t'
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000024400654510(11,8-16): error X3000: unrecognized identifier 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_fn.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_fn.wgsl.expected.glsl
new file mode 100644
index 0000000..5bf5833
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_fn.wgsl.expected.glsl

@@ -0,0 +1,35 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  f16vec3 inner_0;
+  f16vec3 inner_1;
+  f16vec3 inner_2;
+} u;
+
+void a(f16mat3 m) {
+}
+
+void b(f16vec3 v) {
+}
+
+void c(float16_t f_1) {
+}
+
+f16mat3 load_u_inner() {
+  return f16mat3(u.inner_0, u.inner_1, u.inner_2);
+}
+
+void f() {
+  a(load_u_inner());
+  b(u.inner_1);
+  b(u.inner_1.zxy);
+  c(u.inner_1[0u]);
+  c(u.inner_1.zxy[0u]);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_fn.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_fn.wgsl.expected.msl
new file mode 100644
index 0000000..3ae8932
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_fn.wgsl.expected.msl

@@ -0,0 +1,21 @@
+#include <metal_stdlib>
+
+using namespace metal;
+void a(half3x3 m) {
+}
+
+void b(half3 v) {
+}
+
+void c(half f_1) {
+}
+
+kernel void f(const constant half3x3* tint_symbol [[buffer(0)]]) {
+  a(*(tint_symbol));
+  b((*(tint_symbol))[1]);
+  b(half3((*(tint_symbol))[1]).zxy);
+  c((*(tint_symbol))[1][0]);
+  c(half3((*(tint_symbol))[1]).zxy[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_fn.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_fn.wgsl.expected.spvasm
new file mode 100644
index 0000000..e2590e2
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_fn.wgsl.expected.spvasm

@@ -0,0 +1,99 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 61
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner_0"
+               OpMemberName %u_block_std140 1 "inner_1"
+               OpMemberName %u_block_std140 2 "inner_2"
+               OpName %u "u"
+               OpName %a "a"
+               OpName %m "m"
+               OpName %b "b"
+               OpName %v "v"
+               OpName %c "c"
+               OpName %f_1 "f_1"
+               OpName %load_u_inner "load_u_inner"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %u_block_std140 1 Offset 8
+               OpMemberDecorate %u_block_std140 2 Offset 16
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+%u_block_std140 = OpTypeStruct %v3half %v3half %v3half
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+       %void = OpTypeVoid
+ %mat3v3half = OpTypeMatrix %v3half 3
+          %6 = OpTypeFunction %void %mat3v3half
+         %12 = OpTypeFunction %void %v3half
+         %16 = OpTypeFunction %void %half
+         %20 = OpTypeFunction %mat3v3half
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+         %39 = OpTypeFunction %void
+         %52 = OpConstantNull %uint
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+          %a = OpFunction %void None %6
+          %m = OpFunctionParameter %mat3v3half
+         %11 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %b = OpFunction %void None %12
+          %v = OpFunctionParameter %v3half
+         %15 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %c = OpFunction %void None %16
+        %f_1 = OpFunctionParameter %half
+         %19 = OpLabel
+               OpReturn
+               OpFunctionEnd
+%load_u_inner = OpFunction %mat3v3half None %20
+         %22 = OpLabel
+         %28 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0
+         %29 = OpLoad %v3half %28
+         %32 = OpAccessChain %_ptr_Uniform_v3half %u %uint_1
+         %33 = OpLoad %v3half %32
+         %36 = OpAccessChain %_ptr_Uniform_v3half %u %uint_2
+         %37 = OpLoad %v3half %36
+         %38 = OpCompositeConstruct %mat3v3half %29 %33 %37
+               OpReturnValue %38
+               OpFunctionEnd
+          %f = OpFunction %void None %39
+         %41 = OpLabel
+         %43 = OpFunctionCall %mat3v3half %load_u_inner
+         %42 = OpFunctionCall %void %a %43
+         %45 = OpAccessChain %_ptr_Uniform_v3half %u %uint_1
+         %46 = OpLoad %v3half %45
+         %44 = OpFunctionCall %void %b %46
+         %48 = OpAccessChain %_ptr_Uniform_v3half %u %uint_1
+         %49 = OpLoad %v3half %48
+         %50 = OpVectorShuffle %v3half %49 %49 2 0 1
+         %47 = OpFunctionCall %void %b %50
+         %54 = OpAccessChain %_ptr_Uniform_half %u %uint_1 %52
+         %55 = OpLoad %half %54
+         %51 = OpFunctionCall %void %c %55
+         %57 = OpAccessChain %_ptr_Uniform_v3half %u %uint_1
+         %58 = OpLoad %v3half %57
+         %59 = OpVectorShuffle %v3half %58 %58 2 0 1
+         %60 = OpCompositeExtract %half %59 0
+         %56 = OpFunctionCall %void %c %60
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_fn.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_fn.wgsl.expected.wgsl
new file mode 100644
index 0000000..191c69a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_fn.wgsl.expected.wgsl

@@ -0,0 +1,21 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat3x3<f16>;
+
+fn a(m : mat3x3<f16>) {
+}
+
+fn b(v : vec3<f16>) {
+}
+
+fn c(f : f16) {
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  a(u);
+  b(u[1]);
+  b(u[1].zxy);
+  c(u[1].x);
+  c(u[1].zxy.x);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_private.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_private.wgsl
new file mode 100644
index 0000000..6c80a59
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_private.wgsl

@@ -0,0 +1,12 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat3x3<f16>;
+var<private> p : mat3x3<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+    p = u;
+    p[1] = u[0];
+    p[1] = u[0].zxy;
+    p[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_private.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_private.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..fe3d739
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_private.wgsl.expected.dxc.hlsl

@@ -0,0 +1,38 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+static matrix<float16_t, 3, 3> p = matrix<float16_t, 3, 3>(float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h));
+
+matrix<float16_t, 3, 3> tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  return matrix<float16_t, 3, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  uint2 ubo_load_6 = u[0].xy;
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  p[1] = vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]);
+  uint2 ubo_load_7 = u[0].xy;
+  vector<float16_t, 2> ubo_load_7_xz = vector<float16_t, 2>(f16tof32(ubo_load_7 & 0xFFFF));
+  float16_t ubo_load_7_y = f16tof32(ubo_load_7[0] >> 16);
+  p[1] = vector<float16_t, 3>(ubo_load_7_xz[0], ubo_load_7_y, ubo_load_7_xz[1]).zxy;
+  p[0][1] = float16_t(f16tof32(((u[0].z) & 0xFFFF)));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_private.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_private.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..51b24ed
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_private.wgsl.expected.fxc.hlsl

@@ -0,0 +1,43 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+static matrix<float16_t, 3, 3> p = matrix<float16_t, 3, 3>(float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h));
+
+matrix<float16_t, 3, 3> tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  return matrix<float16_t, 3, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  uint2 ubo_load_6 = u[0].xy;
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  p[1] = vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]);
+  uint2 ubo_load_7 = u[0].xy;
+  vector<float16_t, 2> ubo_load_7_xz = vector<float16_t, 2>(f16tof32(ubo_load_7 & 0xFFFF));
+  float16_t ubo_load_7_y = f16tof32(ubo_load_7[0] >> 16);
+  p[1] = vector<float16_t, 3>(ubo_load_7_xz[0], ubo_load_7_y, ubo_load_7_xz[1]).zxy;
+  p[0][1] = float16_t(f16tof32(((u[0].z) & 0xFFFF)));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000024859339490(4,15-23): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_private.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_private.wgsl.expected.glsl
new file mode 100644
index 0000000..6d7d10a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_private.wgsl.expected.glsl

@@ -0,0 +1,26 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  f16vec3 inner_0;
+  f16vec3 inner_1;
+  f16vec3 inner_2;
+} u;
+
+f16mat3 p = f16mat3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf);
+f16mat3 load_u_inner() {
+  return f16mat3(u.inner_0, u.inner_1, u.inner_2);
+}
+
+void f() {
+  p = load_u_inner();
+  p[1] = u.inner_0;
+  p[1] = u.inner_0.zxy;
+  p[0][1] = u.inner_1[0u];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_private.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_private.wgsl.expected.msl
new file mode 100644
index 0000000..7e3877d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_private.wgsl.expected.msl

@@ -0,0 +1,12 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void f(const constant half3x3* tint_symbol_1 [[buffer(0)]]) {
+  thread half3x3 tint_symbol = half3x3(0.0h);
+  tint_symbol = *(tint_symbol_1);
+  tint_symbol[1] = (*(tint_symbol_1))[0];
+  tint_symbol[1] = half3((*(tint_symbol_1))[0]).zxy;
+  tint_symbol[0][1] = (*(tint_symbol_1))[1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_private.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_private.wgsl.expected.spvasm
new file mode 100644
index 0000000..2e8ef86
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_private.wgsl.expected.spvasm

@@ -0,0 +1,82 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 51
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner_0"
+               OpMemberName %u_block_std140 1 "inner_1"
+               OpMemberName %u_block_std140 2 "inner_2"
+               OpName %u "u"
+               OpName %p "p"
+               OpName %load_u_inner "load_u_inner"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %u_block_std140 1 Offset 8
+               OpMemberDecorate %u_block_std140 2 Offset 16
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+%u_block_std140 = OpTypeStruct %v3half %v3half %v3half
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat3v3half = OpTypeMatrix %v3half 3
+%_ptr_Private_mat3v3half = OpTypePointer Private %mat3v3half
+          %9 = OpConstantNull %mat3v3half
+          %p = OpVariable %_ptr_Private_mat3v3half Private %9
+         %10 = OpTypeFunction %mat3v3half
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+       %void = OpTypeVoid
+         %29 = OpTypeFunction %void
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Private_v3half = OpTypePointer Private %v3half
+         %44 = OpConstantNull %int
+%_ptr_Private_half = OpTypePointer Private %half
+         %47 = OpConstantNull %uint
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+%load_u_inner = OpFunction %mat3v3half None %10
+         %12 = OpLabel
+         %18 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0
+         %19 = OpLoad %v3half %18
+         %22 = OpAccessChain %_ptr_Uniform_v3half %u %uint_1
+         %23 = OpLoad %v3half %22
+         %26 = OpAccessChain %_ptr_Uniform_v3half %u %uint_2
+         %27 = OpLoad %v3half %26
+         %28 = OpCompositeConstruct %mat3v3half %19 %23 %27
+               OpReturnValue %28
+               OpFunctionEnd
+          %f = OpFunction %void None %29
+         %32 = OpLabel
+         %33 = OpFunctionCall %mat3v3half %load_u_inner
+               OpStore %p %33
+         %37 = OpAccessChain %_ptr_Private_v3half %p %int_1
+         %38 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0
+         %39 = OpLoad %v3half %38
+               OpStore %37 %39
+         %40 = OpAccessChain %_ptr_Private_v3half %p %int_1
+         %41 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0
+         %42 = OpLoad %v3half %41
+         %43 = OpVectorShuffle %v3half %42 %42 2 0 1
+               OpStore %40 %43
+         %46 = OpAccessChain %_ptr_Private_half %p %44 %int_1
+         %49 = OpAccessChain %_ptr_Uniform_half %u %uint_1 %47
+         %50 = OpLoad %half %49
+               OpStore %46 %50
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_private.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_private.wgsl.expected.wgsl
new file mode 100644
index 0000000..50d1f96
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_private.wgsl.expected.wgsl

@@ -0,0 +1,13 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat3x3<f16>;
+
+var<private> p : mat3x3<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+  p = u;
+  p[1] = u[0];
+  p[1] = u[0].zxy;
+  p[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_storage.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_storage.wgsl
new file mode 100644
index 0000000..b3e56a4
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_storage.wgsl

@@ -0,0 +1,12 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat3x3<f16>;
+@group(0) @binding(1) var<storage, read_write> s : mat3x3<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+    s = u;
+    s[1] = u[0];
+    s[1] = u[0].zxy;
+    s[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_storage.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_storage.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..2a2db34
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_storage.wgsl.expected.dxc.hlsl

@@ -0,0 +1,44 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 3, 3> value) {
+  buffer.Store<vector<float16_t, 3> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 8u), value[1u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 16u), value[2u]);
+}
+
+matrix<float16_t, 3, 3> tint_symbol_2(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  return matrix<float16_t, 3, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_2(u, 0u));
+  uint2 ubo_load_6 = u[0].xy;
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  s.Store<vector<float16_t, 3> >(8u, vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));
+  uint2 ubo_load_7 = u[0].xy;
+  vector<float16_t, 2> ubo_load_7_xz = vector<float16_t, 2>(f16tof32(ubo_load_7 & 0xFFFF));
+  float16_t ubo_load_7_y = f16tof32(ubo_load_7[0] >> 16);
+  s.Store<vector<float16_t, 3> >(8u, vector<float16_t, 3>(ubo_load_7_xz[0], ubo_load_7_y, ubo_load_7_xz[1]).zxy);
+  s.Store<float16_t>(2u, float16_t(f16tof32(((u[0].z) & 0xFFFF))));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_storage.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_storage.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..b7409a7
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_storage.wgsl.expected.fxc.hlsl

@@ -0,0 +1,50 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 3, 3> value) {
+  buffer.Store<vector<float16_t, 3> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 8u), value[1u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 16u), value[2u]);
+}
+
+matrix<float16_t, 3, 3> tint_symbol_2(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  return matrix<float16_t, 3, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_2(u, 0u));
+  uint2 ubo_load_6 = u[0].xy;
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  s.Store<vector<float16_t, 3> >(8u, vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));
+  uint2 ubo_load_7 = u[0].xy;
+  vector<float16_t, 2> ubo_load_7_xz = vector<float16_t, 2>(f16tof32(ubo_load_7 & 0xFFFF));
+  float16_t ubo_load_7_y = f16tof32(ubo_load_7[0] >> 16);
+  s.Store<vector<float16_t, 3> >(8u, vector<float16_t, 3>(ubo_load_7_xz[0], ubo_load_7_y, ubo_load_7_xz[1]).zxy);
+  s.Store<float16_t>(2u, float16_t(f16tof32(((u[0].z) & 0xFFFF))));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000002A95A169E30(6,66-74): error X3000: syntax error: unexpected token 'float16_t'
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000002A95A169E30(7,3-14): error X3018: invalid subscript 'Store'
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_storage.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_storage.wgsl.expected.glsl
new file mode 100644
index 0000000..740aecc
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_storage.wgsl.expected.glsl

@@ -0,0 +1,29 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  f16vec3 inner_0;
+  f16vec3 inner_1;
+  f16vec3 inner_2;
+} u;
+
+layout(binding = 1, std430) buffer u_block_ssbo {
+  f16mat3 inner;
+} s;
+
+f16mat3 load_u_inner() {
+  return f16mat3(u.inner_0, u.inner_1, u.inner_2);
+}
+
+void f() {
+  s.inner = load_u_inner();
+  s.inner[1] = u.inner_0;
+  s.inner[1] = u.inner_0.zxy;
+  s.inner[0][1] = u.inner_1[0u];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_storage.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_storage.wgsl.expected.msl
new file mode 100644
index 0000000..88e665b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_storage.wgsl.expected.msl

@@ -0,0 +1,11 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void f(device half3x3* tint_symbol [[buffer(1)]], const constant half3x3* tint_symbol_1 [[buffer(0)]]) {
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[0];
+  (*(tint_symbol))[1] = half3((*(tint_symbol_1))[0]).zxy;
+  (*(tint_symbol))[0][1] = (*(tint_symbol_1))[1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_storage.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_storage.wgsl.expected.spvasm
new file mode 100644
index 0000000..405f643
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_storage.wgsl.expected.spvasm

@@ -0,0 +1,92 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 53
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner_0"
+               OpMemberName %u_block_std140 1 "inner_1"
+               OpMemberName %u_block_std140 2 "inner_2"
+               OpName %u "u"
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %s "s"
+               OpName %load_u_inner "load_u_inner"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %u_block_std140 1 Offset 8
+               OpMemberDecorate %u_block_std140 2 Offset 16
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 8
+               OpDecorate %s DescriptorSet 0
+               OpDecorate %s Binding 1
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+%u_block_std140 = OpTypeStruct %v3half %v3half %v3half
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat3v3half = OpTypeMatrix %v3half 3
+    %u_block = OpTypeStruct %mat3v3half
+%_ptr_StorageBuffer_u_block = OpTypePointer StorageBuffer %u_block
+          %s = OpVariable %_ptr_StorageBuffer_u_block StorageBuffer
+         %10 = OpTypeFunction %mat3v3half
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+       %void = OpTypeVoid
+         %29 = OpTypeFunction %void
+%_ptr_StorageBuffer_mat3v3half = OpTypePointer StorageBuffer %mat3v3half
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_StorageBuffer_v3half = OpTypePointer StorageBuffer %v3half
+         %46 = OpConstantNull %int
+%_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
+         %49 = OpConstantNull %uint
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+%load_u_inner = OpFunction %mat3v3half None %10
+         %12 = OpLabel
+         %18 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0
+         %19 = OpLoad %v3half %18
+         %22 = OpAccessChain %_ptr_Uniform_v3half %u %uint_1
+         %23 = OpLoad %v3half %22
+         %26 = OpAccessChain %_ptr_Uniform_v3half %u %uint_2
+         %27 = OpLoad %v3half %26
+         %28 = OpCompositeConstruct %mat3v3half %19 %23 %27
+               OpReturnValue %28
+               OpFunctionEnd
+          %f = OpFunction %void None %29
+         %32 = OpLabel
+         %34 = OpAccessChain %_ptr_StorageBuffer_mat3v3half %s %uint_0
+         %35 = OpFunctionCall %mat3v3half %load_u_inner
+               OpStore %34 %35
+         %39 = OpAccessChain %_ptr_StorageBuffer_v3half %s %uint_0 %int_1
+         %40 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0
+         %41 = OpLoad %v3half %40
+               OpStore %39 %41
+         %42 = OpAccessChain %_ptr_StorageBuffer_v3half %s %uint_0 %int_1
+         %43 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0
+         %44 = OpLoad %v3half %43
+         %45 = OpVectorShuffle %v3half %44 %44 2 0 1
+               OpStore %42 %45
+         %48 = OpAccessChain %_ptr_StorageBuffer_half %s %uint_0 %46 %int_1
+         %51 = OpAccessChain %_ptr_Uniform_half %u %uint_1 %49
+         %52 = OpLoad %half %51
+               OpStore %48 %52
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_storage.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_storage.wgsl.expected.wgsl
new file mode 100644
index 0000000..8bf2700
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_storage.wgsl.expected.wgsl

@@ -0,0 +1,13 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat3x3<f16>;
+
+@group(0) @binding(1) var<storage, read_write> s : mat3x3<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+  s = u;
+  s[1] = u[0];
+  s[1] = u[0].zxy;
+  s[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_workgroup.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_workgroup.wgsl
new file mode 100644
index 0000000..8dbebb4
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_workgroup.wgsl

@@ -0,0 +1,12 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat3x3<f16>;
+var<workgroup> w : mat3x3<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+    w = u;
+    w[1] = u[0];
+    w[1] = u[0].zxy;
+    w[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_workgroup.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_workgroup.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..b6830ac
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_workgroup.wgsl.expected.dxc.hlsl

@@ -0,0 +1,50 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+groupshared matrix<float16_t, 3, 3> w;
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+matrix<float16_t, 3, 3> tint_symbol_2(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  return matrix<float16_t, 3, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]));
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    w = matrix<float16_t, 3, 3>((float16_t(0.0h)).xxx, (float16_t(0.0h)).xxx, (float16_t(0.0h)).xxx);
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  uint2 ubo_load_6 = u[0].xy;
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  w[1] = vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]);
+  uint2 ubo_load_7 = u[0].xy;
+  vector<float16_t, 2> ubo_load_7_xz = vector<float16_t, 2>(f16tof32(ubo_load_7 & 0xFFFF));
+  float16_t ubo_load_7_y = f16tof32(ubo_load_7[0] >> 16);
+  w[1] = vector<float16_t, 3>(ubo_load_7_xz[0], ubo_load_7_y, ubo_load_7_xz[1]).zxy;
+  w[0][1] = float16_t(f16tof32(((u[0].z) & 0xFFFF)));
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_workgroup.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_workgroup.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..960e913
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_workgroup.wgsl.expected.fxc.hlsl

@@ -0,0 +1,55 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+groupshared matrix<float16_t, 3, 3> w;
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+matrix<float16_t, 3, 3> tint_symbol_2(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  return matrix<float16_t, 3, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]));
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    w = matrix<float16_t, 3, 3>((float16_t(0.0h)).xxx, (float16_t(0.0h)).xxx, (float16_t(0.0h)).xxx);
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  uint2 ubo_load_6 = u[0].xy;
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  w[1] = vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]);
+  uint2 ubo_load_7 = u[0].xy;
+  vector<float16_t, 2> ubo_load_7_xz = vector<float16_t, 2>(f16tof32(ubo_load_7 & 0xFFFF));
+  float16_t ubo_load_7_y = f16tof32(ubo_load_7[0] >> 16);
+  w[1] = vector<float16_t, 3>(ubo_load_7_xz[0], ubo_load_7_y, ubo_load_7_xz[1]).zxy;
+  w[0][1] = float16_t(f16tof32(((u[0].z) & 0xFFFF)));
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000027B0D02C6A0(4,20-28): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_workgroup.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_workgroup.wgsl.expected.glsl
new file mode 100644
index 0000000..e3c304b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_workgroup.wgsl.expected.glsl

@@ -0,0 +1,30 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  f16vec3 inner_0;
+  f16vec3 inner_1;
+  f16vec3 inner_2;
+} u;
+
+shared f16mat3 w;
+f16mat3 load_u_inner() {
+  return f16mat3(u.inner_0, u.inner_1, u.inner_2);
+}
+
+void f(uint local_invocation_index) {
+  {
+    w = f16mat3(f16vec3(0.0hf), f16vec3(0.0hf), f16vec3(0.0hf));
+  }
+  barrier();
+  w = load_u_inner();
+  w[1] = u.inner_0;
+  w[1] = u.inner_0.zxy;
+  w[0][1] = u.inner_1[0u];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f(gl_LocalInvocationIndex);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_workgroup.wgsl.expected.msl
new file mode 100644
index 0000000..849494f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_workgroup.wgsl.expected.msl

@@ -0,0 +1,24 @@
+#include <metal_stdlib>
+
+using namespace metal;
+struct tint_symbol_5 {
+  half3x3 w;
+};
+
+void f_inner(uint local_invocation_index, threadgroup half3x3* const tint_symbol, const constant half3x3* const tint_symbol_1) {
+  {
+    *(tint_symbol) = half3x3(half3(0.0h), half3(0.0h), half3(0.0h));
+  }
+  threadgroup_barrier(mem_flags::mem_threadgroup);
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[0];
+  (*(tint_symbol))[1] = half3((*(tint_symbol_1))[0]).zxy;
+  (*(tint_symbol))[0][1] = (*(tint_symbol_1))[1][0];
+}
+
+kernel void f(const constant half3x3* tint_symbol_4 [[buffer(0)]], threadgroup tint_symbol_5* tint_symbol_3 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
+  threadgroup half3x3* const tint_symbol_2 = &((*(tint_symbol_3)).w);
+  f_inner(local_invocation_index, tint_symbol_2, tint_symbol_4);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_workgroup.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_workgroup.wgsl.expected.spvasm
new file mode 100644
index 0000000..3482df8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_workgroup.wgsl.expected.spvasm

@@ -0,0 +1,99 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 61
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f" %local_invocation_index_1
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %local_invocation_index_1 "local_invocation_index_1"
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner_0"
+               OpMemberName %u_block_std140 1 "inner_1"
+               OpMemberName %u_block_std140 2 "inner_2"
+               OpName %u "u"
+               OpName %w "w"
+               OpName %load_u_inner "load_u_inner"
+               OpName %f_inner "f_inner"
+               OpName %local_invocation_index "local_invocation_index"
+               OpName %f "f"
+               OpDecorate %local_invocation_index_1 BuiltIn LocalInvocationIndex
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %u_block_std140 1 Offset 8
+               OpMemberDecorate %u_block_std140 2 Offset 16
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %uint = OpTypeInt 32 0
+%_ptr_Input_uint = OpTypePointer Input %uint
+%local_invocation_index_1 = OpVariable %_ptr_Input_uint Input
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+%u_block_std140 = OpTypeStruct %v3half %v3half %v3half
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat3v3half = OpTypeMatrix %v3half 3
+%_ptr_Workgroup_mat3v3half = OpTypePointer Workgroup %mat3v3half
+          %w = OpVariable %_ptr_Workgroup_mat3v3half Workgroup
+         %12 = OpTypeFunction %mat3v3half
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+       %void = OpTypeVoid
+         %30 = OpTypeFunction %void %uint
+         %35 = OpConstantNull %mat3v3half
+   %uint_264 = OpConstant %uint 264
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Workgroup_v3half = OpTypePointer Workgroup %v3half
+         %49 = OpConstantNull %int
+%_ptr_Workgroup_half = OpTypePointer Workgroup %half
+         %52 = OpConstantNull %uint
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+         %56 = OpTypeFunction %void
+%load_u_inner = OpFunction %mat3v3half None %12
+         %14 = OpLabel
+         %19 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0
+         %20 = OpLoad %v3half %19
+         %23 = OpAccessChain %_ptr_Uniform_v3half %u %uint_1
+         %24 = OpLoad %v3half %23
+         %27 = OpAccessChain %_ptr_Uniform_v3half %u %uint_2
+         %28 = OpLoad %v3half %27
+         %29 = OpCompositeConstruct %mat3v3half %20 %24 %28
+               OpReturnValue %29
+               OpFunctionEnd
+    %f_inner = OpFunction %void None %30
+%local_invocation_index = OpFunctionParameter %uint
+         %34 = OpLabel
+               OpStore %w %35
+               OpControlBarrier %uint_2 %uint_2 %uint_264
+         %38 = OpFunctionCall %mat3v3half %load_u_inner
+               OpStore %w %38
+         %42 = OpAccessChain %_ptr_Workgroup_v3half %w %int_1
+         %43 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0
+         %44 = OpLoad %v3half %43
+               OpStore %42 %44
+         %45 = OpAccessChain %_ptr_Workgroup_v3half %w %int_1
+         %46 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0
+         %47 = OpLoad %v3half %46
+         %48 = OpVectorShuffle %v3half %47 %47 2 0 1
+               OpStore %45 %48
+         %51 = OpAccessChain %_ptr_Workgroup_half %w %49 %int_1
+         %54 = OpAccessChain %_ptr_Uniform_half %u %uint_1 %52
+         %55 = OpLoad %half %54
+               OpStore %51 %55
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %56
+         %58 = OpLabel
+         %60 = OpLoad %uint %local_invocation_index_1
+         %59 = OpFunctionCall %void %f_inner %60
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_workgroup.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_workgroup.wgsl.expected.wgsl
new file mode 100644
index 0000000..8770580
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_workgroup.wgsl.expected.wgsl

@@ -0,0 +1,13 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat3x3<f16>;
+
+var<workgroup> w : mat3x3<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+  w = u;
+  w[1] = u[0];
+  w[1] = u[0].zxy;
+  w[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/dynamic_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/dynamic_index_via_ptr.wgsl
new file mode 100644
index 0000000..a927b2d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/dynamic_index_via_ptr.wgsl

@@ -0,0 +1,13 @@
+@group(0) @binding(0) var<uniform> m : mat3x3<f32>;
+
+var<private> counter = 0;
+fn i() -> i32 { counter++; return counter; }
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_m   = &m;
+  let p_m_i = &((*p_m)[i()]);
+
+  let l_m   : mat3x3<f32> = *p_m;
+  let l_m_i : vec3<f32>   = *p_m_i;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..ee3d2c3
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,25 @@
+cbuffer cbuffer_m : register(b0, space0) {
+  uint4 m[3];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+float3x3 tint_symbol(uint4 buffer[3], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_m_i_save = i();
+  const float3x3 l_m = tint_symbol(m, 0u);
+  const uint scalar_offset_3 = ((16u * uint(p_m_i_save))) / 4;
+  const float3 l_m_i = asfloat(m[scalar_offset_3 / 4].xyz);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..ee3d2c3
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,25 @@
+cbuffer cbuffer_m : register(b0, space0) {
+  uint4 m[3];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+float3x3 tint_symbol(uint4 buffer[3], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_m_i_save = i();
+  const float3x3 l_m = tint_symbol(m, 0u);
+  const uint scalar_offset_3 = ((16u * uint(p_m_i_save))) / 4;
+  const float3 l_m_i = asfloat(m[scalar_offset_3 / 4].xyz);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/dynamic_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/dynamic_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..b92e950
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/dynamic_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,24 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform m_block_ubo {
+  mat3 inner;
+} m;
+
+int counter = 0;
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+void f() {
+  int tint_symbol = i();
+  int p_m_i_save = tint_symbol;
+  mat3 l_m = m.inner;
+  vec3 l_m_i = m.inner[p_m_i_save];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/dynamic_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/dynamic_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..f7cf012
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/dynamic_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,17 @@
+#include <metal_stdlib>
+
+using namespace metal;
+int i() {
+  thread int tint_symbol_1 = 0;
+  tint_symbol_1 = as_type<int>((as_type<uint>(tint_symbol_1) + as_type<uint>(1)));
+  return tint_symbol_1;
+}
+
+kernel void f(const constant float3x3* tint_symbol_2 [[buffer(0)]]) {
+  int const tint_symbol = i();
+  int const p_m_i_save = tint_symbol;
+  float3x3 const l_m = *(tint_symbol_2);
+  float3 const l_m_i = (*(tint_symbol_2))[p_m_i_save];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/dynamic_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/dynamic_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..c24fe7536
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/dynamic_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,57 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 31
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %m_block "m_block"
+               OpMemberName %m_block 0 "inner"
+               OpName %m "m"
+               OpName %counter "counter"
+               OpName %i "i"
+               OpName %f "f"
+               OpDecorate %m_block Block
+               OpMemberDecorate %m_block 0 Offset 0
+               OpMemberDecorate %m_block 0 ColMajor
+               OpMemberDecorate %m_block 0 MatrixStride 16
+               OpDecorate %m NonWritable
+               OpDecorate %m DescriptorSet 0
+               OpDecorate %m Binding 0
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat3v3float = OpTypeMatrix %v3float 3
+    %m_block = OpTypeStruct %mat3v3float
+%_ptr_Uniform_m_block = OpTypePointer Uniform %m_block
+          %m = OpVariable %_ptr_Uniform_m_block Uniform
+        %int = OpTypeInt 32 1
+          %8 = OpConstantNull %int
+%_ptr_Private_int = OpTypePointer Private %int
+    %counter = OpVariable %_ptr_Private_int Private %8
+         %11 = OpTypeFunction %int
+      %int_1 = OpConstant %int 1
+       %void = OpTypeVoid
+         %18 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_mat3v3float = OpTypePointer Uniform %mat3v3float
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+          %i = OpFunction %int None %11
+         %13 = OpLabel
+         %14 = OpLoad %int %counter
+         %16 = OpIAdd %int %14 %int_1
+               OpStore %counter %16
+         %17 = OpLoad %int %counter
+               OpReturnValue %17
+               OpFunctionEnd
+          %f = OpFunction %void None %18
+         %21 = OpLabel
+         %22 = OpFunctionCall %int %i
+         %26 = OpAccessChain %_ptr_Uniform_mat3v3float %m %uint_0
+         %27 = OpLoad %mat3v3float %26
+         %29 = OpAccessChain %_ptr_Uniform_v3float %m %uint_0 %22
+         %30 = OpLoad %v3float %29
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/dynamic_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/dynamic_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..b38a53e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/dynamic_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,16 @@
+@group(0) @binding(0) var<uniform> m : mat3x3<f32>;
+
+var<private> counter = 0;
+
+fn i() -> i32 {
+  counter++;
+  return counter;
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_m = &(m);
+  let p_m_i = &((*(p_m))[i()]);
+  let l_m : mat3x3<f32> = *(p_m);
+  let l_m_i : vec3<f32> = *(p_m_i);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/static_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/static_index_via_ptr.wgsl
new file mode 100644
index 0000000..73cc3d4
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/static_index_via_ptr.wgsl

@@ -0,0 +1,13 @@
+@group(0) @binding(0) var<uniform> m : mat3x3<f32>;
+
+var<private> counter = 0;
+fn i() -> i32 { counter++; return counter; }
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_m   = &m;
+  let p_m_1 = &((*p_m)[1]);
+
+  let l_m   : mat3x3<f32> = *p_m;
+  let l_m_1 : vec3<f32>   = *p_m_1;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..bc49773
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,23 @@
+cbuffer cbuffer_m : register(b0, space0) {
+  uint4 m[3];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+float3x3 tint_symbol(uint4 buffer[3], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float3x3 l_m = tint_symbol(m, 0u);
+  const float3 l_m_1 = asfloat(m[1].xyz);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..bc49773
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,23 @@
+cbuffer cbuffer_m : register(b0, space0) {
+  uint4 m[3];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+float3x3 tint_symbol(uint4 buffer[3], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float3x3 l_m = tint_symbol(m, 0u);
+  const float3 l_m_1 = asfloat(m[1].xyz);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/static_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/static_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..825d355
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/static_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,16 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform m_block_ubo {
+  mat3 inner;
+} m;
+
+void f() {
+  mat3 l_m = m.inner;
+  vec3 l_m_1 = m.inner[1];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/static_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/static_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..8d47ca6
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/static_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,15 @@
+#include <metal_stdlib>
+
+using namespace metal;
+int i() {
+  thread int tint_symbol = 0;
+  tint_symbol = as_type<int>((as_type<uint>(tint_symbol) + as_type<uint>(1)));
+  return tint_symbol;
+}
+
+kernel void f(const constant float3x3* tint_symbol_1 [[buffer(0)]]) {
+  float3x3 const l_m = *(tint_symbol_1);
+  float3 const l_m_1 = (*(tint_symbol_1))[1];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/static_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/static_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..3e3f49f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/static_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,56 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 30
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %m_block "m_block"
+               OpMemberName %m_block 0 "inner"
+               OpName %m "m"
+               OpName %counter "counter"
+               OpName %i "i"
+               OpName %f "f"
+               OpDecorate %m_block Block
+               OpMemberDecorate %m_block 0 Offset 0
+               OpMemberDecorate %m_block 0 ColMajor
+               OpMemberDecorate %m_block 0 MatrixStride 16
+               OpDecorate %m NonWritable
+               OpDecorate %m DescriptorSet 0
+               OpDecorate %m Binding 0
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat3v3float = OpTypeMatrix %v3float 3
+    %m_block = OpTypeStruct %mat3v3float
+%_ptr_Uniform_m_block = OpTypePointer Uniform %m_block
+          %m = OpVariable %_ptr_Uniform_m_block Uniform
+        %int = OpTypeInt 32 1
+          %8 = OpConstantNull %int
+%_ptr_Private_int = OpTypePointer Private %int
+    %counter = OpVariable %_ptr_Private_int Private %8
+         %11 = OpTypeFunction %int
+      %int_1 = OpConstant %int 1
+       %void = OpTypeVoid
+         %18 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_mat3v3float = OpTypePointer Uniform %mat3v3float
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+          %i = OpFunction %int None %11
+         %13 = OpLabel
+         %14 = OpLoad %int %counter
+         %16 = OpIAdd %int %14 %int_1
+               OpStore %counter %16
+         %17 = OpLoad %int %counter
+               OpReturnValue %17
+               OpFunctionEnd
+          %f = OpFunction %void None %18
+         %21 = OpLabel
+         %25 = OpAccessChain %_ptr_Uniform_mat3v3float %m %uint_0
+         %26 = OpLoad %mat3v3float %25
+         %28 = OpAccessChain %_ptr_Uniform_v3float %m %uint_0 %int_1
+         %29 = OpLoad %v3float %28
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/static_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/static_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..a518e93
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/static_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,16 @@
+@group(0) @binding(0) var<uniform> m : mat3x3<f32>;
+
+var<private> counter = 0;
+
+fn i() -> i32 {
+  counter++;
+  return counter;
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_m = &(m);
+  let p_m_1 = &((*(p_m))[1]);
+  let l_m : mat3x3<f32> = *(p_m);
+  let l_m_1 : vec3<f32> = *(p_m_1);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_builtin.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_builtin.wgsl
new file mode 100644
index 0000000..8d72d67
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_builtin.wgsl

@@ -0,0 +1,8 @@
+@group(0) @binding(0) var<uniform> u : mat3x3<f32>;
+
+@compute @workgroup_size(1)
+fn f() {
+    let t = transpose(u);
+    let l = length(u[1]);
+    let a = abs(u[0].zxy.x);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_builtin.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_builtin.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..47e86de
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_builtin.wgsl.expected.dxc.hlsl

@@ -0,0 +1,18 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[3];
+};
+
+float3x3 tint_symbol(uint4 buffer[3], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float3x3 t = transpose(tint_symbol(u, 0u));
+  const float l = length(asfloat(u[1].xyz));
+  const float a = abs(asfloat(u[0].xyz).zxy.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_builtin.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_builtin.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..47e86de
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_builtin.wgsl.expected.fxc.hlsl

@@ -0,0 +1,18 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[3];
+};
+
+float3x3 tint_symbol(uint4 buffer[3], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float3x3 t = transpose(tint_symbol(u, 0u));
+  const float l = length(asfloat(u[1].xyz));
+  const float a = abs(asfloat(u[0].xyz).zxy.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_builtin.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_builtin.wgsl.expected.glsl
new file mode 100644
index 0000000..a0cddf0
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_builtin.wgsl.expected.glsl

@@ -0,0 +1,17 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat3 inner;
+} u;
+
+void f() {
+  mat3 t = transpose(u.inner);
+  float l = length(u.inner[1]);
+  float a = abs(u.inner[0].zxy.x);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_builtin.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_builtin.wgsl.expected.msl
new file mode 100644
index 0000000..448c8e0
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_builtin.wgsl.expected.msl

@@ -0,0 +1,10 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void f(const constant float3x3* tint_symbol [[buffer(0)]]) {
+  float3x3 const t = transpose(*(tint_symbol));
+  float const l = length((*(tint_symbol))[1]);
+  float const a = fabs(float3((*(tint_symbol))[0]).zxy[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_builtin.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_builtin.wgsl.expected.spvasm
new file mode 100644
index 0000000..0c566a8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_builtin.wgsl.expected.spvasm

@@ -0,0 +1,51 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 30
+; Schema: 0
+               OpCapability Shader
+         %18 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat3v3float = OpTypeMatrix %v3float 3
+    %u_block = OpTypeStruct %mat3v3float
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+       %void = OpTypeVoid
+          %7 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_mat3v3float = OpTypePointer Uniform %mat3v3float
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+         %25 = OpConstantNull %int
+          %f = OpFunction %void None %7
+         %10 = OpLabel
+         %15 = OpAccessChain %_ptr_Uniform_mat3v3float %u %uint_0
+         %16 = OpLoad %mat3v3float %15
+         %11 = OpTranspose %mat3v3float %16
+         %22 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %int_1
+         %23 = OpLoad %v3float %22
+         %17 = OpExtInst %float %18 Length %23
+         %26 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %25
+         %27 = OpLoad %v3float %26
+         %28 = OpVectorShuffle %v3float %27 %27 2 0 1
+         %29 = OpCompositeExtract %float %28 0
+         %24 = OpExtInst %float %18 FAbs %29
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_builtin.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_builtin.wgsl.expected.wgsl
new file mode 100644
index 0000000..5903946
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_builtin.wgsl.expected.wgsl

@@ -0,0 +1,8 @@
+@group(0) @binding(0) var<uniform> u : mat3x3<f32>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let t = transpose(u);
+  let l = length(u[1]);
+  let a = abs(u[0].zxy.x);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_fn.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_fn.wgsl
new file mode 100644
index 0000000..528d3e3
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_fn.wgsl

@@ -0,0 +1,14 @@
+@group(0) @binding(0) var<uniform> u : mat3x3<f32>;
+
+fn a(m : mat3x3<f32>) {}
+fn b(v : vec3<f32>) {}
+fn c(f : f32) {}
+
+@compute @workgroup_size(1)
+fn f() {
+    a(u);
+    b(u[1]);
+    b(u[1].zxy);
+    c(u[1].x);
+    c(u[1].zxy.x);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_fn.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_fn.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..e871bd0
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_fn.wgsl.expected.dxc.hlsl

@@ -0,0 +1,29 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[3];
+};
+
+void a(float3x3 m) {
+}
+
+void b(float3 v) {
+}
+
+void c(float f_1) {
+}
+
+float3x3 tint_symbol(uint4 buffer[3], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(asfloat(u[1].xyz));
+  b(asfloat(u[1].xyz).zxy);
+  c(asfloat(u[1].x));
+  c(asfloat(u[1].xyz).zxy.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_fn.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_fn.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..e871bd0
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_fn.wgsl.expected.fxc.hlsl

@@ -0,0 +1,29 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[3];
+};
+
+void a(float3x3 m) {
+}
+
+void b(float3 v) {
+}
+
+void c(float f_1) {
+}
+
+float3x3 tint_symbol(uint4 buffer[3], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(asfloat(u[1].xyz));
+  b(asfloat(u[1].xyz).zxy);
+  c(asfloat(u[1].x));
+  c(asfloat(u[1].xyz).zxy.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_fn.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_fn.wgsl.expected.glsl
new file mode 100644
index 0000000..e16f33a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_fn.wgsl.expected.glsl

@@ -0,0 +1,28 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat3 inner;
+} u;
+
+void a(mat3 m) {
+}
+
+void b(vec3 v) {
+}
+
+void c(float f_1) {
+}
+
+void f() {
+  a(u.inner);
+  b(u.inner[1]);
+  b(u.inner[1].zxy);
+  c(u.inner[1].x);
+  c(u.inner[1].zxy.x);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_fn.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_fn.wgsl.expected.msl
new file mode 100644
index 0000000..e0131fb
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_fn.wgsl.expected.msl

@@ -0,0 +1,21 @@
+#include <metal_stdlib>
+
+using namespace metal;
+void a(float3x3 m) {
+}
+
+void b(float3 v) {
+}
+
+void c(float f_1) {
+}
+
+kernel void f(const constant float3x3* tint_symbol [[buffer(0)]]) {
+  a(*(tint_symbol));
+  b((*(tint_symbol))[1]);
+  b(float3((*(tint_symbol))[1]).zxy);
+  c((*(tint_symbol))[1][0]);
+  c(float3((*(tint_symbol))[1]).zxy[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_fn.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_fn.wgsl.expected.spvasm
new file mode 100644
index 0000000..368867a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_fn.wgsl.expected.spvasm

@@ -0,0 +1,81 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 48
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %a "a"
+               OpName %m "m"
+               OpName %b "b"
+               OpName %v "v"
+               OpName %c "c"
+               OpName %f_1 "f_1"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat3v3float = OpTypeMatrix %v3float 3
+    %u_block = OpTypeStruct %mat3v3float
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+       %void = OpTypeVoid
+          %7 = OpTypeFunction %void %mat3v3float
+         %12 = OpTypeFunction %void %v3float
+         %16 = OpTypeFunction %void %float
+         %20 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_mat3v3float = OpTypePointer Uniform %mat3v3float
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+          %a = OpFunction %void None %7
+          %m = OpFunctionParameter %mat3v3float
+         %11 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %b = OpFunction %void None %12
+          %v = OpFunctionParameter %v3float
+         %15 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %c = OpFunction %void None %16
+        %f_1 = OpFunctionParameter %float
+         %19 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %20
+         %22 = OpLabel
+         %27 = OpAccessChain %_ptr_Uniform_mat3v3float %u %uint_0
+         %28 = OpLoad %mat3v3float %27
+         %23 = OpFunctionCall %void %a %28
+         %33 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %int_1
+         %34 = OpLoad %v3float %33
+         %29 = OpFunctionCall %void %b %34
+         %36 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %int_1
+         %37 = OpLoad %v3float %36
+         %38 = OpVectorShuffle %v3float %37 %37 2 0 1
+         %35 = OpFunctionCall %void %b %38
+         %41 = OpAccessChain %_ptr_Uniform_float %u %uint_0 %int_1 %uint_0
+         %42 = OpLoad %float %41
+         %39 = OpFunctionCall %void %c %42
+         %44 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %int_1
+         %45 = OpLoad %v3float %44
+         %46 = OpVectorShuffle %v3float %45 %45 2 0 1
+         %47 = OpCompositeExtract %float %46 0
+         %43 = OpFunctionCall %void %c %47
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_fn.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_fn.wgsl.expected.wgsl
new file mode 100644
index 0000000..f4f9ace
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_fn.wgsl.expected.wgsl

@@ -0,0 +1,19 @@
+@group(0) @binding(0) var<uniform> u : mat3x3<f32>;
+
+fn a(m : mat3x3<f32>) {
+}
+
+fn b(v : vec3<f32>) {
+}
+
+fn c(f : f32) {
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  a(u);
+  b(u[1]);
+  b(u[1].zxy);
+  c(u[1].x);
+  c(u[1].zxy.x);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_private.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_private.wgsl
new file mode 100644
index 0000000..06230bf
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_private.wgsl

@@ -0,0 +1,10 @@
+@group(0) @binding(0) var<uniform> u : mat3x3<f32>;
+var<private> p : mat3x3<f32>;
+
+@compute @workgroup_size(1)
+fn f() {
+    p = u;
+    p[1] = u[0];
+    p[1] = u[0].zxy;
+    p[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_private.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_private.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..0d4ad6d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_private.wgsl.expected.dxc.hlsl

@@ -0,0 +1,20 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[3];
+};
+static float3x3 p = float3x3(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f);
+
+float3x3 tint_symbol(uint4 buffer[3], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = asfloat(u[0].xyz);
+  p[1] = asfloat(u[0].xyz).zxy;
+  p[0][1] = asfloat(u[1].x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_private.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_private.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..0d4ad6d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_private.wgsl.expected.fxc.hlsl

@@ -0,0 +1,20 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[3];
+};
+static float3x3 p = float3x3(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f);
+
+float3x3 tint_symbol(uint4 buffer[3], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = asfloat(u[0].xyz);
+  p[1] = asfloat(u[0].xyz).zxy;
+  p[0][1] = asfloat(u[1].x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_private.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_private.wgsl.expected.glsl
new file mode 100644
index 0000000..13d4c1f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_private.wgsl.expected.glsl

@@ -0,0 +1,19 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat3 inner;
+} u;
+
+mat3 p = mat3(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f);
+void f() {
+  p = u.inner;
+  p[1] = u.inner[0];
+  p[1] = u.inner[0].zxy;
+  p[0][1] = u.inner[1][0];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_private.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_private.wgsl.expected.msl
new file mode 100644
index 0000000..4471b02
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_private.wgsl.expected.msl

@@ -0,0 +1,12 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void f(const constant float3x3* tint_symbol_1 [[buffer(0)]]) {
+  thread float3x3 tint_symbol = float3x3(0.0f);
+  tint_symbol = *(tint_symbol_1);
+  tint_symbol[1] = (*(tint_symbol_1))[0];
+  tint_symbol[1] = float3((*(tint_symbol_1))[0]).zxy;
+  tint_symbol[0][1] = (*(tint_symbol_1))[1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_private.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_private.wgsl.expected.spvasm
new file mode 100644
index 0000000..e71a764
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_private.wgsl.expected.spvasm

@@ -0,0 +1,62 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 36
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %p "p"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat3v3float = OpTypeMatrix %v3float 3
+    %u_block = OpTypeStruct %mat3v3float
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%_ptr_Private_mat3v3float = OpTypePointer Private %mat3v3float
+          %9 = OpConstantNull %mat3v3float
+          %p = OpVariable %_ptr_Private_mat3v3float Private %9
+       %void = OpTypeVoid
+         %10 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_mat3v3float = OpTypePointer Uniform %mat3v3float
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Private_v3float = OpTypePointer Private %v3float
+         %23 = OpConstantNull %int
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+%_ptr_Private_float = OpTypePointer Private %float
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+          %f = OpFunction %void None %10
+         %13 = OpLabel
+         %17 = OpAccessChain %_ptr_Uniform_mat3v3float %u %uint_0
+         %18 = OpLoad %mat3v3float %17
+               OpStore %p %18
+         %22 = OpAccessChain %_ptr_Private_v3float %p %int_1
+         %25 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %23
+         %26 = OpLoad %v3float %25
+               OpStore %22 %26
+         %27 = OpAccessChain %_ptr_Private_v3float %p %int_1
+         %28 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %23
+         %29 = OpLoad %v3float %28
+         %30 = OpVectorShuffle %v3float %29 %29 2 0 1
+               OpStore %27 %30
+         %32 = OpAccessChain %_ptr_Private_float %p %23 %int_1
+         %34 = OpAccessChain %_ptr_Uniform_float %u %uint_0 %int_1 %23
+         %35 = OpLoad %float %34
+               OpStore %32 %35
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_private.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_private.wgsl.expected.wgsl
new file mode 100644
index 0000000..a9b219a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_private.wgsl.expected.wgsl

@@ -0,0 +1,11 @@
+@group(0) @binding(0) var<uniform> u : mat3x3<f32>;
+
+var<private> p : mat3x3<f32>;
+
+@compute @workgroup_size(1)
+fn f() {
+  p = u;
+  p[1] = u[0];
+  p[1] = u[0].zxy;
+  p[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_storage.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_storage.wgsl
new file mode 100644
index 0000000..21b61b2
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_storage.wgsl

@@ -0,0 +1,10 @@
+@group(0) @binding(0) var<uniform> u : mat3x3<f32>;
+@group(0) @binding(1) var<storage, read_write> s : mat3x3<f32>;
+
+@compute @workgroup_size(1)
+fn f() {
+    s = u;
+    s[1] = u[0];
+    s[1] = u[0].zxy;
+    s[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_storage.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_storage.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..6b28dd2
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_storage.wgsl.expected.dxc.hlsl

@@ -0,0 +1,26 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[3];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, float3x3 value) {
+  buffer.Store3((offset + 0u), asuint(value[0u]));
+  buffer.Store3((offset + 16u), asuint(value[1u]));
+  buffer.Store3((offset + 32u), asuint(value[2u]));
+}
+
+float3x3 tint_symbol_2(uint4 buffer[3], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_2(u, 0u));
+  s.Store3(16u, asuint(asfloat(u[0].xyz)));
+  s.Store3(16u, asuint(asfloat(u[0].xyz).zxy));
+  s.Store(4u, asuint(asfloat(u[1].x)));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_storage.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_storage.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..6b28dd2
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_storage.wgsl.expected.fxc.hlsl

@@ -0,0 +1,26 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[3];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, float3x3 value) {
+  buffer.Store3((offset + 0u), asuint(value[0u]));
+  buffer.Store3((offset + 16u), asuint(value[1u]));
+  buffer.Store3((offset + 32u), asuint(value[2u]));
+}
+
+float3x3 tint_symbol_2(uint4 buffer[3], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_2(u, 0u));
+  s.Store3(16u, asuint(asfloat(u[0].xyz)));
+  s.Store3(16u, asuint(asfloat(u[0].xyz).zxy));
+  s.Store(4u, asuint(asfloat(u[1].x)));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_storage.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_storage.wgsl.expected.glsl
new file mode 100644
index 0000000..2e910e8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_storage.wgsl.expected.glsl

@@ -0,0 +1,22 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat3 inner;
+} u;
+
+layout(binding = 1, std430) buffer u_block_ssbo {
+  mat3 inner;
+} s;
+
+void f() {
+  s.inner = u.inner;
+  s.inner[1] = u.inner[0];
+  s.inner[1] = u.inner[0].zxy;
+  s.inner[0][1] = u.inner[1][0];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_storage.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_storage.wgsl.expected.msl
new file mode 100644
index 0000000..207c6b0
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_storage.wgsl.expected.msl

@@ -0,0 +1,11 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void f(device float3x3* tint_symbol [[buffer(1)]], const constant float3x3* tint_symbol_1 [[buffer(0)]]) {
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[0];
+  (*(tint_symbol))[1] = float3((*(tint_symbol_1))[0]).zxy;
+  (*(tint_symbol))[0][1] = (*(tint_symbol_1))[1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_storage.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_storage.wgsl.expected.spvasm
new file mode 100644
index 0000000..fd0a6f8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_storage.wgsl.expected.spvasm

@@ -0,0 +1,65 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 37
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %s "s"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpDecorate %s DescriptorSet 0
+               OpDecorate %s Binding 1
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat3v3float = OpTypeMatrix %v3float 3
+    %u_block = OpTypeStruct %mat3v3float
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%_ptr_StorageBuffer_u_block = OpTypePointer StorageBuffer %u_block
+          %s = OpVariable %_ptr_StorageBuffer_u_block StorageBuffer
+       %void = OpTypeVoid
+          %9 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_StorageBuffer_mat3v3float = OpTypePointer StorageBuffer %mat3v3float
+%_ptr_Uniform_mat3v3float = OpTypePointer Uniform %mat3v3float
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_StorageBuffer_v3float = OpTypePointer StorageBuffer %v3float
+         %24 = OpConstantNull %int
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+          %f = OpFunction %void None %9
+         %12 = OpLabel
+         %16 = OpAccessChain %_ptr_StorageBuffer_mat3v3float %s %uint_0
+         %18 = OpAccessChain %_ptr_Uniform_mat3v3float %u %uint_0
+         %19 = OpLoad %mat3v3float %18
+               OpStore %16 %19
+         %23 = OpAccessChain %_ptr_StorageBuffer_v3float %s %uint_0 %int_1
+         %26 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %24
+         %27 = OpLoad %v3float %26
+               OpStore %23 %27
+         %28 = OpAccessChain %_ptr_StorageBuffer_v3float %s %uint_0 %int_1
+         %29 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %24
+         %30 = OpLoad %v3float %29
+         %31 = OpVectorShuffle %v3float %30 %30 2 0 1
+               OpStore %28 %31
+         %33 = OpAccessChain %_ptr_StorageBuffer_float %s %uint_0 %24 %int_1
+         %35 = OpAccessChain %_ptr_Uniform_float %u %uint_0 %int_1 %24
+         %36 = OpLoad %float %35
+               OpStore %33 %36
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_storage.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_storage.wgsl.expected.wgsl
new file mode 100644
index 0000000..a32a4b9
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_storage.wgsl.expected.wgsl

@@ -0,0 +1,11 @@
+@group(0) @binding(0) var<uniform> u : mat3x3<f32>;
+
+@group(0) @binding(1) var<storage, read_write> s : mat3x3<f32>;
+
+@compute @workgroup_size(1)
+fn f() {
+  s = u;
+  s[1] = u[0];
+  s[1] = u[0].zxy;
+  s[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_workgroup.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_workgroup.wgsl
new file mode 100644
index 0000000..db3ec99
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_workgroup.wgsl

@@ -0,0 +1,10 @@
+@group(0) @binding(0) var<uniform> u : mat3x3<f32>;
+var<workgroup> w : mat3x3<f32>;
+
+@compute @workgroup_size(1)
+fn f() {
+    w = u;
+    w[1] = u[0];
+    w[1] = u[0].zxy;
+    w[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_workgroup.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_workgroup.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..b51cc7d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_workgroup.wgsl.expected.dxc.hlsl

@@ -0,0 +1,32 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[3];
+};
+groupshared float3x3 w;
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+float3x3 tint_symbol_2(uint4 buffer[3], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz));
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    w = float3x3((0.0f).xxx, (0.0f).xxx, (0.0f).xxx);
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = asfloat(u[0].xyz);
+  w[1] = asfloat(u[0].xyz).zxy;
+  w[0][1] = asfloat(u[1].x);
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_workgroup.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_workgroup.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..b51cc7d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_workgroup.wgsl.expected.fxc.hlsl

@@ -0,0 +1,32 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[3];
+};
+groupshared float3x3 w;
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+float3x3 tint_symbol_2(uint4 buffer[3], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz));
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    w = float3x3((0.0f).xxx, (0.0f).xxx, (0.0f).xxx);
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = asfloat(u[0].xyz);
+  w[1] = asfloat(u[0].xyz).zxy;
+  w[0][1] = asfloat(u[1].x);
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_workgroup.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_workgroup.wgsl.expected.glsl
new file mode 100644
index 0000000..4356fb5
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_workgroup.wgsl.expected.glsl

@@ -0,0 +1,23 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat3 inner;
+} u;
+
+shared mat3 w;
+void f(uint local_invocation_index) {
+  {
+    w = mat3(vec3(0.0f), vec3(0.0f), vec3(0.0f));
+  }
+  barrier();
+  w = u.inner;
+  w[1] = u.inner[0];
+  w[1] = u.inner[0].zxy;
+  w[0][1] = u.inner[1][0];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f(gl_LocalInvocationIndex);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_workgroup.wgsl.expected.msl
new file mode 100644
index 0000000..9c73286
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_workgroup.wgsl.expected.msl

@@ -0,0 +1,24 @@
+#include <metal_stdlib>
+
+using namespace metal;
+struct tint_symbol_5 {
+  float3x3 w;
+};
+
+void f_inner(uint local_invocation_index, threadgroup float3x3* const tint_symbol, const constant float3x3* const tint_symbol_1) {
+  {
+    *(tint_symbol) = float3x3(float3(0.0f), float3(0.0f), float3(0.0f));
+  }
+  threadgroup_barrier(mem_flags::mem_threadgroup);
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[0];
+  (*(tint_symbol))[1] = float3((*(tint_symbol_1))[0]).zxy;
+  (*(tint_symbol))[0][1] = (*(tint_symbol_1))[1][0];
+}
+
+kernel void f(const constant float3x3* tint_symbol_4 [[buffer(0)]], threadgroup tint_symbol_5* tint_symbol_3 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
+  threadgroup float3x3* const tint_symbol_2 = &((*(tint_symbol_3)).w);
+  f_inner(local_invocation_index, tint_symbol_2, tint_symbol_4);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_workgroup.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_workgroup.wgsl.expected.spvasm
new file mode 100644
index 0000000..f737673
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_workgroup.wgsl.expected.spvasm

@@ -0,0 +1,80 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 47
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f" %local_invocation_index_1
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %local_invocation_index_1 "local_invocation_index_1"
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %w "w"
+               OpName %f_inner "f_inner"
+               OpName %local_invocation_index "local_invocation_index"
+               OpName %f "f"
+               OpDecorate %local_invocation_index_1 BuiltIn LocalInvocationIndex
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %uint = OpTypeInt 32 0
+%_ptr_Input_uint = OpTypePointer Input %uint
+%local_invocation_index_1 = OpVariable %_ptr_Input_uint Input
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat3v3float = OpTypeMatrix %v3float 3
+    %u_block = OpTypeStruct %mat3v3float
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%_ptr_Workgroup_mat3v3float = OpTypePointer Workgroup %mat3v3float
+          %w = OpVariable %_ptr_Workgroup_mat3v3float Workgroup
+       %void = OpTypeVoid
+         %12 = OpTypeFunction %void %uint
+         %17 = OpConstantNull %mat3v3float
+     %uint_2 = OpConstant %uint 2
+   %uint_264 = OpConstant %uint 264
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_mat3v3float = OpTypePointer Uniform %mat3v3float
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Workgroup_v3float = OpTypePointer Workgroup %v3float
+         %29 = OpConstantNull %int
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+%_ptr_Workgroup_float = OpTypePointer Workgroup %float
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+         %42 = OpTypeFunction %void
+    %f_inner = OpFunction %void None %12
+%local_invocation_index = OpFunctionParameter %uint
+         %16 = OpLabel
+               OpStore %w %17
+               OpControlBarrier %uint_2 %uint_2 %uint_264
+         %23 = OpAccessChain %_ptr_Uniform_mat3v3float %u %uint_0
+         %24 = OpLoad %mat3v3float %23
+               OpStore %w %24
+         %28 = OpAccessChain %_ptr_Workgroup_v3float %w %int_1
+         %31 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %29
+         %32 = OpLoad %v3float %31
+               OpStore %28 %32
+         %33 = OpAccessChain %_ptr_Workgroup_v3float %w %int_1
+         %34 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %29
+         %35 = OpLoad %v3float %34
+         %36 = OpVectorShuffle %v3float %35 %35 2 0 1
+               OpStore %33 %36
+         %38 = OpAccessChain %_ptr_Workgroup_float %w %29 %int_1
+         %40 = OpAccessChain %_ptr_Uniform_float %u %uint_0 %int_1 %29
+         %41 = OpLoad %float %40
+               OpStore %38 %41
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %42
+         %44 = OpLabel
+         %46 = OpLoad %uint %local_invocation_index_1
+         %45 = OpFunctionCall %void %f_inner %46
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_workgroup.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_workgroup.wgsl.expected.wgsl
new file mode 100644
index 0000000..165ffb8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_workgroup.wgsl.expected.wgsl

@@ -0,0 +1,11 @@
+@group(0) @binding(0) var<uniform> u : mat3x3<f32>;
+
+var<workgroup> w : mat3x3<f32>;
+
+@compute @workgroup_size(1)
+fn f() {
+  w = u;
+  w[1] = u[0];
+  w[1] = u[0].zxy;
+  w[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/dynamic_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/dynamic_index_via_ptr.wgsl
new file mode 100644
index 0000000..9fbc14f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/dynamic_index_via_ptr.wgsl

@@ -0,0 +1,15 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> m : mat3x4<f16>;
+
+var<private> counter = 0;
+fn i() -> i32 { counter++; return counter; }
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_m   = &m;
+  let p_m_i = &((*p_m)[i()]);
+
+  let l_m   : mat3x4<f16> = *p_m;
+  let l_m_i : vec4<f16>   = *p_m_i;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..6373d70
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,41 @@
+cbuffer cbuffer_m : register(b0, space0) {
+  uint4 m[2];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+matrix<float16_t, 3, 4> tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  return matrix<float16_t, 3, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_m_i_save = i();
+  const matrix<float16_t, 3, 4> l_m = tint_symbol(m, 0u);
+  const uint scalar_offset_3 = ((8u * uint(p_m_i_save))) / 4;
+  uint4 ubo_load_7 = m[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  const vector<float16_t, 4> l_m_i = vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..2f16db2
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,46 @@
+SKIP: FAILED
+
+cbuffer cbuffer_m : register(b0, space0) {
+  uint4 m[2];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+matrix<float16_t, 3, 4> tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  return matrix<float16_t, 3, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_m_i_save = i();
+  const matrix<float16_t, 3, 4> l_m = tint_symbol(m, 0u);
+  const uint scalar_offset_3 = ((8u * uint(p_m_i_save))) / 4;
+  uint4 ubo_load_7 = m[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  const vector<float16_t, 4> l_m_i = vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001FCC8F3EC10(11,8-16): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/dynamic_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/dynamic_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..3199f82
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/dynamic_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,53 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform m_block_std140_ubo {
+  f16vec4 inner_0;
+  f16vec4 inner_1;
+  f16vec4 inner_2;
+} m;
+
+int counter = 0;
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+f16mat3x4 load_m_inner() {
+  return f16mat3x4(m.inner_0, m.inner_1, m.inner_2);
+}
+
+f16vec4 load_m_inner_p0(uint p0) {
+  switch(p0) {
+    case 0u: {
+      return m.inner_0;
+      break;
+    }
+    case 1u: {
+      return m.inner_1;
+      break;
+    }
+    case 2u: {
+      return m.inner_2;
+      break;
+    }
+    default: {
+      return f16vec4(0.0hf);
+      break;
+    }
+  }
+}
+
+void f() {
+  f16mat3x4 p_m = load_m_inner();
+  int tint_symbol = i();
+  f16vec4 p_m_i = load_m_inner_p0(uint(tint_symbol));
+  f16mat3x4 l_m = load_m_inner();
+  f16vec4 l_m_i = load_m_inner_p0(uint(tint_symbol));
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/dynamic_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/dynamic_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..70345b3
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/dynamic_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,17 @@
+#include <metal_stdlib>
+
+using namespace metal;
+int i() {
+  thread int tint_symbol_1 = 0;
+  tint_symbol_1 = as_type<int>((as_type<uint>(tint_symbol_1) + as_type<uint>(1)));
+  return tint_symbol_1;
+}
+
+kernel void f(const constant half3x4* tint_symbol_2 [[buffer(0)]]) {
+  int const tint_symbol = i();
+  int const p_m_i_save = tint_symbol;
+  half3x4 const l_m = *(tint_symbol_2);
+  half4 const l_m_i = (*(tint_symbol_2))[p_m_i_save];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/dynamic_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/dynamic_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..caeaa70
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/dynamic_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,102 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 61
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %m_block_std140 "m_block_std140"
+               OpMemberName %m_block_std140 0 "inner_0"
+               OpMemberName %m_block_std140 1 "inner_1"
+               OpMemberName %m_block_std140 2 "inner_2"
+               OpName %m "m"
+               OpName %counter "counter"
+               OpName %i "i"
+               OpName %load_m_inner "load_m_inner"
+               OpName %load_m_inner_p0 "load_m_inner_p0"
+               OpName %p0 "p0"
+               OpName %f "f"
+               OpDecorate %m_block_std140 Block
+               OpMemberDecorate %m_block_std140 0 Offset 0
+               OpMemberDecorate %m_block_std140 1 Offset 8
+               OpMemberDecorate %m_block_std140 2 Offset 16
+               OpDecorate %m NonWritable
+               OpDecorate %m DescriptorSet 0
+               OpDecorate %m Binding 0
+       %half = OpTypeFloat 16
+     %v4half = OpTypeVector %half 4
+%m_block_std140 = OpTypeStruct %v4half %v4half %v4half
+%_ptr_Uniform_m_block_std140 = OpTypePointer Uniform %m_block_std140
+          %m = OpVariable %_ptr_Uniform_m_block_std140 Uniform
+        %int = OpTypeInt 32 1
+          %7 = OpConstantNull %int
+%_ptr_Private_int = OpTypePointer Private %int
+    %counter = OpVariable %_ptr_Private_int Private %7
+         %10 = OpTypeFunction %int
+      %int_1 = OpConstant %int 1
+ %mat3v4half = OpTypeMatrix %v4half 3
+         %17 = OpTypeFunction %mat3v4half
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+         %37 = OpTypeFunction %v4half %uint
+         %52 = OpConstantNull %v4half
+       %void = OpTypeVoid
+         %53 = OpTypeFunction %void
+          %i = OpFunction %int None %10
+         %12 = OpLabel
+         %13 = OpLoad %int %counter
+         %15 = OpIAdd %int %13 %int_1
+               OpStore %counter %15
+         %16 = OpLoad %int %counter
+               OpReturnValue %16
+               OpFunctionEnd
+%load_m_inner = OpFunction %mat3v4half None %17
+         %20 = OpLabel
+         %26 = OpAccessChain %_ptr_Uniform_v4half %m %uint_0
+         %27 = OpLoad %v4half %26
+         %30 = OpAccessChain %_ptr_Uniform_v4half %m %uint_1
+         %31 = OpLoad %v4half %30
+         %34 = OpAccessChain %_ptr_Uniform_v4half %m %uint_2
+         %35 = OpLoad %v4half %34
+         %36 = OpCompositeConstruct %mat3v4half %27 %31 %35
+               OpReturnValue %36
+               OpFunctionEnd
+%load_m_inner_p0 = OpFunction %v4half None %37
+         %p0 = OpFunctionParameter %uint
+         %40 = OpLabel
+               OpSelectionMerge %41 None
+               OpSwitch %p0 %42 0 %43 1 %44 2 %45
+         %43 = OpLabel
+         %46 = OpAccessChain %_ptr_Uniform_v4half %m %uint_0
+         %47 = OpLoad %v4half %46
+               OpReturnValue %47
+         %44 = OpLabel
+         %48 = OpAccessChain %_ptr_Uniform_v4half %m %uint_1
+         %49 = OpLoad %v4half %48
+               OpReturnValue %49
+         %45 = OpLabel
+         %50 = OpAccessChain %_ptr_Uniform_v4half %m %uint_2
+         %51 = OpLoad %v4half %50
+               OpReturnValue %51
+         %42 = OpLabel
+               OpReturnValue %52
+         %41 = OpLabel
+               OpReturnValue %52
+               OpFunctionEnd
+          %f = OpFunction %void None %53
+         %56 = OpLabel
+         %57 = OpFunctionCall %int %i
+         %58 = OpFunctionCall %mat3v4half %load_m_inner
+         %60 = OpBitcast %uint %57
+         %59 = OpFunctionCall %v4half %load_m_inner_p0 %60
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/dynamic_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/dynamic_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..29d87ff
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/dynamic_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,18 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> m : mat3x4<f16>;
+
+var<private> counter = 0;
+
+fn i() -> i32 {
+  counter++;
+  return counter;
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_m = &(m);
+  let p_m_i = &((*(p_m))[i()]);
+  let l_m : mat3x4<f16> = *(p_m);
+  let l_m_i : vec4<f16> = *(p_m_i);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/static_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/static_index_via_ptr.wgsl
new file mode 100644
index 0000000..dc27594
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/static_index_via_ptr.wgsl

@@ -0,0 +1,15 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> m : mat3x4<f16>;
+
+var<private> counter = 0;
+fn i() -> i32 { counter++; return counter; }
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_m   = &m;
+  let p_m_1 = &((*p_m)[1]);
+
+  let l_m   : mat3x4<f16> = *p_m;
+  let l_m_1 : vec4<f16>   = *p_m_1;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..cfc99ea
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,38 @@
+cbuffer cbuffer_m : register(b0, space0) {
+  uint4 m[2];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+matrix<float16_t, 3, 4> tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  return matrix<float16_t, 3, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 3, 4> l_m = tint_symbol(m, 0u);
+  uint2 ubo_load_6 = m[0].zw;
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  const vector<float16_t, 4> l_m_1 = vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..be7103a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,43 @@
+SKIP: FAILED
+
+cbuffer cbuffer_m : register(b0, space0) {
+  uint4 m[2];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+matrix<float16_t, 3, 4> tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  return matrix<float16_t, 3, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 3, 4> l_m = tint_symbol(m, 0u);
+  uint2 ubo_load_6 = m[0].zw;
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  const vector<float16_t, 4> l_m_1 = vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000017BEAA5D290(11,8-16): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/static_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/static_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..b40e9bb
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/static_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,25 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform m_block_std140_ubo {
+  f16vec4 inner_0;
+  f16vec4 inner_1;
+  f16vec4 inner_2;
+} m;
+
+f16mat3x4 load_m_inner() {
+  return f16mat3x4(m.inner_0, m.inner_1, m.inner_2);
+}
+
+void f() {
+  f16mat3x4 p_m = load_m_inner();
+  f16vec4 p_m_1 = m.inner_1;
+  f16mat3x4 l_m = load_m_inner();
+  f16vec4 l_m_1 = m.inner_1;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/static_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/static_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..82f5ec8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/static_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,15 @@
+#include <metal_stdlib>
+
+using namespace metal;
+int i() {
+  thread int tint_symbol = 0;
+  tint_symbol = as_type<int>((as_type<uint>(tint_symbol) + as_type<uint>(1)));
+  return tint_symbol;
+}
+
+kernel void f(const constant half3x4* tint_symbol_1 [[buffer(0)]]) {
+  half3x4 const l_m = *(tint_symbol_1);
+  half4 const l_m_1 = (*(tint_symbol_1))[1];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/static_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/static_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..546939d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/static_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,75 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 44
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %m_block_std140 "m_block_std140"
+               OpMemberName %m_block_std140 0 "inner_0"
+               OpMemberName %m_block_std140 1 "inner_1"
+               OpMemberName %m_block_std140 2 "inner_2"
+               OpName %m "m"
+               OpName %counter "counter"
+               OpName %i "i"
+               OpName %load_m_inner "load_m_inner"
+               OpName %f "f"
+               OpDecorate %m_block_std140 Block
+               OpMemberDecorate %m_block_std140 0 Offset 0
+               OpMemberDecorate %m_block_std140 1 Offset 8
+               OpMemberDecorate %m_block_std140 2 Offset 16
+               OpDecorate %m NonWritable
+               OpDecorate %m DescriptorSet 0
+               OpDecorate %m Binding 0
+       %half = OpTypeFloat 16
+     %v4half = OpTypeVector %half 4
+%m_block_std140 = OpTypeStruct %v4half %v4half %v4half
+%_ptr_Uniform_m_block_std140 = OpTypePointer Uniform %m_block_std140
+          %m = OpVariable %_ptr_Uniform_m_block_std140 Uniform
+        %int = OpTypeInt 32 1
+          %7 = OpConstantNull %int
+%_ptr_Private_int = OpTypePointer Private %int
+    %counter = OpVariable %_ptr_Private_int Private %7
+         %10 = OpTypeFunction %int
+      %int_1 = OpConstant %int 1
+ %mat3v4half = OpTypeMatrix %v4half 3
+         %17 = OpTypeFunction %mat3v4half
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+       %void = OpTypeVoid
+         %37 = OpTypeFunction %void
+          %i = OpFunction %int None %10
+         %12 = OpLabel
+         %13 = OpLoad %int %counter
+         %15 = OpIAdd %int %13 %int_1
+               OpStore %counter %15
+         %16 = OpLoad %int %counter
+               OpReturnValue %16
+               OpFunctionEnd
+%load_m_inner = OpFunction %mat3v4half None %17
+         %20 = OpLabel
+         %26 = OpAccessChain %_ptr_Uniform_v4half %m %uint_0
+         %27 = OpLoad %v4half %26
+         %30 = OpAccessChain %_ptr_Uniform_v4half %m %uint_1
+         %31 = OpLoad %v4half %30
+         %34 = OpAccessChain %_ptr_Uniform_v4half %m %uint_2
+         %35 = OpLoad %v4half %34
+         %36 = OpCompositeConstruct %mat3v4half %27 %31 %35
+               OpReturnValue %36
+               OpFunctionEnd
+          %f = OpFunction %void None %37
+         %40 = OpLabel
+         %41 = OpFunctionCall %mat3v4half %load_m_inner
+         %42 = OpAccessChain %_ptr_Uniform_v4half %m %uint_1
+         %43 = OpLoad %v4half %42
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/static_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/static_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..a814872
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/static_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,18 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> m : mat3x4<f16>;
+
+var<private> counter = 0;
+
+fn i() -> i32 {
+  counter++;
+  return counter;
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_m = &(m);
+  let p_m_1 = &((*(p_m))[1]);
+  let l_m : mat3x4<f16> = *(p_m);
+  let l_m_1 : vec4<f16> = *(p_m_1);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_builtin.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_builtin.wgsl
new file mode 100644
index 0000000..289193b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_builtin.wgsl

@@ -0,0 +1,10 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat3x4<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+    let t = transpose(u);
+    let l = length(u[1]);
+    let a = abs(u[0].ywxz.x);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_builtin.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_builtin.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..80a685f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_builtin.wgsl.expected.dxc.hlsl

@@ -0,0 +1,36 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+
+matrix<float16_t, 3, 4> tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  return matrix<float16_t, 3, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 4, 3> t = transpose(tint_symbol(u, 0u));
+  uint2 ubo_load_6 = u[0].zw;
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  const float16_t l = length(vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]));
+  uint2 ubo_load_7 = u[0].xy;
+  vector<float16_t, 2> ubo_load_7_xz = vector<float16_t, 2>(f16tof32(ubo_load_7 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_7_yw = vector<float16_t, 2>(f16tof32(ubo_load_7 >> 16));
+  const float16_t a = abs(vector<float16_t, 4>(ubo_load_7_xz[0], ubo_load_7_yw[0], ubo_load_7_xz[1], ubo_load_7_yw[1]).ywxz.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_builtin.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_builtin.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..c2fde4f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_builtin.wgsl.expected.fxc.hlsl

@@ -0,0 +1,41 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+
+matrix<float16_t, 3, 4> tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  return matrix<float16_t, 3, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 4, 3> t = transpose(tint_symbol(u, 0u));
+  uint2 ubo_load_6 = u[0].zw;
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  const float16_t l = length(vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]));
+  uint2 ubo_load_7 = u[0].xy;
+  vector<float16_t, 2> ubo_load_7_xz = vector<float16_t, 2>(f16tof32(ubo_load_7 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_7_yw = vector<float16_t, 2>(f16tof32(ubo_load_7 >> 16));
+  const float16_t a = abs(vector<float16_t, 4>(ubo_load_7_xz[0], ubo_load_7_yw[0], ubo_load_7_xz[1], ubo_load_7_yw[1]).ywxz.x);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001DE25EAC9C0(5,8-16): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_builtin.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_builtin.wgsl.expected.glsl
new file mode 100644
index 0000000..ea7d264
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_builtin.wgsl.expected.glsl

@@ -0,0 +1,24 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  f16vec4 inner_0;
+  f16vec4 inner_1;
+  f16vec4 inner_2;
+} u;
+
+f16mat3x4 load_u_inner() {
+  return f16mat3x4(u.inner_0, u.inner_1, u.inner_2);
+}
+
+void f() {
+  f16mat4x3 t = transpose(load_u_inner());
+  float16_t l = length(u.inner_1);
+  float16_t a = abs(u.inner_0.ywxz[0u]);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_builtin.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_builtin.wgsl.expected.msl
new file mode 100644
index 0000000..45f91fd
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_builtin.wgsl.expected.msl

@@ -0,0 +1,10 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void f(const constant half3x4* tint_symbol [[buffer(0)]]) {
+  half4x3 const t = transpose(*(tint_symbol));
+  half const l = length((*(tint_symbol))[1]);
+  half const a = fabs(half4((*(tint_symbol))[0]).ywxz[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_builtin.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_builtin.wgsl.expected.spvasm
new file mode 100644
index 0000000..da62171
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_builtin.wgsl.expected.spvasm

@@ -0,0 +1,70 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 44
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+         %35 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner_0"
+               OpMemberName %u_block_std140 1 "inner_1"
+               OpMemberName %u_block_std140 2 "inner_2"
+               OpName %u "u"
+               OpName %load_u_inner "load_u_inner"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %u_block_std140 1 Offset 8
+               OpMemberDecorate %u_block_std140 2 Offset 16
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %half = OpTypeFloat 16
+     %v4half = OpTypeVector %half 4
+%u_block_std140 = OpTypeStruct %v4half %v4half %v4half
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat3v4half = OpTypeMatrix %v4half 3
+          %6 = OpTypeFunction %mat3v4half
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+       %void = OpTypeVoid
+         %26 = OpTypeFunction %void
+     %v3half = OpTypeVector %half 3
+ %mat4v3half = OpTypeMatrix %v3half 4
+         %42 = OpConstantNull %uint
+%load_u_inner = OpFunction %mat3v4half None %6
+          %9 = OpLabel
+         %15 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0
+         %16 = OpLoad %v4half %15
+         %19 = OpAccessChain %_ptr_Uniform_v4half %u %uint_1
+         %20 = OpLoad %v4half %19
+         %23 = OpAccessChain %_ptr_Uniform_v4half %u %uint_2
+         %24 = OpLoad %v4half %23
+         %25 = OpCompositeConstruct %mat3v4half %16 %20 %24
+               OpReturnValue %25
+               OpFunctionEnd
+          %f = OpFunction %void None %26
+         %29 = OpLabel
+         %33 = OpFunctionCall %mat3v4half %load_u_inner
+         %30 = OpTranspose %mat4v3half %33
+         %36 = OpAccessChain %_ptr_Uniform_v4half %u %uint_1
+         %37 = OpLoad %v4half %36
+         %34 = OpExtInst %half %35 Length %37
+         %39 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0
+         %40 = OpLoad %v4half %39
+         %41 = OpVectorShuffle %v4half %40 %40 1 3 0 2
+         %43 = OpCompositeExtract %half %41 0
+         %38 = OpExtInst %half %35 FAbs %43
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_builtin.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_builtin.wgsl.expected.wgsl
new file mode 100644
index 0000000..2dd842a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_builtin.wgsl.expected.wgsl

@@ -0,0 +1,10 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat3x4<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let t = transpose(u);
+  let l = length(u[1]);
+  let a = abs(u[0].ywxz.x);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_fn.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_fn.wgsl
new file mode 100644
index 0000000..842e96b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_fn.wgsl

@@ -0,0 +1,16 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat3x4<f16>;
+
+fn a(m : mat3x4<f16>) {}
+fn b(v : vec4<f16>) {}
+fn c(f : f16) {}
+
+@compute @workgroup_size(1)
+fn f() {
+    a(u);
+    b(u[1]);
+    b(u[1].ywxz);
+    c(u[1].x);
+    c(u[1].ywxz.x);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_fn.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_fn.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..e92c783
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_fn.wgsl.expected.dxc.hlsl

@@ -0,0 +1,50 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+
+void a(matrix<float16_t, 3, 4> m) {
+}
+
+void b(vector<float16_t, 4> v) {
+}
+
+void c(float16_t f_1) {
+}
+
+matrix<float16_t, 3, 4> tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  return matrix<float16_t, 3, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  uint2 ubo_load_6 = u[0].zw;
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  b(vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]));
+  uint2 ubo_load_7 = u[0].zw;
+  vector<float16_t, 2> ubo_load_7_xz = vector<float16_t, 2>(f16tof32(ubo_load_7 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_7_yw = vector<float16_t, 2>(f16tof32(ubo_load_7 >> 16));
+  b(vector<float16_t, 4>(ubo_load_7_xz[0], ubo_load_7_yw[0], ubo_load_7_xz[1], ubo_load_7_yw[1]).ywxz);
+  c(float16_t(f16tof32(((u[0].z) & 0xFFFF))));
+  uint2 ubo_load_8 = u[0].zw;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_8_yw = vector<float16_t, 2>(f16tof32(ubo_load_8 >> 16));
+  c(vector<float16_t, 4>(ubo_load_8_xz[0], ubo_load_8_yw[0], ubo_load_8_xz[1], ubo_load_8_yw[1]).ywxz.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_fn.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_fn.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..be27f56
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_fn.wgsl.expected.fxc.hlsl

@@ -0,0 +1,57 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+
+void a(matrix<float16_t, 3, 4> m) {
+}
+
+void b(vector<float16_t, 4> v) {
+}
+
+void c(float16_t f_1) {
+}
+
+matrix<float16_t, 3, 4> tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  return matrix<float16_t, 3, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  uint2 ubo_load_6 = u[0].zw;
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  b(vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]));
+  uint2 ubo_load_7 = u[0].zw;
+  vector<float16_t, 2> ubo_load_7_xz = vector<float16_t, 2>(f16tof32(ubo_load_7 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_7_yw = vector<float16_t, 2>(f16tof32(ubo_load_7 >> 16));
+  b(vector<float16_t, 4>(ubo_load_7_xz[0], ubo_load_7_yw[0], ubo_load_7_xz[1], ubo_load_7_yw[1]).ywxz);
+  c(float16_t(f16tof32(((u[0].z) & 0xFFFF))));
+  uint2 ubo_load_8 = u[0].zw;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_8_yw = vector<float16_t, 2>(f16tof32(ubo_load_8 >> 16));
+  c(vector<float16_t, 4>(ubo_load_8_xz[0], ubo_load_8_yw[0], ubo_load_8_xz[1], ubo_load_8_yw[1]).ywxz.x);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000002E63B0EDB50(5,15-23): error X3000: syntax error: unexpected token 'float16_t'
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000002E63B0EDB50(8,15-23): error X3000: syntax error: unexpected token 'float16_t'
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000002E63B0EDB50(11,8-16): error X3000: unrecognized identifier 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_fn.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_fn.wgsl.expected.glsl
new file mode 100644
index 0000000..94fbbaa
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_fn.wgsl.expected.glsl

@@ -0,0 +1,35 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  f16vec4 inner_0;
+  f16vec4 inner_1;
+  f16vec4 inner_2;
+} u;
+
+void a(f16mat3x4 m) {
+}
+
+void b(f16vec4 v) {
+}
+
+void c(float16_t f_1) {
+}
+
+f16mat3x4 load_u_inner() {
+  return f16mat3x4(u.inner_0, u.inner_1, u.inner_2);
+}
+
+void f() {
+  a(load_u_inner());
+  b(u.inner_1);
+  b(u.inner_1.ywxz);
+  c(u.inner_1[0u]);
+  c(u.inner_1.ywxz[0u]);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_fn.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_fn.wgsl.expected.msl
new file mode 100644
index 0000000..8f44101
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_fn.wgsl.expected.msl

@@ -0,0 +1,21 @@
+#include <metal_stdlib>
+
+using namespace metal;
+void a(half3x4 m) {
+}
+
+void b(half4 v) {
+}
+
+void c(half f_1) {
+}
+
+kernel void f(const constant half3x4* tint_symbol [[buffer(0)]]) {
+  a(*(tint_symbol));
+  b((*(tint_symbol))[1]);
+  b(half4((*(tint_symbol))[1]).ywxz);
+  c((*(tint_symbol))[1][0]);
+  c(half4((*(tint_symbol))[1]).ywxz[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_fn.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_fn.wgsl.expected.spvasm
new file mode 100644
index 0000000..13f9566
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_fn.wgsl.expected.spvasm

@@ -0,0 +1,99 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 61
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner_0"
+               OpMemberName %u_block_std140 1 "inner_1"
+               OpMemberName %u_block_std140 2 "inner_2"
+               OpName %u "u"
+               OpName %a "a"
+               OpName %m "m"
+               OpName %b "b"
+               OpName %v "v"
+               OpName %c "c"
+               OpName %f_1 "f_1"
+               OpName %load_u_inner "load_u_inner"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %u_block_std140 1 Offset 8
+               OpMemberDecorate %u_block_std140 2 Offset 16
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %half = OpTypeFloat 16
+     %v4half = OpTypeVector %half 4
+%u_block_std140 = OpTypeStruct %v4half %v4half %v4half
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+       %void = OpTypeVoid
+ %mat3v4half = OpTypeMatrix %v4half 3
+          %6 = OpTypeFunction %void %mat3v4half
+         %12 = OpTypeFunction %void %v4half
+         %16 = OpTypeFunction %void %half
+         %20 = OpTypeFunction %mat3v4half
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+         %39 = OpTypeFunction %void
+         %52 = OpConstantNull %uint
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+          %a = OpFunction %void None %6
+          %m = OpFunctionParameter %mat3v4half
+         %11 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %b = OpFunction %void None %12
+          %v = OpFunctionParameter %v4half
+         %15 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %c = OpFunction %void None %16
+        %f_1 = OpFunctionParameter %half
+         %19 = OpLabel
+               OpReturn
+               OpFunctionEnd
+%load_u_inner = OpFunction %mat3v4half None %20
+         %22 = OpLabel
+         %28 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0
+         %29 = OpLoad %v4half %28
+         %32 = OpAccessChain %_ptr_Uniform_v4half %u %uint_1
+         %33 = OpLoad %v4half %32
+         %36 = OpAccessChain %_ptr_Uniform_v4half %u %uint_2
+         %37 = OpLoad %v4half %36
+         %38 = OpCompositeConstruct %mat3v4half %29 %33 %37
+               OpReturnValue %38
+               OpFunctionEnd
+          %f = OpFunction %void None %39
+         %41 = OpLabel
+         %43 = OpFunctionCall %mat3v4half %load_u_inner
+         %42 = OpFunctionCall %void %a %43
+         %45 = OpAccessChain %_ptr_Uniform_v4half %u %uint_1
+         %46 = OpLoad %v4half %45
+         %44 = OpFunctionCall %void %b %46
+         %48 = OpAccessChain %_ptr_Uniform_v4half %u %uint_1
+         %49 = OpLoad %v4half %48
+         %50 = OpVectorShuffle %v4half %49 %49 1 3 0 2
+         %47 = OpFunctionCall %void %b %50
+         %54 = OpAccessChain %_ptr_Uniform_half %u %uint_1 %52
+         %55 = OpLoad %half %54
+         %51 = OpFunctionCall %void %c %55
+         %57 = OpAccessChain %_ptr_Uniform_v4half %u %uint_1
+         %58 = OpLoad %v4half %57
+         %59 = OpVectorShuffle %v4half %58 %58 1 3 0 2
+         %60 = OpCompositeExtract %half %59 0
+         %56 = OpFunctionCall %void %c %60
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_fn.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_fn.wgsl.expected.wgsl
new file mode 100644
index 0000000..87d6244
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_fn.wgsl.expected.wgsl

@@ -0,0 +1,21 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat3x4<f16>;
+
+fn a(m : mat3x4<f16>) {
+}
+
+fn b(v : vec4<f16>) {
+}
+
+fn c(f : f16) {
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  a(u);
+  b(u[1]);
+  b(u[1].ywxz);
+  c(u[1].x);
+  c(u[1].ywxz.x);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_private.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_private.wgsl
new file mode 100644
index 0000000..cd35569
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_private.wgsl

@@ -0,0 +1,12 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat3x4<f16>;
+var<private> p : mat3x4<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+    p = u;
+    p[1] = u[0];
+    p[1] = u[0].ywxz;
+    p[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_private.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_private.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..1d40f2b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_private.wgsl.expected.dxc.hlsl

@@ -0,0 +1,38 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+static matrix<float16_t, 3, 4> p = matrix<float16_t, 3, 4>(float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h));
+
+matrix<float16_t, 3, 4> tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  return matrix<float16_t, 3, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  uint2 ubo_load_6 = u[0].xy;
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  p[1] = vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]);
+  uint2 ubo_load_7 = u[0].xy;
+  vector<float16_t, 2> ubo_load_7_xz = vector<float16_t, 2>(f16tof32(ubo_load_7 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_7_yw = vector<float16_t, 2>(f16tof32(ubo_load_7 >> 16));
+  p[1] = vector<float16_t, 4>(ubo_load_7_xz[0], ubo_load_7_yw[0], ubo_load_7_xz[1], ubo_load_7_yw[1]).ywxz;
+  p[0][1] = float16_t(f16tof32(((u[0].z) & 0xFFFF)));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_private.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_private.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..6db6a05
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_private.wgsl.expected.fxc.hlsl

@@ -0,0 +1,43 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+static matrix<float16_t, 3, 4> p = matrix<float16_t, 3, 4>(float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h));
+
+matrix<float16_t, 3, 4> tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  return matrix<float16_t, 3, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  uint2 ubo_load_6 = u[0].xy;
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  p[1] = vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]);
+  uint2 ubo_load_7 = u[0].xy;
+  vector<float16_t, 2> ubo_load_7_xz = vector<float16_t, 2>(f16tof32(ubo_load_7 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_7_yw = vector<float16_t, 2>(f16tof32(ubo_load_7 >> 16));
+  p[1] = vector<float16_t, 4>(ubo_load_7_xz[0], ubo_load_7_yw[0], ubo_load_7_xz[1], ubo_load_7_yw[1]).ywxz;
+  p[0][1] = float16_t(f16tof32(((u[0].z) & 0xFFFF)));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000002EEEBC7D430(4,15-23): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_private.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_private.wgsl.expected.glsl
new file mode 100644
index 0000000..3adf16e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_private.wgsl.expected.glsl

@@ -0,0 +1,26 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  f16vec4 inner_0;
+  f16vec4 inner_1;
+  f16vec4 inner_2;
+} u;
+
+f16mat3x4 p = f16mat3x4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf);
+f16mat3x4 load_u_inner() {
+  return f16mat3x4(u.inner_0, u.inner_1, u.inner_2);
+}
+
+void f() {
+  p = load_u_inner();
+  p[1] = u.inner_0;
+  p[1] = u.inner_0.ywxz;
+  p[0][1] = u.inner_1[0u];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_private.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_private.wgsl.expected.msl
new file mode 100644
index 0000000..2293b30
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_private.wgsl.expected.msl

@@ -0,0 +1,12 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void f(const constant half3x4* tint_symbol_1 [[buffer(0)]]) {
+  thread half3x4 tint_symbol = half3x4(0.0h);
+  tint_symbol = *(tint_symbol_1);
+  tint_symbol[1] = (*(tint_symbol_1))[0];
+  tint_symbol[1] = half4((*(tint_symbol_1))[0]).ywxz;
+  tint_symbol[0][1] = (*(tint_symbol_1))[1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_private.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_private.wgsl.expected.spvasm
new file mode 100644
index 0000000..03f3553
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_private.wgsl.expected.spvasm

@@ -0,0 +1,82 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 51
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner_0"
+               OpMemberName %u_block_std140 1 "inner_1"
+               OpMemberName %u_block_std140 2 "inner_2"
+               OpName %u "u"
+               OpName %p "p"
+               OpName %load_u_inner "load_u_inner"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %u_block_std140 1 Offset 8
+               OpMemberDecorate %u_block_std140 2 Offset 16
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %half = OpTypeFloat 16
+     %v4half = OpTypeVector %half 4
+%u_block_std140 = OpTypeStruct %v4half %v4half %v4half
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat3v4half = OpTypeMatrix %v4half 3
+%_ptr_Private_mat3v4half = OpTypePointer Private %mat3v4half
+          %9 = OpConstantNull %mat3v4half
+          %p = OpVariable %_ptr_Private_mat3v4half Private %9
+         %10 = OpTypeFunction %mat3v4half
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+       %void = OpTypeVoid
+         %29 = OpTypeFunction %void
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Private_v4half = OpTypePointer Private %v4half
+         %44 = OpConstantNull %int
+%_ptr_Private_half = OpTypePointer Private %half
+         %47 = OpConstantNull %uint
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+%load_u_inner = OpFunction %mat3v4half None %10
+         %12 = OpLabel
+         %18 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0
+         %19 = OpLoad %v4half %18
+         %22 = OpAccessChain %_ptr_Uniform_v4half %u %uint_1
+         %23 = OpLoad %v4half %22
+         %26 = OpAccessChain %_ptr_Uniform_v4half %u %uint_2
+         %27 = OpLoad %v4half %26
+         %28 = OpCompositeConstruct %mat3v4half %19 %23 %27
+               OpReturnValue %28
+               OpFunctionEnd
+          %f = OpFunction %void None %29
+         %32 = OpLabel
+         %33 = OpFunctionCall %mat3v4half %load_u_inner
+               OpStore %p %33
+         %37 = OpAccessChain %_ptr_Private_v4half %p %int_1
+         %38 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0
+         %39 = OpLoad %v4half %38
+               OpStore %37 %39
+         %40 = OpAccessChain %_ptr_Private_v4half %p %int_1
+         %41 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0
+         %42 = OpLoad %v4half %41
+         %43 = OpVectorShuffle %v4half %42 %42 1 3 0 2
+               OpStore %40 %43
+         %46 = OpAccessChain %_ptr_Private_half %p %44 %int_1
+         %49 = OpAccessChain %_ptr_Uniform_half %u %uint_1 %47
+         %50 = OpLoad %half %49
+               OpStore %46 %50
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_private.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_private.wgsl.expected.wgsl
new file mode 100644
index 0000000..d80add7d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_private.wgsl.expected.wgsl

@@ -0,0 +1,13 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat3x4<f16>;
+
+var<private> p : mat3x4<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+  p = u;
+  p[1] = u[0];
+  p[1] = u[0].ywxz;
+  p[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_storage.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_storage.wgsl
new file mode 100644
index 0000000..a57d0c6
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_storage.wgsl

@@ -0,0 +1,12 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat3x4<f16>;
+@group(0) @binding(1) var<storage, read_write> s : mat3x4<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+    s = u;
+    s[1] = u[0];
+    s[1] = u[0].ywxz;
+    s[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_storage.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_storage.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..a4c97e9
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_storage.wgsl.expected.dxc.hlsl

@@ -0,0 +1,44 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 3, 4> value) {
+  buffer.Store<vector<float16_t, 4> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 8u), value[1u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 16u), value[2u]);
+}
+
+matrix<float16_t, 3, 4> tint_symbol_2(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  return matrix<float16_t, 3, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_2(u, 0u));
+  uint2 ubo_load_6 = u[0].xy;
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  s.Store<vector<float16_t, 4> >(8u, vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]));
+  uint2 ubo_load_7 = u[0].xy;
+  vector<float16_t, 2> ubo_load_7_xz = vector<float16_t, 2>(f16tof32(ubo_load_7 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_7_yw = vector<float16_t, 2>(f16tof32(ubo_load_7 >> 16));
+  s.Store<vector<float16_t, 4> >(8u, vector<float16_t, 4>(ubo_load_7_xz[0], ubo_load_7_yw[0], ubo_load_7_xz[1], ubo_load_7_yw[1]).ywxz);
+  s.Store<float16_t>(2u, float16_t(f16tof32(((u[0].z) & 0xFFFF))));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_storage.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_storage.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..627d5f9
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_storage.wgsl.expected.fxc.hlsl

@@ -0,0 +1,50 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 3, 4> value) {
+  buffer.Store<vector<float16_t, 4> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 8u), value[1u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 16u), value[2u]);
+}
+
+matrix<float16_t, 3, 4> tint_symbol_2(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  return matrix<float16_t, 3, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_2(u, 0u));
+  uint2 ubo_load_6 = u[0].xy;
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  s.Store<vector<float16_t, 4> >(8u, vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]));
+  uint2 ubo_load_7 = u[0].xy;
+  vector<float16_t, 2> ubo_load_7_xz = vector<float16_t, 2>(f16tof32(ubo_load_7 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_7_yw = vector<float16_t, 2>(f16tof32(ubo_load_7 >> 16));
+  s.Store<vector<float16_t, 4> >(8u, vector<float16_t, 4>(ubo_load_7_xz[0], ubo_load_7_yw[0], ubo_load_7_xz[1], ubo_load_7_yw[1]).ywxz);
+  s.Store<float16_t>(2u, float16_t(f16tof32(((u[0].z) & 0xFFFF))));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000002438C2DBBB0(6,66-74): error X3000: syntax error: unexpected token 'float16_t'
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000002438C2DBBB0(7,3-14): error X3018: invalid subscript 'Store'
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_storage.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_storage.wgsl.expected.glsl
new file mode 100644
index 0000000..ac8aaf9
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_storage.wgsl.expected.glsl

@@ -0,0 +1,29 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  f16vec4 inner_0;
+  f16vec4 inner_1;
+  f16vec4 inner_2;
+} u;
+
+layout(binding = 1, std430) buffer u_block_ssbo {
+  f16mat3x4 inner;
+} s;
+
+f16mat3x4 load_u_inner() {
+  return f16mat3x4(u.inner_0, u.inner_1, u.inner_2);
+}
+
+void f() {
+  s.inner = load_u_inner();
+  s.inner[1] = u.inner_0;
+  s.inner[1] = u.inner_0.ywxz;
+  s.inner[0][1] = u.inner_1[0u];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_storage.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_storage.wgsl.expected.msl
new file mode 100644
index 0000000..7684631
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_storage.wgsl.expected.msl

@@ -0,0 +1,11 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void f(device half3x4* tint_symbol [[buffer(1)]], const constant half3x4* tint_symbol_1 [[buffer(0)]]) {
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[0];
+  (*(tint_symbol))[1] = half4((*(tint_symbol_1))[0]).ywxz;
+  (*(tint_symbol))[0][1] = (*(tint_symbol_1))[1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_storage.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_storage.wgsl.expected.spvasm
new file mode 100644
index 0000000..6f45e68
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_storage.wgsl.expected.spvasm

@@ -0,0 +1,92 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 53
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner_0"
+               OpMemberName %u_block_std140 1 "inner_1"
+               OpMemberName %u_block_std140 2 "inner_2"
+               OpName %u "u"
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %s "s"
+               OpName %load_u_inner "load_u_inner"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %u_block_std140 1 Offset 8
+               OpMemberDecorate %u_block_std140 2 Offset 16
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 8
+               OpDecorate %s DescriptorSet 0
+               OpDecorate %s Binding 1
+       %half = OpTypeFloat 16
+     %v4half = OpTypeVector %half 4
+%u_block_std140 = OpTypeStruct %v4half %v4half %v4half
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat3v4half = OpTypeMatrix %v4half 3
+    %u_block = OpTypeStruct %mat3v4half
+%_ptr_StorageBuffer_u_block = OpTypePointer StorageBuffer %u_block
+          %s = OpVariable %_ptr_StorageBuffer_u_block StorageBuffer
+         %10 = OpTypeFunction %mat3v4half
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+       %void = OpTypeVoid
+         %29 = OpTypeFunction %void
+%_ptr_StorageBuffer_mat3v4half = OpTypePointer StorageBuffer %mat3v4half
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_StorageBuffer_v4half = OpTypePointer StorageBuffer %v4half
+         %46 = OpConstantNull %int
+%_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
+         %49 = OpConstantNull %uint
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+%load_u_inner = OpFunction %mat3v4half None %10
+         %12 = OpLabel
+         %18 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0
+         %19 = OpLoad %v4half %18
+         %22 = OpAccessChain %_ptr_Uniform_v4half %u %uint_1
+         %23 = OpLoad %v4half %22
+         %26 = OpAccessChain %_ptr_Uniform_v4half %u %uint_2
+         %27 = OpLoad %v4half %26
+         %28 = OpCompositeConstruct %mat3v4half %19 %23 %27
+               OpReturnValue %28
+               OpFunctionEnd
+          %f = OpFunction %void None %29
+         %32 = OpLabel
+         %34 = OpAccessChain %_ptr_StorageBuffer_mat3v4half %s %uint_0
+         %35 = OpFunctionCall %mat3v4half %load_u_inner
+               OpStore %34 %35
+         %39 = OpAccessChain %_ptr_StorageBuffer_v4half %s %uint_0 %int_1
+         %40 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0
+         %41 = OpLoad %v4half %40
+               OpStore %39 %41
+         %42 = OpAccessChain %_ptr_StorageBuffer_v4half %s %uint_0 %int_1
+         %43 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0
+         %44 = OpLoad %v4half %43
+         %45 = OpVectorShuffle %v4half %44 %44 1 3 0 2
+               OpStore %42 %45
+         %48 = OpAccessChain %_ptr_StorageBuffer_half %s %uint_0 %46 %int_1
+         %51 = OpAccessChain %_ptr_Uniform_half %u %uint_1 %49
+         %52 = OpLoad %half %51
+               OpStore %48 %52
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_storage.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_storage.wgsl.expected.wgsl
new file mode 100644
index 0000000..9a28896
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_storage.wgsl.expected.wgsl

@@ -0,0 +1,13 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat3x4<f16>;
+
+@group(0) @binding(1) var<storage, read_write> s : mat3x4<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+  s = u;
+  s[1] = u[0];
+  s[1] = u[0].ywxz;
+  s[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_workgroup.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_workgroup.wgsl
new file mode 100644
index 0000000..6c66c2f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_workgroup.wgsl

@@ -0,0 +1,12 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat3x4<f16>;
+var<workgroup> w : mat3x4<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+    w = u;
+    w[1] = u[0];
+    w[1] = u[0].ywxz;
+    w[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_workgroup.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_workgroup.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..5dd6888
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_workgroup.wgsl.expected.dxc.hlsl

@@ -0,0 +1,50 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+groupshared matrix<float16_t, 3, 4> w;
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+matrix<float16_t, 3, 4> tint_symbol_2(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  return matrix<float16_t, 3, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]));
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    w = matrix<float16_t, 3, 4>((float16_t(0.0h)).xxxx, (float16_t(0.0h)).xxxx, (float16_t(0.0h)).xxxx);
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  uint2 ubo_load_6 = u[0].xy;
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  w[1] = vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]);
+  uint2 ubo_load_7 = u[0].xy;
+  vector<float16_t, 2> ubo_load_7_xz = vector<float16_t, 2>(f16tof32(ubo_load_7 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_7_yw = vector<float16_t, 2>(f16tof32(ubo_load_7 >> 16));
+  w[1] = vector<float16_t, 4>(ubo_load_7_xz[0], ubo_load_7_yw[0], ubo_load_7_xz[1], ubo_load_7_yw[1]).ywxz;
+  w[0][1] = float16_t(f16tof32(((u[0].z) & 0xFFFF)));
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_workgroup.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_workgroup.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..9774f46
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_workgroup.wgsl.expected.fxc.hlsl

@@ -0,0 +1,55 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+groupshared matrix<float16_t, 3, 4> w;
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+matrix<float16_t, 3, 4> tint_symbol_2(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  return matrix<float16_t, 3, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]));
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    w = matrix<float16_t, 3, 4>((float16_t(0.0h)).xxxx, (float16_t(0.0h)).xxxx, (float16_t(0.0h)).xxxx);
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  uint2 ubo_load_6 = u[0].xy;
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  w[1] = vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]);
+  uint2 ubo_load_7 = u[0].xy;
+  vector<float16_t, 2> ubo_load_7_xz = vector<float16_t, 2>(f16tof32(ubo_load_7 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_7_yw = vector<float16_t, 2>(f16tof32(ubo_load_7 >> 16));
+  w[1] = vector<float16_t, 4>(ubo_load_7_xz[0], ubo_load_7_yw[0], ubo_load_7_xz[1], ubo_load_7_yw[1]).ywxz;
+  w[0][1] = float16_t(f16tof32(((u[0].z) & 0xFFFF)));
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000002AEEC70C750(4,20-28): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_workgroup.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_workgroup.wgsl.expected.glsl
new file mode 100644
index 0000000..d937cd8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_workgroup.wgsl.expected.glsl

@@ -0,0 +1,30 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  f16vec4 inner_0;
+  f16vec4 inner_1;
+  f16vec4 inner_2;
+} u;
+
+shared f16mat3x4 w;
+f16mat3x4 load_u_inner() {
+  return f16mat3x4(u.inner_0, u.inner_1, u.inner_2);
+}
+
+void f(uint local_invocation_index) {
+  {
+    w = f16mat3x4(f16vec4(0.0hf), f16vec4(0.0hf), f16vec4(0.0hf));
+  }
+  barrier();
+  w = load_u_inner();
+  w[1] = u.inner_0;
+  w[1] = u.inner_0.ywxz;
+  w[0][1] = u.inner_1[0u];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f(gl_LocalInvocationIndex);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_workgroup.wgsl.expected.msl
new file mode 100644
index 0000000..8291498
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_workgroup.wgsl.expected.msl

@@ -0,0 +1,24 @@
+#include <metal_stdlib>
+
+using namespace metal;
+struct tint_symbol_5 {
+  half3x4 w;
+};
+
+void f_inner(uint local_invocation_index, threadgroup half3x4* const tint_symbol, const constant half3x4* const tint_symbol_1) {
+  {
+    *(tint_symbol) = half3x4(half4(0.0h), half4(0.0h), half4(0.0h));
+  }
+  threadgroup_barrier(mem_flags::mem_threadgroup);
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[0];
+  (*(tint_symbol))[1] = half4((*(tint_symbol_1))[0]).ywxz;
+  (*(tint_symbol))[0][1] = (*(tint_symbol_1))[1][0];
+}
+
+kernel void f(const constant half3x4* tint_symbol_4 [[buffer(0)]], threadgroup tint_symbol_5* tint_symbol_3 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
+  threadgroup half3x4* const tint_symbol_2 = &((*(tint_symbol_3)).w);
+  f_inner(local_invocation_index, tint_symbol_2, tint_symbol_4);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_workgroup.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_workgroup.wgsl.expected.spvasm
new file mode 100644
index 0000000..962956a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_workgroup.wgsl.expected.spvasm

@@ -0,0 +1,99 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 61
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f" %local_invocation_index_1
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %local_invocation_index_1 "local_invocation_index_1"
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner_0"
+               OpMemberName %u_block_std140 1 "inner_1"
+               OpMemberName %u_block_std140 2 "inner_2"
+               OpName %u "u"
+               OpName %w "w"
+               OpName %load_u_inner "load_u_inner"
+               OpName %f_inner "f_inner"
+               OpName %local_invocation_index "local_invocation_index"
+               OpName %f "f"
+               OpDecorate %local_invocation_index_1 BuiltIn LocalInvocationIndex
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %u_block_std140 1 Offset 8
+               OpMemberDecorate %u_block_std140 2 Offset 16
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %uint = OpTypeInt 32 0
+%_ptr_Input_uint = OpTypePointer Input %uint
+%local_invocation_index_1 = OpVariable %_ptr_Input_uint Input
+       %half = OpTypeFloat 16
+     %v4half = OpTypeVector %half 4
+%u_block_std140 = OpTypeStruct %v4half %v4half %v4half
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat3v4half = OpTypeMatrix %v4half 3
+%_ptr_Workgroup_mat3v4half = OpTypePointer Workgroup %mat3v4half
+          %w = OpVariable %_ptr_Workgroup_mat3v4half Workgroup
+         %12 = OpTypeFunction %mat3v4half
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+       %void = OpTypeVoid
+         %30 = OpTypeFunction %void %uint
+         %35 = OpConstantNull %mat3v4half
+   %uint_264 = OpConstant %uint 264
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Workgroup_v4half = OpTypePointer Workgroup %v4half
+         %49 = OpConstantNull %int
+%_ptr_Workgroup_half = OpTypePointer Workgroup %half
+         %52 = OpConstantNull %uint
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+         %56 = OpTypeFunction %void
+%load_u_inner = OpFunction %mat3v4half None %12
+         %14 = OpLabel
+         %19 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0
+         %20 = OpLoad %v4half %19
+         %23 = OpAccessChain %_ptr_Uniform_v4half %u %uint_1
+         %24 = OpLoad %v4half %23
+         %27 = OpAccessChain %_ptr_Uniform_v4half %u %uint_2
+         %28 = OpLoad %v4half %27
+         %29 = OpCompositeConstruct %mat3v4half %20 %24 %28
+               OpReturnValue %29
+               OpFunctionEnd
+    %f_inner = OpFunction %void None %30
+%local_invocation_index = OpFunctionParameter %uint
+         %34 = OpLabel
+               OpStore %w %35
+               OpControlBarrier %uint_2 %uint_2 %uint_264
+         %38 = OpFunctionCall %mat3v4half %load_u_inner
+               OpStore %w %38
+         %42 = OpAccessChain %_ptr_Workgroup_v4half %w %int_1
+         %43 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0
+         %44 = OpLoad %v4half %43
+               OpStore %42 %44
+         %45 = OpAccessChain %_ptr_Workgroup_v4half %w %int_1
+         %46 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0
+         %47 = OpLoad %v4half %46
+         %48 = OpVectorShuffle %v4half %47 %47 1 3 0 2
+               OpStore %45 %48
+         %51 = OpAccessChain %_ptr_Workgroup_half %w %49 %int_1
+         %54 = OpAccessChain %_ptr_Uniform_half %u %uint_1 %52
+         %55 = OpLoad %half %54
+               OpStore %51 %55
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %56
+         %58 = OpLabel
+         %60 = OpLoad %uint %local_invocation_index_1
+         %59 = OpFunctionCall %void %f_inner %60
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_workgroup.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_workgroup.wgsl.expected.wgsl
new file mode 100644
index 0000000..911c3d7
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f16/to_workgroup.wgsl.expected.wgsl

@@ -0,0 +1,13 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat3x4<f16>;
+
+var<workgroup> w : mat3x4<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+  w = u;
+  w[1] = u[0];
+  w[1] = u[0].ywxz;
+  w[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/dynamic_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/dynamic_index_via_ptr.wgsl
new file mode 100644
index 0000000..a737e2d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/dynamic_index_via_ptr.wgsl

@@ -0,0 +1,13 @@
+@group(0) @binding(0) var<uniform> m : mat3x4<f32>;
+
+var<private> counter = 0;
+fn i() -> i32 { counter++; return counter; }
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_m   = &m;
+  let p_m_i = &((*p_m)[i()]);
+
+  let l_m   : mat3x4<f32> = *p_m;
+  let l_m_i : vec4<f32>   = *p_m_i;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..4fad9a2
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,25 @@
+cbuffer cbuffer_m : register(b0, space0) {
+  uint4 m[3];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+float3x4 tint_symbol(uint4 buffer[3], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_m_i_save = i();
+  const float3x4 l_m = tint_symbol(m, 0u);
+  const uint scalar_offset_3 = ((16u * uint(p_m_i_save))) / 4;
+  const float4 l_m_i = asfloat(m[scalar_offset_3 / 4]);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..4fad9a2
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,25 @@
+cbuffer cbuffer_m : register(b0, space0) {
+  uint4 m[3];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+float3x4 tint_symbol(uint4 buffer[3], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_m_i_save = i();
+  const float3x4 l_m = tint_symbol(m, 0u);
+  const uint scalar_offset_3 = ((16u * uint(p_m_i_save))) / 4;
+  const float4 l_m_i = asfloat(m[scalar_offset_3 / 4]);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/dynamic_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/dynamic_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..405239b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/dynamic_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,24 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform m_block_ubo {
+  mat3x4 inner;
+} m;
+
+int counter = 0;
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+void f() {
+  int tint_symbol = i();
+  int p_m_i_save = tint_symbol;
+  mat3x4 l_m = m.inner;
+  vec4 l_m_i = m.inner[p_m_i_save];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/dynamic_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/dynamic_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..8774e83
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/dynamic_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,17 @@
+#include <metal_stdlib>
+
+using namespace metal;
+int i() {
+  thread int tint_symbol_1 = 0;
+  tint_symbol_1 = as_type<int>((as_type<uint>(tint_symbol_1) + as_type<uint>(1)));
+  return tint_symbol_1;
+}
+
+kernel void f(const constant float3x4* tint_symbol_2 [[buffer(0)]]) {
+  int const tint_symbol = i();
+  int const p_m_i_save = tint_symbol;
+  float3x4 const l_m = *(tint_symbol_2);
+  float4 const l_m_i = (*(tint_symbol_2))[p_m_i_save];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/dynamic_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/dynamic_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..65b4895
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/dynamic_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,57 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 31
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %m_block "m_block"
+               OpMemberName %m_block 0 "inner"
+               OpName %m "m"
+               OpName %counter "counter"
+               OpName %i "i"
+               OpName %f "f"
+               OpDecorate %m_block Block
+               OpMemberDecorate %m_block 0 Offset 0
+               OpMemberDecorate %m_block 0 ColMajor
+               OpMemberDecorate %m_block 0 MatrixStride 16
+               OpDecorate %m NonWritable
+               OpDecorate %m DescriptorSet 0
+               OpDecorate %m Binding 0
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%mat3v4float = OpTypeMatrix %v4float 3
+    %m_block = OpTypeStruct %mat3v4float
+%_ptr_Uniform_m_block = OpTypePointer Uniform %m_block
+          %m = OpVariable %_ptr_Uniform_m_block Uniform
+        %int = OpTypeInt 32 1
+          %8 = OpConstantNull %int
+%_ptr_Private_int = OpTypePointer Private %int
+    %counter = OpVariable %_ptr_Private_int Private %8
+         %11 = OpTypeFunction %int
+      %int_1 = OpConstant %int 1
+       %void = OpTypeVoid
+         %18 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_mat3v4float = OpTypePointer Uniform %mat3v4float
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+          %i = OpFunction %int None %11
+         %13 = OpLabel
+         %14 = OpLoad %int %counter
+         %16 = OpIAdd %int %14 %int_1
+               OpStore %counter %16
+         %17 = OpLoad %int %counter
+               OpReturnValue %17
+               OpFunctionEnd
+          %f = OpFunction %void None %18
+         %21 = OpLabel
+         %22 = OpFunctionCall %int %i
+         %26 = OpAccessChain %_ptr_Uniform_mat3v4float %m %uint_0
+         %27 = OpLoad %mat3v4float %26
+         %29 = OpAccessChain %_ptr_Uniform_v4float %m %uint_0 %22
+         %30 = OpLoad %v4float %29
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/dynamic_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/dynamic_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..625ea9a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/dynamic_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,16 @@
+@group(0) @binding(0) var<uniform> m : mat3x4<f32>;
+
+var<private> counter = 0;
+
+fn i() -> i32 {
+  counter++;
+  return counter;
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_m = &(m);
+  let p_m_i = &((*(p_m))[i()]);
+  let l_m : mat3x4<f32> = *(p_m);
+  let l_m_i : vec4<f32> = *(p_m_i);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/static_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/static_index_via_ptr.wgsl
new file mode 100644
index 0000000..62f9041
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/static_index_via_ptr.wgsl

@@ -0,0 +1,13 @@
+@group(0) @binding(0) var<uniform> m : mat3x4<f32>;
+
+var<private> counter = 0;
+fn i() -> i32 { counter++; return counter; }
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_m   = &m;
+  let p_m_1 = &((*p_m)[1]);
+
+  let l_m   : mat3x4<f32> = *p_m;
+  let l_m_1 : vec4<f32>   = *p_m_1;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..c45de93
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,23 @@
+cbuffer cbuffer_m : register(b0, space0) {
+  uint4 m[3];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+float3x4 tint_symbol(uint4 buffer[3], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float3x4 l_m = tint_symbol(m, 0u);
+  const float4 l_m_1 = asfloat(m[1]);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..c45de93
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,23 @@
+cbuffer cbuffer_m : register(b0, space0) {
+  uint4 m[3];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+float3x4 tint_symbol(uint4 buffer[3], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float3x4 l_m = tint_symbol(m, 0u);
+  const float4 l_m_1 = asfloat(m[1]);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/static_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/static_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..0074214
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/static_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,16 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform m_block_ubo {
+  mat3x4 inner;
+} m;
+
+void f() {
+  mat3x4 l_m = m.inner;
+  vec4 l_m_1 = m.inner[1];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/static_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/static_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..e48a04b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/static_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,15 @@
+#include <metal_stdlib>
+
+using namespace metal;
+int i() {
+  thread int tint_symbol = 0;
+  tint_symbol = as_type<int>((as_type<uint>(tint_symbol) + as_type<uint>(1)));
+  return tint_symbol;
+}
+
+kernel void f(const constant float3x4* tint_symbol_1 [[buffer(0)]]) {
+  float3x4 const l_m = *(tint_symbol_1);
+  float4 const l_m_1 = (*(tint_symbol_1))[1];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/static_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/static_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..e50e62b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/static_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,56 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 30
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %m_block "m_block"
+               OpMemberName %m_block 0 "inner"
+               OpName %m "m"
+               OpName %counter "counter"
+               OpName %i "i"
+               OpName %f "f"
+               OpDecorate %m_block Block
+               OpMemberDecorate %m_block 0 Offset 0
+               OpMemberDecorate %m_block 0 ColMajor
+               OpMemberDecorate %m_block 0 MatrixStride 16
+               OpDecorate %m NonWritable
+               OpDecorate %m DescriptorSet 0
+               OpDecorate %m Binding 0
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%mat3v4float = OpTypeMatrix %v4float 3
+    %m_block = OpTypeStruct %mat3v4float
+%_ptr_Uniform_m_block = OpTypePointer Uniform %m_block
+          %m = OpVariable %_ptr_Uniform_m_block Uniform
+        %int = OpTypeInt 32 1
+          %8 = OpConstantNull %int
+%_ptr_Private_int = OpTypePointer Private %int
+    %counter = OpVariable %_ptr_Private_int Private %8
+         %11 = OpTypeFunction %int
+      %int_1 = OpConstant %int 1
+       %void = OpTypeVoid
+         %18 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_mat3v4float = OpTypePointer Uniform %mat3v4float
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+          %i = OpFunction %int None %11
+         %13 = OpLabel
+         %14 = OpLoad %int %counter
+         %16 = OpIAdd %int %14 %int_1
+               OpStore %counter %16
+         %17 = OpLoad %int %counter
+               OpReturnValue %17
+               OpFunctionEnd
+          %f = OpFunction %void None %18
+         %21 = OpLabel
+         %25 = OpAccessChain %_ptr_Uniform_mat3v4float %m %uint_0
+         %26 = OpLoad %mat3v4float %25
+         %28 = OpAccessChain %_ptr_Uniform_v4float %m %uint_0 %int_1
+         %29 = OpLoad %v4float %28
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/static_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/static_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..ecef1a6
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/static_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,16 @@
+@group(0) @binding(0) var<uniform> m : mat3x4<f32>;
+
+var<private> counter = 0;
+
+fn i() -> i32 {
+  counter++;
+  return counter;
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_m = &(m);
+  let p_m_1 = &((*(p_m))[1]);
+  let l_m : mat3x4<f32> = *(p_m);
+  let l_m_1 : vec4<f32> = *(p_m_1);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_builtin.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_builtin.wgsl
new file mode 100644
index 0000000..9d0578e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_builtin.wgsl

@@ -0,0 +1,8 @@
+@group(0) @binding(0) var<uniform> u : mat3x4<f32>;
+
+@compute @workgroup_size(1)
+fn f() {
+    let t = transpose(u);
+    let l = length(u[1]);
+    let a = abs(u[0].ywxz.x);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_builtin.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_builtin.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..619729e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_builtin.wgsl.expected.dxc.hlsl

@@ -0,0 +1,18 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[3];
+};
+
+float3x4 tint_symbol(uint4 buffer[3], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float4x3 t = transpose(tint_symbol(u, 0u));
+  const float l = length(asfloat(u[1]));
+  const float a = abs(asfloat(u[0]).ywxz.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_builtin.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_builtin.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..619729e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_builtin.wgsl.expected.fxc.hlsl

@@ -0,0 +1,18 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[3];
+};
+
+float3x4 tint_symbol(uint4 buffer[3], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float4x3 t = transpose(tint_symbol(u, 0u));
+  const float l = length(asfloat(u[1]));
+  const float a = abs(asfloat(u[0]).ywxz.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_builtin.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_builtin.wgsl.expected.glsl
new file mode 100644
index 0000000..f7f6904
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_builtin.wgsl.expected.glsl

@@ -0,0 +1,17 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat3x4 inner;
+} u;
+
+void f() {
+  mat4x3 t = transpose(u.inner);
+  float l = length(u.inner[1]);
+  float a = abs(u.inner[0].ywxz.x);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_builtin.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_builtin.wgsl.expected.msl
new file mode 100644
index 0000000..78ddda6
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_builtin.wgsl.expected.msl

@@ -0,0 +1,10 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void f(const constant float3x4* tint_symbol [[buffer(0)]]) {
+  float4x3 const t = transpose(*(tint_symbol));
+  float const l = length((*(tint_symbol))[1]);
+  float const a = fabs(float4((*(tint_symbol))[0]).ywxz[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_builtin.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_builtin.wgsl.expected.spvasm
new file mode 100644
index 0000000..150517c
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_builtin.wgsl.expected.spvasm

@@ -0,0 +1,53 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 32
+; Schema: 0
+               OpCapability Shader
+         %20 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%mat3v4float = OpTypeMatrix %v4float 3
+    %u_block = OpTypeStruct %mat3v4float
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+       %void = OpTypeVoid
+          %7 = OpTypeFunction %void
+    %v3float = OpTypeVector %float 3
+%mat4v3float = OpTypeMatrix %v3float 4
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_mat3v4float = OpTypePointer Uniform %mat3v4float
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+         %27 = OpConstantNull %int
+          %f = OpFunction %void None %7
+         %10 = OpLabel
+         %17 = OpAccessChain %_ptr_Uniform_mat3v4float %u %uint_0
+         %18 = OpLoad %mat3v4float %17
+         %11 = OpTranspose %mat4v3float %18
+         %24 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %int_1
+         %25 = OpLoad %v4float %24
+         %19 = OpExtInst %float %20 Length %25
+         %28 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %27
+         %29 = OpLoad %v4float %28
+         %30 = OpVectorShuffle %v4float %29 %29 1 3 0 2
+         %31 = OpCompositeExtract %float %30 0
+         %26 = OpExtInst %float %20 FAbs %31
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_builtin.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_builtin.wgsl.expected.wgsl
new file mode 100644
index 0000000..cd3b10d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_builtin.wgsl.expected.wgsl

@@ -0,0 +1,8 @@
+@group(0) @binding(0) var<uniform> u : mat3x4<f32>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let t = transpose(u);
+  let l = length(u[1]);
+  let a = abs(u[0].ywxz.x);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_fn.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_fn.wgsl
new file mode 100644
index 0000000..8e98ca7
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_fn.wgsl

@@ -0,0 +1,14 @@
+@group(0) @binding(0) var<uniform> u : mat3x4<f32>;
+
+fn a(m : mat3x4<f32>) {}
+fn b(v : vec4<f32>) {}
+fn c(f : f32) {}
+
+@compute @workgroup_size(1)
+fn f() {
+    a(u);
+    b(u[1]);
+    b(u[1].ywxz);
+    c(u[1].x);
+    c(u[1].ywxz.x);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_fn.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_fn.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..697aafc
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_fn.wgsl.expected.dxc.hlsl

@@ -0,0 +1,29 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[3];
+};
+
+void a(float3x4 m) {
+}
+
+void b(float4 v) {
+}
+
+void c(float f_1) {
+}
+
+float3x4 tint_symbol(uint4 buffer[3], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(asfloat(u[1]));
+  b(asfloat(u[1]).ywxz);
+  c(asfloat(u[1].x));
+  c(asfloat(u[1]).ywxz.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_fn.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_fn.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..697aafc
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_fn.wgsl.expected.fxc.hlsl

@@ -0,0 +1,29 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[3];
+};
+
+void a(float3x4 m) {
+}
+
+void b(float4 v) {
+}
+
+void c(float f_1) {
+}
+
+float3x4 tint_symbol(uint4 buffer[3], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(asfloat(u[1]));
+  b(asfloat(u[1]).ywxz);
+  c(asfloat(u[1].x));
+  c(asfloat(u[1]).ywxz.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_fn.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_fn.wgsl.expected.glsl
new file mode 100644
index 0000000..620404b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_fn.wgsl.expected.glsl

@@ -0,0 +1,28 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat3x4 inner;
+} u;
+
+void a(mat3x4 m) {
+}
+
+void b(vec4 v) {
+}
+
+void c(float f_1) {
+}
+
+void f() {
+  a(u.inner);
+  b(u.inner[1]);
+  b(u.inner[1].ywxz);
+  c(u.inner[1].x);
+  c(u.inner[1].ywxz.x);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_fn.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_fn.wgsl.expected.msl
new file mode 100644
index 0000000..1097f7e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_fn.wgsl.expected.msl

@@ -0,0 +1,21 @@
+#include <metal_stdlib>
+
+using namespace metal;
+void a(float3x4 m) {
+}
+
+void b(float4 v) {
+}
+
+void c(float f_1) {
+}
+
+kernel void f(const constant float3x4* tint_symbol [[buffer(0)]]) {
+  a(*(tint_symbol));
+  b((*(tint_symbol))[1]);
+  b(float4((*(tint_symbol))[1]).ywxz);
+  c((*(tint_symbol))[1][0]);
+  c(float4((*(tint_symbol))[1]).ywxz[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_fn.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_fn.wgsl.expected.spvasm
new file mode 100644
index 0000000..2e87c98
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_fn.wgsl.expected.spvasm

@@ -0,0 +1,81 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 48
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %a "a"
+               OpName %m "m"
+               OpName %b "b"
+               OpName %v "v"
+               OpName %c "c"
+               OpName %f_1 "f_1"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%mat3v4float = OpTypeMatrix %v4float 3
+    %u_block = OpTypeStruct %mat3v4float
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+       %void = OpTypeVoid
+          %7 = OpTypeFunction %void %mat3v4float
+         %12 = OpTypeFunction %void %v4float
+         %16 = OpTypeFunction %void %float
+         %20 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_mat3v4float = OpTypePointer Uniform %mat3v4float
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+          %a = OpFunction %void None %7
+          %m = OpFunctionParameter %mat3v4float
+         %11 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %b = OpFunction %void None %12
+          %v = OpFunctionParameter %v4float
+         %15 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %c = OpFunction %void None %16
+        %f_1 = OpFunctionParameter %float
+         %19 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %20
+         %22 = OpLabel
+         %27 = OpAccessChain %_ptr_Uniform_mat3v4float %u %uint_0
+         %28 = OpLoad %mat3v4float %27
+         %23 = OpFunctionCall %void %a %28
+         %33 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %int_1
+         %34 = OpLoad %v4float %33
+         %29 = OpFunctionCall %void %b %34
+         %36 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %int_1
+         %37 = OpLoad %v4float %36
+         %38 = OpVectorShuffle %v4float %37 %37 1 3 0 2
+         %35 = OpFunctionCall %void %b %38
+         %41 = OpAccessChain %_ptr_Uniform_float %u %uint_0 %int_1 %uint_0
+         %42 = OpLoad %float %41
+         %39 = OpFunctionCall %void %c %42
+         %44 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %int_1
+         %45 = OpLoad %v4float %44
+         %46 = OpVectorShuffle %v4float %45 %45 1 3 0 2
+         %47 = OpCompositeExtract %float %46 0
+         %43 = OpFunctionCall %void %c %47
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_fn.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_fn.wgsl.expected.wgsl
new file mode 100644
index 0000000..cb1ecfb
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_fn.wgsl.expected.wgsl

@@ -0,0 +1,19 @@
+@group(0) @binding(0) var<uniform> u : mat3x4<f32>;
+
+fn a(m : mat3x4<f32>) {
+}
+
+fn b(v : vec4<f32>) {
+}
+
+fn c(f : f32) {
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  a(u);
+  b(u[1]);
+  b(u[1].ywxz);
+  c(u[1].x);
+  c(u[1].ywxz.x);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_private.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_private.wgsl
new file mode 100644
index 0000000..1cfdf67
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_private.wgsl

@@ -0,0 +1,10 @@
+@group(0) @binding(0) var<uniform> u : mat3x4<f32>;
+var<private> p : mat3x4<f32>;
+
+@compute @workgroup_size(1)
+fn f() {
+    p = u;
+    p[1] = u[0];
+    p[1] = u[0].ywxz;
+    p[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_private.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_private.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..f6518d9
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_private.wgsl.expected.dxc.hlsl

@@ -0,0 +1,20 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[3];
+};
+static float3x4 p = float3x4(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f);
+
+float3x4 tint_symbol(uint4 buffer[3], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = asfloat(u[0]);
+  p[1] = asfloat(u[0]).ywxz;
+  p[0][1] = asfloat(u[1].x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_private.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_private.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..f6518d9
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_private.wgsl.expected.fxc.hlsl

@@ -0,0 +1,20 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[3];
+};
+static float3x4 p = float3x4(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f);
+
+float3x4 tint_symbol(uint4 buffer[3], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = asfloat(u[0]);
+  p[1] = asfloat(u[0]).ywxz;
+  p[0][1] = asfloat(u[1].x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_private.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_private.wgsl.expected.glsl
new file mode 100644
index 0000000..ebbbb86
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_private.wgsl.expected.glsl

@@ -0,0 +1,19 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat3x4 inner;
+} u;
+
+mat3x4 p = mat3x4(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f);
+void f() {
+  p = u.inner;
+  p[1] = u.inner[0];
+  p[1] = u.inner[0].ywxz;
+  p[0][1] = u.inner[1][0];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_private.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_private.wgsl.expected.msl
new file mode 100644
index 0000000..a073777
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_private.wgsl.expected.msl

@@ -0,0 +1,12 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void f(const constant float3x4* tint_symbol_1 [[buffer(0)]]) {
+  thread float3x4 tint_symbol = float3x4(0.0f);
+  tint_symbol = *(tint_symbol_1);
+  tint_symbol[1] = (*(tint_symbol_1))[0];
+  tint_symbol[1] = float4((*(tint_symbol_1))[0]).ywxz;
+  tint_symbol[0][1] = (*(tint_symbol_1))[1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_private.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_private.wgsl.expected.spvasm
new file mode 100644
index 0000000..82befc1
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_private.wgsl.expected.spvasm

@@ -0,0 +1,62 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 36
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %p "p"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%mat3v4float = OpTypeMatrix %v4float 3
+    %u_block = OpTypeStruct %mat3v4float
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%_ptr_Private_mat3v4float = OpTypePointer Private %mat3v4float
+          %9 = OpConstantNull %mat3v4float
+          %p = OpVariable %_ptr_Private_mat3v4float Private %9
+       %void = OpTypeVoid
+         %10 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_mat3v4float = OpTypePointer Uniform %mat3v4float
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Private_v4float = OpTypePointer Private %v4float
+         %23 = OpConstantNull %int
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+%_ptr_Private_float = OpTypePointer Private %float
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+          %f = OpFunction %void None %10
+         %13 = OpLabel
+         %17 = OpAccessChain %_ptr_Uniform_mat3v4float %u %uint_0
+         %18 = OpLoad %mat3v4float %17
+               OpStore %p %18
+         %22 = OpAccessChain %_ptr_Private_v4float %p %int_1
+         %25 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %23
+         %26 = OpLoad %v4float %25
+               OpStore %22 %26
+         %27 = OpAccessChain %_ptr_Private_v4float %p %int_1
+         %28 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %23
+         %29 = OpLoad %v4float %28
+         %30 = OpVectorShuffle %v4float %29 %29 1 3 0 2
+               OpStore %27 %30
+         %32 = OpAccessChain %_ptr_Private_float %p %23 %int_1
+         %34 = OpAccessChain %_ptr_Uniform_float %u %uint_0 %int_1 %23
+         %35 = OpLoad %float %34
+               OpStore %32 %35
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_private.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_private.wgsl.expected.wgsl
new file mode 100644
index 0000000..41ffb6b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_private.wgsl.expected.wgsl

@@ -0,0 +1,11 @@
+@group(0) @binding(0) var<uniform> u : mat3x4<f32>;
+
+var<private> p : mat3x4<f32>;
+
+@compute @workgroup_size(1)
+fn f() {
+  p = u;
+  p[1] = u[0];
+  p[1] = u[0].ywxz;
+  p[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_storage.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_storage.wgsl
new file mode 100644
index 0000000..1742cf7
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_storage.wgsl

@@ -0,0 +1,10 @@
+@group(0) @binding(0) var<uniform> u : mat3x4<f32>;
+@group(0) @binding(1) var<storage, read_write> s : mat3x4<f32>;
+
+@compute @workgroup_size(1)
+fn f() {
+    s = u;
+    s[1] = u[0];
+    s[1] = u[0].ywxz;
+    s[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_storage.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_storage.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..7891cea
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_storage.wgsl.expected.dxc.hlsl

@@ -0,0 +1,26 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[3];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, float3x4 value) {
+  buffer.Store4((offset + 0u), asuint(value[0u]));
+  buffer.Store4((offset + 16u), asuint(value[1u]));
+  buffer.Store4((offset + 32u), asuint(value[2u]));
+}
+
+float3x4 tint_symbol_2(uint4 buffer[3], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_2(u, 0u));
+  s.Store4(16u, asuint(asfloat(u[0])));
+  s.Store4(16u, asuint(asfloat(u[0]).ywxz));
+  s.Store(4u, asuint(asfloat(u[1].x)));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_storage.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_storage.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..7891cea
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_storage.wgsl.expected.fxc.hlsl

@@ -0,0 +1,26 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[3];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, float3x4 value) {
+  buffer.Store4((offset + 0u), asuint(value[0u]));
+  buffer.Store4((offset + 16u), asuint(value[1u]));
+  buffer.Store4((offset + 32u), asuint(value[2u]));
+}
+
+float3x4 tint_symbol_2(uint4 buffer[3], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_2(u, 0u));
+  s.Store4(16u, asuint(asfloat(u[0])));
+  s.Store4(16u, asuint(asfloat(u[0]).ywxz));
+  s.Store(4u, asuint(asfloat(u[1].x)));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_storage.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_storage.wgsl.expected.glsl
new file mode 100644
index 0000000..358b0c2
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_storage.wgsl.expected.glsl

@@ -0,0 +1,22 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat3x4 inner;
+} u;
+
+layout(binding = 1, std430) buffer u_block_ssbo {
+  mat3x4 inner;
+} s;
+
+void f() {
+  s.inner = u.inner;
+  s.inner[1] = u.inner[0];
+  s.inner[1] = u.inner[0].ywxz;
+  s.inner[0][1] = u.inner[1][0];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_storage.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_storage.wgsl.expected.msl
new file mode 100644
index 0000000..d7903f5
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_storage.wgsl.expected.msl

@@ -0,0 +1,11 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void f(device float3x4* tint_symbol [[buffer(1)]], const constant float3x4* tint_symbol_1 [[buffer(0)]]) {
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[0];
+  (*(tint_symbol))[1] = float4((*(tint_symbol_1))[0]).ywxz;
+  (*(tint_symbol))[0][1] = (*(tint_symbol_1))[1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_storage.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_storage.wgsl.expected.spvasm
new file mode 100644
index 0000000..8ca73c9
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_storage.wgsl.expected.spvasm

@@ -0,0 +1,65 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 37
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %s "s"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpDecorate %s DescriptorSet 0
+               OpDecorate %s Binding 1
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%mat3v4float = OpTypeMatrix %v4float 3
+    %u_block = OpTypeStruct %mat3v4float
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%_ptr_StorageBuffer_u_block = OpTypePointer StorageBuffer %u_block
+          %s = OpVariable %_ptr_StorageBuffer_u_block StorageBuffer
+       %void = OpTypeVoid
+          %9 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_StorageBuffer_mat3v4float = OpTypePointer StorageBuffer %mat3v4float
+%_ptr_Uniform_mat3v4float = OpTypePointer Uniform %mat3v4float
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_StorageBuffer_v4float = OpTypePointer StorageBuffer %v4float
+         %24 = OpConstantNull %int
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+          %f = OpFunction %void None %9
+         %12 = OpLabel
+         %16 = OpAccessChain %_ptr_StorageBuffer_mat3v4float %s %uint_0
+         %18 = OpAccessChain %_ptr_Uniform_mat3v4float %u %uint_0
+         %19 = OpLoad %mat3v4float %18
+               OpStore %16 %19
+         %23 = OpAccessChain %_ptr_StorageBuffer_v4float %s %uint_0 %int_1
+         %26 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %24
+         %27 = OpLoad %v4float %26
+               OpStore %23 %27
+         %28 = OpAccessChain %_ptr_StorageBuffer_v4float %s %uint_0 %int_1
+         %29 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %24
+         %30 = OpLoad %v4float %29
+         %31 = OpVectorShuffle %v4float %30 %30 1 3 0 2
+               OpStore %28 %31
+         %33 = OpAccessChain %_ptr_StorageBuffer_float %s %uint_0 %24 %int_1
+         %35 = OpAccessChain %_ptr_Uniform_float %u %uint_0 %int_1 %24
+         %36 = OpLoad %float %35
+               OpStore %33 %36
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_storage.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_storage.wgsl.expected.wgsl
new file mode 100644
index 0000000..4d2e528
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_storage.wgsl.expected.wgsl

@@ -0,0 +1,11 @@
+@group(0) @binding(0) var<uniform> u : mat3x4<f32>;
+
+@group(0) @binding(1) var<storage, read_write> s : mat3x4<f32>;
+
+@compute @workgroup_size(1)
+fn f() {
+  s = u;
+  s[1] = u[0];
+  s[1] = u[0].ywxz;
+  s[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_workgroup.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_workgroup.wgsl
new file mode 100644
index 0000000..5266522
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_workgroup.wgsl

@@ -0,0 +1,10 @@
+@group(0) @binding(0) var<uniform> u : mat3x4<f32>;
+var<workgroup> w : mat3x4<f32>;
+
+@compute @workgroup_size(1)
+fn f() {
+    w = u;
+    w[1] = u[0];
+    w[1] = u[0].ywxz;
+    w[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_workgroup.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_workgroup.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..84b835f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_workgroup.wgsl.expected.dxc.hlsl

@@ -0,0 +1,32 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[3];
+};
+groupshared float3x4 w;
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+float3x4 tint_symbol_2(uint4 buffer[3], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]));
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    w = float3x4((0.0f).xxxx, (0.0f).xxxx, (0.0f).xxxx);
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = asfloat(u[0]);
+  w[1] = asfloat(u[0]).ywxz;
+  w[0][1] = asfloat(u[1].x);
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_workgroup.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_workgroup.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..84b835f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_workgroup.wgsl.expected.fxc.hlsl

@@ -0,0 +1,32 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[3];
+};
+groupshared float3x4 w;
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+float3x4 tint_symbol_2(uint4 buffer[3], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]));
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    w = float3x4((0.0f).xxxx, (0.0f).xxxx, (0.0f).xxxx);
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = asfloat(u[0]);
+  w[1] = asfloat(u[0]).ywxz;
+  w[0][1] = asfloat(u[1].x);
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_workgroup.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_workgroup.wgsl.expected.glsl
new file mode 100644
index 0000000..bf3eb87
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_workgroup.wgsl.expected.glsl

@@ -0,0 +1,23 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat3x4 inner;
+} u;
+
+shared mat3x4 w;
+void f(uint local_invocation_index) {
+  {
+    w = mat3x4(vec4(0.0f), vec4(0.0f), vec4(0.0f));
+  }
+  barrier();
+  w = u.inner;
+  w[1] = u.inner[0];
+  w[1] = u.inner[0].ywxz;
+  w[0][1] = u.inner[1][0];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f(gl_LocalInvocationIndex);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_workgroup.wgsl.expected.msl
new file mode 100644
index 0000000..37ed55e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_workgroup.wgsl.expected.msl

@@ -0,0 +1,24 @@
+#include <metal_stdlib>
+
+using namespace metal;
+struct tint_symbol_5 {
+  float3x4 w;
+};
+
+void f_inner(uint local_invocation_index, threadgroup float3x4* const tint_symbol, const constant float3x4* const tint_symbol_1) {
+  {
+    *(tint_symbol) = float3x4(float4(0.0f), float4(0.0f), float4(0.0f));
+  }
+  threadgroup_barrier(mem_flags::mem_threadgroup);
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[0];
+  (*(tint_symbol))[1] = float4((*(tint_symbol_1))[0]).ywxz;
+  (*(tint_symbol))[0][1] = (*(tint_symbol_1))[1][0];
+}
+
+kernel void f(const constant float3x4* tint_symbol_4 [[buffer(0)]], threadgroup tint_symbol_5* tint_symbol_3 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
+  threadgroup float3x4* const tint_symbol_2 = &((*(tint_symbol_3)).w);
+  f_inner(local_invocation_index, tint_symbol_2, tint_symbol_4);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_workgroup.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_workgroup.wgsl.expected.spvasm
new file mode 100644
index 0000000..7d91714
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_workgroup.wgsl.expected.spvasm

@@ -0,0 +1,80 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 47
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f" %local_invocation_index_1
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %local_invocation_index_1 "local_invocation_index_1"
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %w "w"
+               OpName %f_inner "f_inner"
+               OpName %local_invocation_index "local_invocation_index"
+               OpName %f "f"
+               OpDecorate %local_invocation_index_1 BuiltIn LocalInvocationIndex
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %uint = OpTypeInt 32 0
+%_ptr_Input_uint = OpTypePointer Input %uint
+%local_invocation_index_1 = OpVariable %_ptr_Input_uint Input
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%mat3v4float = OpTypeMatrix %v4float 3
+    %u_block = OpTypeStruct %mat3v4float
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%_ptr_Workgroup_mat3v4float = OpTypePointer Workgroup %mat3v4float
+          %w = OpVariable %_ptr_Workgroup_mat3v4float Workgroup
+       %void = OpTypeVoid
+         %12 = OpTypeFunction %void %uint
+         %17 = OpConstantNull %mat3v4float
+     %uint_2 = OpConstant %uint 2
+   %uint_264 = OpConstant %uint 264
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_mat3v4float = OpTypePointer Uniform %mat3v4float
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Workgroup_v4float = OpTypePointer Workgroup %v4float
+         %29 = OpConstantNull %int
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+%_ptr_Workgroup_float = OpTypePointer Workgroup %float
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+         %42 = OpTypeFunction %void
+    %f_inner = OpFunction %void None %12
+%local_invocation_index = OpFunctionParameter %uint
+         %16 = OpLabel
+               OpStore %w %17
+               OpControlBarrier %uint_2 %uint_2 %uint_264
+         %23 = OpAccessChain %_ptr_Uniform_mat3v4float %u %uint_0
+         %24 = OpLoad %mat3v4float %23
+               OpStore %w %24
+         %28 = OpAccessChain %_ptr_Workgroup_v4float %w %int_1
+         %31 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %29
+         %32 = OpLoad %v4float %31
+               OpStore %28 %32
+         %33 = OpAccessChain %_ptr_Workgroup_v4float %w %int_1
+         %34 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %29
+         %35 = OpLoad %v4float %34
+         %36 = OpVectorShuffle %v4float %35 %35 1 3 0 2
+               OpStore %33 %36
+         %38 = OpAccessChain %_ptr_Workgroup_float %w %29 %int_1
+         %40 = OpAccessChain %_ptr_Uniform_float %u %uint_0 %int_1 %29
+         %41 = OpLoad %float %40
+               OpStore %38 %41
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %42
+         %44 = OpLabel
+         %46 = OpLoad %uint %local_invocation_index_1
+         %45 = OpFunctionCall %void %f_inner %46
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_workgroup.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_workgroup.wgsl.expected.wgsl
new file mode 100644
index 0000000..05782a8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat3x4_f32/to_workgroup.wgsl.expected.wgsl

@@ -0,0 +1,11 @@
+@group(0) @binding(0) var<uniform> u : mat3x4<f32>;
+
+var<workgroup> w : mat3x4<f32>;
+
+@compute @workgroup_size(1)
+fn f() {
+  w = u;
+  w[1] = u[0];
+  w[1] = u[0].ywxz;
+  w[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/dynamic_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/dynamic_index_via_ptr.wgsl
new file mode 100644
index 0000000..b9a4d8d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/dynamic_index_via_ptr.wgsl

@@ -0,0 +1,15 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> m : mat4x2<f16>;
+
+var<private> counter = 0;
+fn i() -> i32 { counter++; return counter; }
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_m   = &m;
+  let p_m_i = &((*p_m)[i()]);
+
+  let l_m   : mat4x2<f16> = *p_m;
+  let l_m_i : vec2<f16>   = *p_m_i;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..fe97f69
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,31 @@
+cbuffer cbuffer_m : register(b0, space0) {
+  uint4 m[1];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+matrix<float16_t, 4, 2> tint_symbol(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  const uint scalar_offset_3 = ((offset + 12u)) / 4;
+  uint ubo_load_3 = buffer[scalar_offset_3 / 4][scalar_offset_3 % 4];
+  return matrix<float16_t, 4, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_m_i_save = i();
+  const matrix<float16_t, 4, 2> l_m = tint_symbol(m, 0u);
+  const uint scalar_offset_4 = ((4u * uint(p_m_i_save))) / 4;
+  uint ubo_load_4 = m[scalar_offset_4 / 4][scalar_offset_4 % 4];
+  const vector<float16_t, 2> l_m_i = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_4 & 0xFFFF)), float16_t(f16tof32(ubo_load_4 >> 16)));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..492df55
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,36 @@
+SKIP: FAILED
+
+cbuffer cbuffer_m : register(b0, space0) {
+  uint4 m[1];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+matrix<float16_t, 4, 2> tint_symbol(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  const uint scalar_offset_3 = ((offset + 12u)) / 4;
+  uint ubo_load_3 = buffer[scalar_offset_3 / 4][scalar_offset_3 % 4];
+  return matrix<float16_t, 4, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_m_i_save = i();
+  const matrix<float16_t, 4, 2> l_m = tint_symbol(m, 0u);
+  const uint scalar_offset_4 = ((4u * uint(p_m_i_save))) / 4;
+  uint ubo_load_4 = m[scalar_offset_4 / 4][scalar_offset_4 % 4];
+  const vector<float16_t, 2> l_m_i = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_4 & 0xFFFF)), float16_t(f16tof32(ubo_load_4 >> 16)));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000013D9C70AA90(11,8-16): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/dynamic_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/dynamic_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..ade941c
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/dynamic_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,58 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform m_block_std140_ubo {
+  f16vec2 inner_0;
+  f16vec2 inner_1;
+  f16vec2 inner_2;
+  f16vec2 inner_3;
+} m;
+
+int counter = 0;
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+f16mat4x2 load_m_inner() {
+  return f16mat4x2(m.inner_0, m.inner_1, m.inner_2, m.inner_3);
+}
+
+f16vec2 load_m_inner_p0(uint p0) {
+  switch(p0) {
+    case 0u: {
+      return m.inner_0;
+      break;
+    }
+    case 1u: {
+      return m.inner_1;
+      break;
+    }
+    case 2u: {
+      return m.inner_2;
+      break;
+    }
+    case 3u: {
+      return m.inner_3;
+      break;
+    }
+    default: {
+      return f16vec2(0.0hf);
+      break;
+    }
+  }
+}
+
+void f() {
+  f16mat4x2 p_m = load_m_inner();
+  int tint_symbol = i();
+  f16vec2 p_m_i = load_m_inner_p0(uint(tint_symbol));
+  f16mat4x2 l_m = load_m_inner();
+  f16vec2 l_m_i = load_m_inner_p0(uint(tint_symbol));
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/dynamic_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/dynamic_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..756ba6d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/dynamic_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,17 @@
+#include <metal_stdlib>
+
+using namespace metal;
+int i() {
+  thread int tint_symbol_1 = 0;
+  tint_symbol_1 = as_type<int>((as_type<uint>(tint_symbol_1) + as_type<uint>(1)));
+  return tint_symbol_1;
+}
+
+kernel void f(const constant half4x2* tint_symbol_2 [[buffer(0)]]) {
+  int const tint_symbol = i();
+  int const p_m_i_save = tint_symbol;
+  half4x2 const l_m = *(tint_symbol_2);
+  half2 const l_m_i = (*(tint_symbol_2))[p_m_i_save];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/dynamic_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/dynamic_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..c3c3369
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/dynamic_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,111 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 68
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %m_block_std140 "m_block_std140"
+               OpMemberName %m_block_std140 0 "inner_0"
+               OpMemberName %m_block_std140 1 "inner_1"
+               OpMemberName %m_block_std140 2 "inner_2"
+               OpMemberName %m_block_std140 3 "inner_3"
+               OpName %m "m"
+               OpName %counter "counter"
+               OpName %i "i"
+               OpName %load_m_inner "load_m_inner"
+               OpName %load_m_inner_p0 "load_m_inner_p0"
+               OpName %p0 "p0"
+               OpName %f "f"
+               OpDecorate %m_block_std140 Block
+               OpMemberDecorate %m_block_std140 0 Offset 0
+               OpMemberDecorate %m_block_std140 1 Offset 4
+               OpMemberDecorate %m_block_std140 2 Offset 8
+               OpMemberDecorate %m_block_std140 3 Offset 12
+               OpDecorate %m NonWritable
+               OpDecorate %m DescriptorSet 0
+               OpDecorate %m Binding 0
+       %half = OpTypeFloat 16
+     %v2half = OpTypeVector %half 2
+%m_block_std140 = OpTypeStruct %v2half %v2half %v2half %v2half
+%_ptr_Uniform_m_block_std140 = OpTypePointer Uniform %m_block_std140
+          %m = OpVariable %_ptr_Uniform_m_block_std140 Uniform
+        %int = OpTypeInt 32 1
+          %7 = OpConstantNull %int
+%_ptr_Private_int = OpTypePointer Private %int
+    %counter = OpVariable %_ptr_Private_int Private %7
+         %10 = OpTypeFunction %int
+      %int_1 = OpConstant %int 1
+ %mat4v2half = OpTypeMatrix %v2half 4
+         %17 = OpTypeFunction %mat4v2half
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+     %uint_3 = OpConstant %uint 3
+         %41 = OpTypeFunction %v2half %uint
+         %59 = OpConstantNull %v2half
+       %void = OpTypeVoid
+         %60 = OpTypeFunction %void
+          %i = OpFunction %int None %10
+         %12 = OpLabel
+         %13 = OpLoad %int %counter
+         %15 = OpIAdd %int %13 %int_1
+               OpStore %counter %15
+         %16 = OpLoad %int %counter
+               OpReturnValue %16
+               OpFunctionEnd
+%load_m_inner = OpFunction %mat4v2half None %17
+         %20 = OpLabel
+         %26 = OpAccessChain %_ptr_Uniform_v2half %m %uint_0
+         %27 = OpLoad %v2half %26
+         %30 = OpAccessChain %_ptr_Uniform_v2half %m %uint_1
+         %31 = OpLoad %v2half %30
+         %34 = OpAccessChain %_ptr_Uniform_v2half %m %uint_2
+         %35 = OpLoad %v2half %34
+         %38 = OpAccessChain %_ptr_Uniform_v2half %m %uint_3
+         %39 = OpLoad %v2half %38
+         %40 = OpCompositeConstruct %mat4v2half %27 %31 %35 %39
+               OpReturnValue %40
+               OpFunctionEnd
+%load_m_inner_p0 = OpFunction %v2half None %41
+         %p0 = OpFunctionParameter %uint
+         %44 = OpLabel
+               OpSelectionMerge %45 None
+               OpSwitch %p0 %46 0 %47 1 %48 2 %49 3 %50
+         %47 = OpLabel
+         %51 = OpAccessChain %_ptr_Uniform_v2half %m %uint_0
+         %52 = OpLoad %v2half %51
+               OpReturnValue %52
+         %48 = OpLabel
+         %53 = OpAccessChain %_ptr_Uniform_v2half %m %uint_1
+         %54 = OpLoad %v2half %53
+               OpReturnValue %54
+         %49 = OpLabel
+         %55 = OpAccessChain %_ptr_Uniform_v2half %m %uint_2
+         %56 = OpLoad %v2half %55
+               OpReturnValue %56
+         %50 = OpLabel
+         %57 = OpAccessChain %_ptr_Uniform_v2half %m %uint_3
+         %58 = OpLoad %v2half %57
+               OpReturnValue %58
+         %46 = OpLabel
+               OpReturnValue %59
+         %45 = OpLabel
+               OpReturnValue %59
+               OpFunctionEnd
+          %f = OpFunction %void None %60
+         %63 = OpLabel
+         %64 = OpFunctionCall %int %i
+         %65 = OpFunctionCall %mat4v2half %load_m_inner
+         %67 = OpBitcast %uint %64
+         %66 = OpFunctionCall %v2half %load_m_inner_p0 %67
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/dynamic_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/dynamic_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..4f04f9e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/dynamic_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,18 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> m : mat4x2<f16>;
+
+var<private> counter = 0;
+
+fn i() -> i32 {
+  counter++;
+  return counter;
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_m = &(m);
+  let p_m_i = &((*(p_m))[i()]);
+  let l_m : mat4x2<f16> = *(p_m);
+  let l_m_i : vec2<f16> = *(p_m_i);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/static_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/static_index_via_ptr.wgsl
new file mode 100644
index 0000000..26076b16
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/static_index_via_ptr.wgsl

@@ -0,0 +1,15 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> m : mat4x2<f16>;
+
+var<private> counter = 0;
+fn i() -> i32 { counter++; return counter; }
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_m   = &m;
+  let p_m_1 = &((*p_m)[1]);
+
+  let l_m   : mat4x2<f16> = *p_m;
+  let l_m_1 : vec2<f16>   = *p_m_1;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..e56d25a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,29 @@
+cbuffer cbuffer_m : register(b0, space0) {
+  uint4 m[1];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+matrix<float16_t, 4, 2> tint_symbol(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  const uint scalar_offset_3 = ((offset + 12u)) / 4;
+  uint ubo_load_3 = buffer[scalar_offset_3 / 4][scalar_offset_3 % 4];
+  return matrix<float16_t, 4, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 4, 2> l_m = tint_symbol(m, 0u);
+  uint ubo_load_4 = m[0].y;
+  const vector<float16_t, 2> l_m_1 = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_4 & 0xFFFF)), float16_t(f16tof32(ubo_load_4 >> 16)));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..d7d572d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,34 @@
+SKIP: FAILED
+
+cbuffer cbuffer_m : register(b0, space0) {
+  uint4 m[1];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+matrix<float16_t, 4, 2> tint_symbol(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  const uint scalar_offset_3 = ((offset + 12u)) / 4;
+  uint ubo_load_3 = buffer[scalar_offset_3 / 4][scalar_offset_3 % 4];
+  return matrix<float16_t, 4, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 4, 2> l_m = tint_symbol(m, 0u);
+  uint ubo_load_4 = m[0].y;
+  const vector<float16_t, 2> l_m_1 = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_4 & 0xFFFF)), float16_t(f16tof32(ubo_load_4 >> 16)));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000025078B145D0(11,8-16): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/static_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/static_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..7c754a6
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/static_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,26 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform m_block_std140_ubo {
+  f16vec2 inner_0;
+  f16vec2 inner_1;
+  f16vec2 inner_2;
+  f16vec2 inner_3;
+} m;
+
+f16mat4x2 load_m_inner() {
+  return f16mat4x2(m.inner_0, m.inner_1, m.inner_2, m.inner_3);
+}
+
+void f() {
+  f16mat4x2 p_m = load_m_inner();
+  f16vec2 p_m_1 = m.inner_1;
+  f16mat4x2 l_m = load_m_inner();
+  f16vec2 l_m_1 = m.inner_1;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/static_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/static_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..a054f04
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/static_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,15 @@
+#include <metal_stdlib>
+
+using namespace metal;
+int i() {
+  thread int tint_symbol = 0;
+  tint_symbol = as_type<int>((as_type<uint>(tint_symbol) + as_type<uint>(1)));
+  return tint_symbol;
+}
+
+kernel void f(const constant half4x2* tint_symbol_1 [[buffer(0)]]) {
+  half4x2 const l_m = *(tint_symbol_1);
+  half2 const l_m_1 = (*(tint_symbol_1))[1];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/static_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/static_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..216db17
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/static_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,80 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 48
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %m_block_std140 "m_block_std140"
+               OpMemberName %m_block_std140 0 "inner_0"
+               OpMemberName %m_block_std140 1 "inner_1"
+               OpMemberName %m_block_std140 2 "inner_2"
+               OpMemberName %m_block_std140 3 "inner_3"
+               OpName %m "m"
+               OpName %counter "counter"
+               OpName %i "i"
+               OpName %load_m_inner "load_m_inner"
+               OpName %f "f"
+               OpDecorate %m_block_std140 Block
+               OpMemberDecorate %m_block_std140 0 Offset 0
+               OpMemberDecorate %m_block_std140 1 Offset 4
+               OpMemberDecorate %m_block_std140 2 Offset 8
+               OpMemberDecorate %m_block_std140 3 Offset 12
+               OpDecorate %m NonWritable
+               OpDecorate %m DescriptorSet 0
+               OpDecorate %m Binding 0
+       %half = OpTypeFloat 16
+     %v2half = OpTypeVector %half 2
+%m_block_std140 = OpTypeStruct %v2half %v2half %v2half %v2half
+%_ptr_Uniform_m_block_std140 = OpTypePointer Uniform %m_block_std140
+          %m = OpVariable %_ptr_Uniform_m_block_std140 Uniform
+        %int = OpTypeInt 32 1
+          %7 = OpConstantNull %int
+%_ptr_Private_int = OpTypePointer Private %int
+    %counter = OpVariable %_ptr_Private_int Private %7
+         %10 = OpTypeFunction %int
+      %int_1 = OpConstant %int 1
+ %mat4v2half = OpTypeMatrix %v2half 4
+         %17 = OpTypeFunction %mat4v2half
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+     %uint_3 = OpConstant %uint 3
+       %void = OpTypeVoid
+         %41 = OpTypeFunction %void
+          %i = OpFunction %int None %10
+         %12 = OpLabel
+         %13 = OpLoad %int %counter
+         %15 = OpIAdd %int %13 %int_1
+               OpStore %counter %15
+         %16 = OpLoad %int %counter
+               OpReturnValue %16
+               OpFunctionEnd
+%load_m_inner = OpFunction %mat4v2half None %17
+         %20 = OpLabel
+         %26 = OpAccessChain %_ptr_Uniform_v2half %m %uint_0
+         %27 = OpLoad %v2half %26
+         %30 = OpAccessChain %_ptr_Uniform_v2half %m %uint_1
+         %31 = OpLoad %v2half %30
+         %34 = OpAccessChain %_ptr_Uniform_v2half %m %uint_2
+         %35 = OpLoad %v2half %34
+         %38 = OpAccessChain %_ptr_Uniform_v2half %m %uint_3
+         %39 = OpLoad %v2half %38
+         %40 = OpCompositeConstruct %mat4v2half %27 %31 %35 %39
+               OpReturnValue %40
+               OpFunctionEnd
+          %f = OpFunction %void None %41
+         %44 = OpLabel
+         %45 = OpFunctionCall %mat4v2half %load_m_inner
+         %46 = OpAccessChain %_ptr_Uniform_v2half %m %uint_1
+         %47 = OpLoad %v2half %46
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/static_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/static_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..b7111b4
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/static_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,18 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> m : mat4x2<f16>;
+
+var<private> counter = 0;
+
+fn i() -> i32 {
+  counter++;
+  return counter;
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_m = &(m);
+  let p_m_1 = &((*(p_m))[1]);
+  let l_m : mat4x2<f16> = *(p_m);
+  let l_m_1 : vec2<f16> = *(p_m_1);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_builtin.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_builtin.wgsl
new file mode 100644
index 0000000..f8e709a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_builtin.wgsl

@@ -0,0 +1,10 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat4x2<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+    let t = transpose(u);
+    let l = length(u[1]);
+    let a = abs(u[0].yx.x);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_builtin.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_builtin.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..1c208e2
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_builtin.wgsl.expected.dxc.hlsl

@@ -0,0 +1,25 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+
+matrix<float16_t, 4, 2> tint_symbol(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  const uint scalar_offset_3 = ((offset + 12u)) / 4;
+  uint ubo_load_3 = buffer[scalar_offset_3 / 4][scalar_offset_3 % 4];
+  return matrix<float16_t, 4, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 2, 4> t = transpose(tint_symbol(u, 0u));
+  uint ubo_load_4 = u[0].y;
+  const float16_t l = length(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_4 & 0xFFFF)), float16_t(f16tof32(ubo_load_4 >> 16))));
+  uint ubo_load_5 = u[0].x;
+  const float16_t a = abs(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_5 & 0xFFFF)), float16_t(f16tof32(ubo_load_5 >> 16))).yx.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_builtin.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_builtin.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..8f7f50f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_builtin.wgsl.expected.fxc.hlsl

@@ -0,0 +1,30 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+
+matrix<float16_t, 4, 2> tint_symbol(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  const uint scalar_offset_3 = ((offset + 12u)) / 4;
+  uint ubo_load_3 = buffer[scalar_offset_3 / 4][scalar_offset_3 % 4];
+  return matrix<float16_t, 4, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 2, 4> t = transpose(tint_symbol(u, 0u));
+  uint ubo_load_4 = u[0].y;
+  const float16_t l = length(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_4 & 0xFFFF)), float16_t(f16tof32(ubo_load_4 >> 16))));
+  uint ubo_load_5 = u[0].x;
+  const float16_t a = abs(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_5 & 0xFFFF)), float16_t(f16tof32(ubo_load_5 >> 16))).yx.x);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001FEE1A64990(5,8-16): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_builtin.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_builtin.wgsl.expected.glsl
new file mode 100644
index 0000000..9038781
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_builtin.wgsl.expected.glsl

@@ -0,0 +1,25 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  f16vec2 inner_0;
+  f16vec2 inner_1;
+  f16vec2 inner_2;
+  f16vec2 inner_3;
+} u;
+
+f16mat4x2 load_u_inner() {
+  return f16mat4x2(u.inner_0, u.inner_1, u.inner_2, u.inner_3);
+}
+
+void f() {
+  f16mat2x4 t = transpose(load_u_inner());
+  float16_t l = length(u.inner_1);
+  float16_t a = abs(u.inner_0.yx[0u]);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_builtin.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_builtin.wgsl.expected.msl
new file mode 100644
index 0000000..1b137c5
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_builtin.wgsl.expected.msl

@@ -0,0 +1,10 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void f(const constant half4x2* tint_symbol [[buffer(0)]]) {
+  half2x4 const t = transpose(*(tint_symbol));
+  half const l = length((*(tint_symbol))[1]);
+  half const a = fabs(half2((*(tint_symbol))[0]).yx[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_builtin.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_builtin.wgsl.expected.spvasm
new file mode 100644
index 0000000..22d89d9
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_builtin.wgsl.expected.spvasm

@@ -0,0 +1,75 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 48
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+         %39 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner_0"
+               OpMemberName %u_block_std140 1 "inner_1"
+               OpMemberName %u_block_std140 2 "inner_2"
+               OpMemberName %u_block_std140 3 "inner_3"
+               OpName %u "u"
+               OpName %load_u_inner "load_u_inner"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %u_block_std140 1 Offset 4
+               OpMemberDecorate %u_block_std140 2 Offset 8
+               OpMemberDecorate %u_block_std140 3 Offset 12
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %half = OpTypeFloat 16
+     %v2half = OpTypeVector %half 2
+%u_block_std140 = OpTypeStruct %v2half %v2half %v2half %v2half
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat4v2half = OpTypeMatrix %v2half 4
+          %6 = OpTypeFunction %mat4v2half
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+     %uint_3 = OpConstant %uint 3
+       %void = OpTypeVoid
+         %30 = OpTypeFunction %void
+     %v4half = OpTypeVector %half 4
+ %mat2v4half = OpTypeMatrix %v4half 2
+         %46 = OpConstantNull %uint
+%load_u_inner = OpFunction %mat4v2half None %6
+          %9 = OpLabel
+         %15 = OpAccessChain %_ptr_Uniform_v2half %u %uint_0
+         %16 = OpLoad %v2half %15
+         %19 = OpAccessChain %_ptr_Uniform_v2half %u %uint_1
+         %20 = OpLoad %v2half %19
+         %23 = OpAccessChain %_ptr_Uniform_v2half %u %uint_2
+         %24 = OpLoad %v2half %23
+         %27 = OpAccessChain %_ptr_Uniform_v2half %u %uint_3
+         %28 = OpLoad %v2half %27
+         %29 = OpCompositeConstruct %mat4v2half %16 %20 %24 %28
+               OpReturnValue %29
+               OpFunctionEnd
+          %f = OpFunction %void None %30
+         %33 = OpLabel
+         %37 = OpFunctionCall %mat4v2half %load_u_inner
+         %34 = OpTranspose %mat2v4half %37
+         %40 = OpAccessChain %_ptr_Uniform_v2half %u %uint_1
+         %41 = OpLoad %v2half %40
+         %38 = OpExtInst %half %39 Length %41
+         %43 = OpAccessChain %_ptr_Uniform_v2half %u %uint_0
+         %44 = OpLoad %v2half %43
+         %45 = OpVectorShuffle %v2half %44 %44 1 0
+         %47 = OpCompositeExtract %half %45 0
+         %42 = OpExtInst %half %39 FAbs %47
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_builtin.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_builtin.wgsl.expected.wgsl
new file mode 100644
index 0000000..1df8062
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_builtin.wgsl.expected.wgsl

@@ -0,0 +1,10 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat4x2<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let t = transpose(u);
+  let l = length(u[1]);
+  let a = abs(u[0].yx.x);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_fn.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_fn.wgsl
new file mode 100644
index 0000000..34cf034
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_fn.wgsl

@@ -0,0 +1,16 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat4x2<f16>;
+
+fn a(m : mat4x2<f16>) {}
+fn b(v : vec2<f16>) {}
+fn c(f : f16) {}
+
+@compute @workgroup_size(1)
+fn f() {
+    a(u);
+    b(u[1]);
+    b(u[1].yx);
+    c(u[1].x);
+    c(u[1].yx.x);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_fn.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_fn.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..a5ffec0
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_fn.wgsl.expected.dxc.hlsl

@@ -0,0 +1,37 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+
+void a(matrix<float16_t, 4, 2> m) {
+}
+
+void b(vector<float16_t, 2> v) {
+}
+
+void c(float16_t f_1) {
+}
+
+matrix<float16_t, 4, 2> tint_symbol(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  const uint scalar_offset_3 = ((offset + 12u)) / 4;
+  uint ubo_load_3 = buffer[scalar_offset_3 / 4][scalar_offset_3 % 4];
+  return matrix<float16_t, 4, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  uint ubo_load_4 = u[0].y;
+  b(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_4 & 0xFFFF)), float16_t(f16tof32(ubo_load_4 >> 16))));
+  uint ubo_load_5 = u[0].y;
+  b(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_5 & 0xFFFF)), float16_t(f16tof32(ubo_load_5 >> 16))).yx);
+  c(float16_t(f16tof32(((u[0].y) & 0xFFFF))));
+  uint ubo_load_6 = u[0].y;
+  c(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_6 & 0xFFFF)), float16_t(f16tof32(ubo_load_6 >> 16))).yx.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_fn.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_fn.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..1cae909
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_fn.wgsl.expected.fxc.hlsl

@@ -0,0 +1,44 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+
+void a(matrix<float16_t, 4, 2> m) {
+}
+
+void b(vector<float16_t, 2> v) {
+}
+
+void c(float16_t f_1) {
+}
+
+matrix<float16_t, 4, 2> tint_symbol(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  const uint scalar_offset_3 = ((offset + 12u)) / 4;
+  uint ubo_load_3 = buffer[scalar_offset_3 / 4][scalar_offset_3 % 4];
+  return matrix<float16_t, 4, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  uint ubo_load_4 = u[0].y;
+  b(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_4 & 0xFFFF)), float16_t(f16tof32(ubo_load_4 >> 16))));
+  uint ubo_load_5 = u[0].y;
+  b(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_5 & 0xFFFF)), float16_t(f16tof32(ubo_load_5 >> 16))).yx);
+  c(float16_t(f16tof32(((u[0].y) & 0xFFFF))));
+  uint ubo_load_6 = u[0].y;
+  c(vector<float16_t, 2>(float16_t(f16tof32(ubo_load_6 & 0xFFFF)), float16_t(f16tof32(ubo_load_6 >> 16))).yx.x);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001F4F9D145E0(5,15-23): error X3000: syntax error: unexpected token 'float16_t'
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001F4F9D145E0(8,15-23): error X3000: syntax error: unexpected token 'float16_t'
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001F4F9D145E0(11,8-16): error X3000: unrecognized identifier 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_fn.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_fn.wgsl.expected.glsl
new file mode 100644
index 0000000..d4ed074
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_fn.wgsl.expected.glsl

@@ -0,0 +1,36 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  f16vec2 inner_0;
+  f16vec2 inner_1;
+  f16vec2 inner_2;
+  f16vec2 inner_3;
+} u;
+
+void a(f16mat4x2 m) {
+}
+
+void b(f16vec2 v) {
+}
+
+void c(float16_t f_1) {
+}
+
+f16mat4x2 load_u_inner() {
+  return f16mat4x2(u.inner_0, u.inner_1, u.inner_2, u.inner_3);
+}
+
+void f() {
+  a(load_u_inner());
+  b(u.inner_1);
+  b(u.inner_1.yx);
+  c(u.inner_1[0u]);
+  c(u.inner_1.yx[0u]);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_fn.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_fn.wgsl.expected.msl
new file mode 100644
index 0000000..25b867f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_fn.wgsl.expected.msl

@@ -0,0 +1,21 @@
+#include <metal_stdlib>
+
+using namespace metal;
+void a(half4x2 m) {
+}
+
+void b(half2 v) {
+}
+
+void c(half f_1) {
+}
+
+kernel void f(const constant half4x2* tint_symbol [[buffer(0)]]) {
+  a(*(tint_symbol));
+  b((*(tint_symbol))[1]);
+  b(half2((*(tint_symbol))[1]).yx);
+  c((*(tint_symbol))[1][0]);
+  c(half2((*(tint_symbol))[1]).yx[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_fn.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_fn.wgsl.expected.spvasm
new file mode 100644
index 0000000..8d44eb1
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_fn.wgsl.expected.spvasm

@@ -0,0 +1,104 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 65
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner_0"
+               OpMemberName %u_block_std140 1 "inner_1"
+               OpMemberName %u_block_std140 2 "inner_2"
+               OpMemberName %u_block_std140 3 "inner_3"
+               OpName %u "u"
+               OpName %a "a"
+               OpName %m "m"
+               OpName %b "b"
+               OpName %v "v"
+               OpName %c "c"
+               OpName %f_1 "f_1"
+               OpName %load_u_inner "load_u_inner"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %u_block_std140 1 Offset 4
+               OpMemberDecorate %u_block_std140 2 Offset 8
+               OpMemberDecorate %u_block_std140 3 Offset 12
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %half = OpTypeFloat 16
+     %v2half = OpTypeVector %half 2
+%u_block_std140 = OpTypeStruct %v2half %v2half %v2half %v2half
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+       %void = OpTypeVoid
+ %mat4v2half = OpTypeMatrix %v2half 4
+          %6 = OpTypeFunction %void %mat4v2half
+         %12 = OpTypeFunction %void %v2half
+         %16 = OpTypeFunction %void %half
+         %20 = OpTypeFunction %mat4v2half
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+     %uint_3 = OpConstant %uint 3
+         %43 = OpTypeFunction %void
+         %56 = OpConstantNull %uint
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+          %a = OpFunction %void None %6
+          %m = OpFunctionParameter %mat4v2half
+         %11 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %b = OpFunction %void None %12
+          %v = OpFunctionParameter %v2half
+         %15 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %c = OpFunction %void None %16
+        %f_1 = OpFunctionParameter %half
+         %19 = OpLabel
+               OpReturn
+               OpFunctionEnd
+%load_u_inner = OpFunction %mat4v2half None %20
+         %22 = OpLabel
+         %28 = OpAccessChain %_ptr_Uniform_v2half %u %uint_0
+         %29 = OpLoad %v2half %28
+         %32 = OpAccessChain %_ptr_Uniform_v2half %u %uint_1
+         %33 = OpLoad %v2half %32
+         %36 = OpAccessChain %_ptr_Uniform_v2half %u %uint_2
+         %37 = OpLoad %v2half %36
+         %40 = OpAccessChain %_ptr_Uniform_v2half %u %uint_3
+         %41 = OpLoad %v2half %40
+         %42 = OpCompositeConstruct %mat4v2half %29 %33 %37 %41
+               OpReturnValue %42
+               OpFunctionEnd
+          %f = OpFunction %void None %43
+         %45 = OpLabel
+         %47 = OpFunctionCall %mat4v2half %load_u_inner
+         %46 = OpFunctionCall %void %a %47
+         %49 = OpAccessChain %_ptr_Uniform_v2half %u %uint_1
+         %50 = OpLoad %v2half %49
+         %48 = OpFunctionCall %void %b %50
+         %52 = OpAccessChain %_ptr_Uniform_v2half %u %uint_1
+         %53 = OpLoad %v2half %52
+         %54 = OpVectorShuffle %v2half %53 %53 1 0
+         %51 = OpFunctionCall %void %b %54
+         %58 = OpAccessChain %_ptr_Uniform_half %u %uint_1 %56
+         %59 = OpLoad %half %58
+         %55 = OpFunctionCall %void %c %59
+         %61 = OpAccessChain %_ptr_Uniform_v2half %u %uint_1
+         %62 = OpLoad %v2half %61
+         %63 = OpVectorShuffle %v2half %62 %62 1 0
+         %64 = OpCompositeExtract %half %63 0
+         %60 = OpFunctionCall %void %c %64
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_fn.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_fn.wgsl.expected.wgsl
new file mode 100644
index 0000000..5bc57a0
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_fn.wgsl.expected.wgsl

@@ -0,0 +1,21 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat4x2<f16>;
+
+fn a(m : mat4x2<f16>) {
+}
+
+fn b(v : vec2<f16>) {
+}
+
+fn c(f : f16) {
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  a(u);
+  b(u[1]);
+  b(u[1].yx);
+  c(u[1].x);
+  c(u[1].yx.x);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_private.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_private.wgsl
new file mode 100644
index 0000000..558d302
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_private.wgsl

@@ -0,0 +1,12 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat4x2<f16>;
+var<private> p : mat4x2<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+    p = u;
+    p[1] = u[0];
+    p[1] = u[0].yx;
+    p[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_private.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_private.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..bbd9415
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_private.wgsl.expected.dxc.hlsl

@@ -0,0 +1,27 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+static matrix<float16_t, 4, 2> p = matrix<float16_t, 4, 2>(float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h));
+
+matrix<float16_t, 4, 2> tint_symbol(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  const uint scalar_offset_3 = ((offset + 12u)) / 4;
+  uint ubo_load_3 = buffer[scalar_offset_3 / 4][scalar_offset_3 % 4];
+  return matrix<float16_t, 4, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  uint ubo_load_4 = u[0].x;
+  p[1] = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_4 & 0xFFFF)), float16_t(f16tof32(ubo_load_4 >> 16)));
+  uint ubo_load_5 = u[0].x;
+  p[1] = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_5 & 0xFFFF)), float16_t(f16tof32(ubo_load_5 >> 16))).yx;
+  p[0][1] = float16_t(f16tof32(((u[0].y) & 0xFFFF)));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_private.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_private.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..04ae2e2
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_private.wgsl.expected.fxc.hlsl

@@ -0,0 +1,32 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+static matrix<float16_t, 4, 2> p = matrix<float16_t, 4, 2>(float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h));
+
+matrix<float16_t, 4, 2> tint_symbol(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  const uint scalar_offset_3 = ((offset + 12u)) / 4;
+  uint ubo_load_3 = buffer[scalar_offset_3 / 4][scalar_offset_3 % 4];
+  return matrix<float16_t, 4, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  uint ubo_load_4 = u[0].x;
+  p[1] = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_4 & 0xFFFF)), float16_t(f16tof32(ubo_load_4 >> 16)));
+  uint ubo_load_5 = u[0].x;
+  p[1] = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_5 & 0xFFFF)), float16_t(f16tof32(ubo_load_5 >> 16))).yx;
+  p[0][1] = float16_t(f16tof32(((u[0].y) & 0xFFFF)));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001D1582D92D0(4,15-23): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_private.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_private.wgsl.expected.glsl
new file mode 100644
index 0000000..8c7a28a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_private.wgsl.expected.glsl

@@ -0,0 +1,27 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  f16vec2 inner_0;
+  f16vec2 inner_1;
+  f16vec2 inner_2;
+  f16vec2 inner_3;
+} u;
+
+f16mat4x2 p = f16mat4x2(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf);
+f16mat4x2 load_u_inner() {
+  return f16mat4x2(u.inner_0, u.inner_1, u.inner_2, u.inner_3);
+}
+
+void f() {
+  p = load_u_inner();
+  p[1] = u.inner_0;
+  p[1] = u.inner_0.yx;
+  p[0][1] = u.inner_1[0u];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_private.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_private.wgsl.expected.msl
new file mode 100644
index 0000000..aac8ea7
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_private.wgsl.expected.msl

@@ -0,0 +1,12 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void f(const constant half4x2* tint_symbol_1 [[buffer(0)]]) {
+  thread half4x2 tint_symbol = half4x2(0.0h);
+  tint_symbol = *(tint_symbol_1);
+  tint_symbol[1] = (*(tint_symbol_1))[0];
+  tint_symbol[1] = half2((*(tint_symbol_1))[0]).yx;
+  tint_symbol[0][1] = (*(tint_symbol_1))[1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_private.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_private.wgsl.expected.spvasm
new file mode 100644
index 0000000..cc710ce
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_private.wgsl.expected.spvasm

@@ -0,0 +1,87 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 55
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner_0"
+               OpMemberName %u_block_std140 1 "inner_1"
+               OpMemberName %u_block_std140 2 "inner_2"
+               OpMemberName %u_block_std140 3 "inner_3"
+               OpName %u "u"
+               OpName %p "p"
+               OpName %load_u_inner "load_u_inner"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %u_block_std140 1 Offset 4
+               OpMemberDecorate %u_block_std140 2 Offset 8
+               OpMemberDecorate %u_block_std140 3 Offset 12
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %half = OpTypeFloat 16
+     %v2half = OpTypeVector %half 2
+%u_block_std140 = OpTypeStruct %v2half %v2half %v2half %v2half
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat4v2half = OpTypeMatrix %v2half 4
+%_ptr_Private_mat4v2half = OpTypePointer Private %mat4v2half
+          %9 = OpConstantNull %mat4v2half
+          %p = OpVariable %_ptr_Private_mat4v2half Private %9
+         %10 = OpTypeFunction %mat4v2half
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+     %uint_3 = OpConstant %uint 3
+       %void = OpTypeVoid
+         %33 = OpTypeFunction %void
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Private_v2half = OpTypePointer Private %v2half
+         %48 = OpConstantNull %int
+%_ptr_Private_half = OpTypePointer Private %half
+         %51 = OpConstantNull %uint
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+%load_u_inner = OpFunction %mat4v2half None %10
+         %12 = OpLabel
+         %18 = OpAccessChain %_ptr_Uniform_v2half %u %uint_0
+         %19 = OpLoad %v2half %18
+         %22 = OpAccessChain %_ptr_Uniform_v2half %u %uint_1
+         %23 = OpLoad %v2half %22
+         %26 = OpAccessChain %_ptr_Uniform_v2half %u %uint_2
+         %27 = OpLoad %v2half %26
+         %30 = OpAccessChain %_ptr_Uniform_v2half %u %uint_3
+         %31 = OpLoad %v2half %30
+         %32 = OpCompositeConstruct %mat4v2half %19 %23 %27 %31
+               OpReturnValue %32
+               OpFunctionEnd
+          %f = OpFunction %void None %33
+         %36 = OpLabel
+         %37 = OpFunctionCall %mat4v2half %load_u_inner
+               OpStore %p %37
+         %41 = OpAccessChain %_ptr_Private_v2half %p %int_1
+         %42 = OpAccessChain %_ptr_Uniform_v2half %u %uint_0
+         %43 = OpLoad %v2half %42
+               OpStore %41 %43
+         %44 = OpAccessChain %_ptr_Private_v2half %p %int_1
+         %45 = OpAccessChain %_ptr_Uniform_v2half %u %uint_0
+         %46 = OpLoad %v2half %45
+         %47 = OpVectorShuffle %v2half %46 %46 1 0
+               OpStore %44 %47
+         %50 = OpAccessChain %_ptr_Private_half %p %48 %int_1
+         %53 = OpAccessChain %_ptr_Uniform_half %u %uint_1 %51
+         %54 = OpLoad %half %53
+               OpStore %50 %54
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_private.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_private.wgsl.expected.wgsl
new file mode 100644
index 0000000..af9ce13
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_private.wgsl.expected.wgsl

@@ -0,0 +1,13 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat4x2<f16>;
+
+var<private> p : mat4x2<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+  p = u;
+  p[1] = u[0];
+  p[1] = u[0].yx;
+  p[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_storage.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_storage.wgsl
new file mode 100644
index 0000000..8d72f90
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_storage.wgsl

@@ -0,0 +1,12 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat4x2<f16>;
+@group(0) @binding(1) var<storage, read_write> s : mat4x2<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+    s = u;
+    s[1] = u[0];
+    s[1] = u[0].yx;
+    s[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_storage.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_storage.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..df857d0
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_storage.wgsl.expected.dxc.hlsl

@@ -0,0 +1,34 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 4, 2> value) {
+  buffer.Store<vector<float16_t, 2> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 4u), value[1u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 8u), value[2u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 12u), value[3u]);
+}
+
+matrix<float16_t, 4, 2> tint_symbol_2(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  const uint scalar_offset_3 = ((offset + 12u)) / 4;
+  uint ubo_load_3 = buffer[scalar_offset_3 / 4][scalar_offset_3 % 4];
+  return matrix<float16_t, 4, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_2(u, 0u));
+  uint ubo_load_4 = u[0].x;
+  s.Store<vector<float16_t, 2> >(4u, vector<float16_t, 2>(float16_t(f16tof32(ubo_load_4 & 0xFFFF)), float16_t(f16tof32(ubo_load_4 >> 16))));
+  uint ubo_load_5 = u[0].x;
+  s.Store<vector<float16_t, 2> >(4u, vector<float16_t, 2>(float16_t(f16tof32(ubo_load_5 & 0xFFFF)), float16_t(f16tof32(ubo_load_5 >> 16))).yx);
+  s.Store<float16_t>(2u, float16_t(f16tof32(((u[0].y) & 0xFFFF))));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_storage.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_storage.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..8218a85
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_storage.wgsl.expected.fxc.hlsl

@@ -0,0 +1,40 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 4, 2> value) {
+  buffer.Store<vector<float16_t, 2> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 4u), value[1u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 8u), value[2u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 12u), value[3u]);
+}
+
+matrix<float16_t, 4, 2> tint_symbol_2(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  const uint scalar_offset_3 = ((offset + 12u)) / 4;
+  uint ubo_load_3 = buffer[scalar_offset_3 / 4][scalar_offset_3 % 4];
+  return matrix<float16_t, 4, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_2(u, 0u));
+  uint ubo_load_4 = u[0].x;
+  s.Store<vector<float16_t, 2> >(4u, vector<float16_t, 2>(float16_t(f16tof32(ubo_load_4 & 0xFFFF)), float16_t(f16tof32(ubo_load_4 >> 16))));
+  uint ubo_load_5 = u[0].x;
+  s.Store<vector<float16_t, 2> >(4u, vector<float16_t, 2>(float16_t(f16tof32(ubo_load_5 & 0xFFFF)), float16_t(f16tof32(ubo_load_5 >> 16))).yx);
+  s.Store<float16_t>(2u, float16_t(f16tof32(((u[0].y) & 0xFFFF))));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x00000252CF939A80(6,66-74): error X3000: syntax error: unexpected token 'float16_t'
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x00000252CF939A80(7,3-14): error X3018: invalid subscript 'Store'
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_storage.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_storage.wgsl.expected.glsl
new file mode 100644
index 0000000..b7358af
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_storage.wgsl.expected.glsl

@@ -0,0 +1,30 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  f16vec2 inner_0;
+  f16vec2 inner_1;
+  f16vec2 inner_2;
+  f16vec2 inner_3;
+} u;
+
+layout(binding = 1, std430) buffer u_block_ssbo {
+  f16mat4x2 inner;
+} s;
+
+f16mat4x2 load_u_inner() {
+  return f16mat4x2(u.inner_0, u.inner_1, u.inner_2, u.inner_3);
+}
+
+void f() {
+  s.inner = load_u_inner();
+  s.inner[1] = u.inner_0;
+  s.inner[1] = u.inner_0.yx;
+  s.inner[0][1] = u.inner_1[0u];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_storage.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_storage.wgsl.expected.msl
new file mode 100644
index 0000000..e3cb6f6
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_storage.wgsl.expected.msl

@@ -0,0 +1,11 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void f(device half4x2* tint_symbol [[buffer(1)]], const constant half4x2* tint_symbol_1 [[buffer(0)]]) {
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[0];
+  (*(tint_symbol))[1] = half2((*(tint_symbol_1))[0]).yx;
+  (*(tint_symbol))[0][1] = (*(tint_symbol_1))[1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_storage.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_storage.wgsl.expected.spvasm
new file mode 100644
index 0000000..8f53935
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_storage.wgsl.expected.spvasm

@@ -0,0 +1,97 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 57
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner_0"
+               OpMemberName %u_block_std140 1 "inner_1"
+               OpMemberName %u_block_std140 2 "inner_2"
+               OpMemberName %u_block_std140 3 "inner_3"
+               OpName %u "u"
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %s "s"
+               OpName %load_u_inner "load_u_inner"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %u_block_std140 1 Offset 4
+               OpMemberDecorate %u_block_std140 2 Offset 8
+               OpMemberDecorate %u_block_std140 3 Offset 12
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 4
+               OpDecorate %s DescriptorSet 0
+               OpDecorate %s Binding 1
+       %half = OpTypeFloat 16
+     %v2half = OpTypeVector %half 2
+%u_block_std140 = OpTypeStruct %v2half %v2half %v2half %v2half
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat4v2half = OpTypeMatrix %v2half 4
+    %u_block = OpTypeStruct %mat4v2half
+%_ptr_StorageBuffer_u_block = OpTypePointer StorageBuffer %u_block
+          %s = OpVariable %_ptr_StorageBuffer_u_block StorageBuffer
+         %10 = OpTypeFunction %mat4v2half
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+     %uint_3 = OpConstant %uint 3
+       %void = OpTypeVoid
+         %33 = OpTypeFunction %void
+%_ptr_StorageBuffer_mat4v2half = OpTypePointer StorageBuffer %mat4v2half
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_StorageBuffer_v2half = OpTypePointer StorageBuffer %v2half
+         %50 = OpConstantNull %int
+%_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
+         %53 = OpConstantNull %uint
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+%load_u_inner = OpFunction %mat4v2half None %10
+         %12 = OpLabel
+         %18 = OpAccessChain %_ptr_Uniform_v2half %u %uint_0
+         %19 = OpLoad %v2half %18
+         %22 = OpAccessChain %_ptr_Uniform_v2half %u %uint_1
+         %23 = OpLoad %v2half %22
+         %26 = OpAccessChain %_ptr_Uniform_v2half %u %uint_2
+         %27 = OpLoad %v2half %26
+         %30 = OpAccessChain %_ptr_Uniform_v2half %u %uint_3
+         %31 = OpLoad %v2half %30
+         %32 = OpCompositeConstruct %mat4v2half %19 %23 %27 %31
+               OpReturnValue %32
+               OpFunctionEnd
+          %f = OpFunction %void None %33
+         %36 = OpLabel
+         %38 = OpAccessChain %_ptr_StorageBuffer_mat4v2half %s %uint_0
+         %39 = OpFunctionCall %mat4v2half %load_u_inner
+               OpStore %38 %39
+         %43 = OpAccessChain %_ptr_StorageBuffer_v2half %s %uint_0 %int_1
+         %44 = OpAccessChain %_ptr_Uniform_v2half %u %uint_0
+         %45 = OpLoad %v2half %44
+               OpStore %43 %45
+         %46 = OpAccessChain %_ptr_StorageBuffer_v2half %s %uint_0 %int_1
+         %47 = OpAccessChain %_ptr_Uniform_v2half %u %uint_0
+         %48 = OpLoad %v2half %47
+         %49 = OpVectorShuffle %v2half %48 %48 1 0
+               OpStore %46 %49
+         %52 = OpAccessChain %_ptr_StorageBuffer_half %s %uint_0 %50 %int_1
+         %55 = OpAccessChain %_ptr_Uniform_half %u %uint_1 %53
+         %56 = OpLoad %half %55
+               OpStore %52 %56
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_storage.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_storage.wgsl.expected.wgsl
new file mode 100644
index 0000000..c58a21e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_storage.wgsl.expected.wgsl

@@ -0,0 +1,13 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat4x2<f16>;
+
+@group(0) @binding(1) var<storage, read_write> s : mat4x2<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+  s = u;
+  s[1] = u[0];
+  s[1] = u[0].yx;
+  s[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_workgroup.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_workgroup.wgsl
new file mode 100644
index 0000000..a629aa6
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_workgroup.wgsl

@@ -0,0 +1,12 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat4x2<f16>;
+var<workgroup> w : mat4x2<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+    w = u;
+    w[1] = u[0];
+    w[1] = u[0].yx;
+    w[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_workgroup.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_workgroup.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..a3baa30
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_workgroup.wgsl.expected.dxc.hlsl

@@ -0,0 +1,39 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+groupshared matrix<float16_t, 4, 2> w;
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+matrix<float16_t, 4, 2> tint_symbol_2(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  const uint scalar_offset_3 = ((offset + 12u)) / 4;
+  uint ubo_load_3 = buffer[scalar_offset_3 / 4][scalar_offset_3 % 4];
+  return matrix<float16_t, 4, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))));
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    w = matrix<float16_t, 4, 2>((float16_t(0.0h)).xx, (float16_t(0.0h)).xx, (float16_t(0.0h)).xx, (float16_t(0.0h)).xx);
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  uint ubo_load_4 = u[0].x;
+  w[1] = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_4 & 0xFFFF)), float16_t(f16tof32(ubo_load_4 >> 16)));
+  uint ubo_load_5 = u[0].x;
+  w[1] = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_5 & 0xFFFF)), float16_t(f16tof32(ubo_load_5 >> 16))).yx;
+  w[0][1] = float16_t(f16tof32(((u[0].y) & 0xFFFF)));
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_workgroup.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_workgroup.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..26ab4d8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_workgroup.wgsl.expected.fxc.hlsl

@@ -0,0 +1,44 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+groupshared matrix<float16_t, 4, 2> w;
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+matrix<float16_t, 4, 2> tint_symbol_2(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  const uint scalar_offset_3 = ((offset + 12u)) / 4;
+  uint ubo_load_3 = buffer[scalar_offset_3 / 4][scalar_offset_3 % 4];
+  return matrix<float16_t, 4, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))));
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    w = matrix<float16_t, 4, 2>((float16_t(0.0h)).xx, (float16_t(0.0h)).xx, (float16_t(0.0h)).xx, (float16_t(0.0h)).xx);
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  uint ubo_load_4 = u[0].x;
+  w[1] = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_4 & 0xFFFF)), float16_t(f16tof32(ubo_load_4 >> 16)));
+  uint ubo_load_5 = u[0].x;
+  w[1] = vector<float16_t, 2>(float16_t(f16tof32(ubo_load_5 & 0xFFFF)), float16_t(f16tof32(ubo_load_5 >> 16))).yx;
+  w[0][1] = float16_t(f16tof32(((u[0].y) & 0xFFFF)));
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001DAFCDE42A0(4,20-28): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_workgroup.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_workgroup.wgsl.expected.glsl
new file mode 100644
index 0000000..931d9c6
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_workgroup.wgsl.expected.glsl

@@ -0,0 +1,31 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  f16vec2 inner_0;
+  f16vec2 inner_1;
+  f16vec2 inner_2;
+  f16vec2 inner_3;
+} u;
+
+shared f16mat4x2 w;
+f16mat4x2 load_u_inner() {
+  return f16mat4x2(u.inner_0, u.inner_1, u.inner_2, u.inner_3);
+}
+
+void f(uint local_invocation_index) {
+  {
+    w = f16mat4x2(f16vec2(0.0hf), f16vec2(0.0hf), f16vec2(0.0hf), f16vec2(0.0hf));
+  }
+  barrier();
+  w = load_u_inner();
+  w[1] = u.inner_0;
+  w[1] = u.inner_0.yx;
+  w[0][1] = u.inner_1[0u];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f(gl_LocalInvocationIndex);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_workgroup.wgsl.expected.msl
new file mode 100644
index 0000000..ccec071
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_workgroup.wgsl.expected.msl

@@ -0,0 +1,24 @@
+#include <metal_stdlib>
+
+using namespace metal;
+struct tint_symbol_5 {
+  half4x2 w;
+};
+
+void f_inner(uint local_invocation_index, threadgroup half4x2* const tint_symbol, const constant half4x2* const tint_symbol_1) {
+  {
+    *(tint_symbol) = half4x2(half2(0.0h), half2(0.0h), half2(0.0h), half2(0.0h));
+  }
+  threadgroup_barrier(mem_flags::mem_threadgroup);
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[0];
+  (*(tint_symbol))[1] = half2((*(tint_symbol_1))[0]).yx;
+  (*(tint_symbol))[0][1] = (*(tint_symbol_1))[1][0];
+}
+
+kernel void f(const constant half4x2* tint_symbol_4 [[buffer(0)]], threadgroup tint_symbol_5* tint_symbol_3 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
+  threadgroup half4x2* const tint_symbol_2 = &((*(tint_symbol_3)).w);
+  f_inner(local_invocation_index, tint_symbol_2, tint_symbol_4);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_workgroup.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_workgroup.wgsl.expected.spvasm
new file mode 100644
index 0000000..d158053
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_workgroup.wgsl.expected.spvasm

@@ -0,0 +1,104 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 65
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f" %local_invocation_index_1
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %local_invocation_index_1 "local_invocation_index_1"
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner_0"
+               OpMemberName %u_block_std140 1 "inner_1"
+               OpMemberName %u_block_std140 2 "inner_2"
+               OpMemberName %u_block_std140 3 "inner_3"
+               OpName %u "u"
+               OpName %w "w"
+               OpName %load_u_inner "load_u_inner"
+               OpName %f_inner "f_inner"
+               OpName %local_invocation_index "local_invocation_index"
+               OpName %f "f"
+               OpDecorate %local_invocation_index_1 BuiltIn LocalInvocationIndex
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %u_block_std140 1 Offset 4
+               OpMemberDecorate %u_block_std140 2 Offset 8
+               OpMemberDecorate %u_block_std140 3 Offset 12
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %uint = OpTypeInt 32 0
+%_ptr_Input_uint = OpTypePointer Input %uint
+%local_invocation_index_1 = OpVariable %_ptr_Input_uint Input
+       %half = OpTypeFloat 16
+     %v2half = OpTypeVector %half 2
+%u_block_std140 = OpTypeStruct %v2half %v2half %v2half %v2half
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat4v2half = OpTypeMatrix %v2half 4
+%_ptr_Workgroup_mat4v2half = OpTypePointer Workgroup %mat4v2half
+          %w = OpVariable %_ptr_Workgroup_mat4v2half Workgroup
+         %12 = OpTypeFunction %mat4v2half
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+     %uint_3 = OpConstant %uint 3
+       %void = OpTypeVoid
+         %34 = OpTypeFunction %void %uint
+         %39 = OpConstantNull %mat4v2half
+   %uint_264 = OpConstant %uint 264
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Workgroup_v2half = OpTypePointer Workgroup %v2half
+         %53 = OpConstantNull %int
+%_ptr_Workgroup_half = OpTypePointer Workgroup %half
+         %56 = OpConstantNull %uint
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+         %60 = OpTypeFunction %void
+%load_u_inner = OpFunction %mat4v2half None %12
+         %14 = OpLabel
+         %19 = OpAccessChain %_ptr_Uniform_v2half %u %uint_0
+         %20 = OpLoad %v2half %19
+         %23 = OpAccessChain %_ptr_Uniform_v2half %u %uint_1
+         %24 = OpLoad %v2half %23
+         %27 = OpAccessChain %_ptr_Uniform_v2half %u %uint_2
+         %28 = OpLoad %v2half %27
+         %31 = OpAccessChain %_ptr_Uniform_v2half %u %uint_3
+         %32 = OpLoad %v2half %31
+         %33 = OpCompositeConstruct %mat4v2half %20 %24 %28 %32
+               OpReturnValue %33
+               OpFunctionEnd
+    %f_inner = OpFunction %void None %34
+%local_invocation_index = OpFunctionParameter %uint
+         %38 = OpLabel
+               OpStore %w %39
+               OpControlBarrier %uint_2 %uint_2 %uint_264
+         %42 = OpFunctionCall %mat4v2half %load_u_inner
+               OpStore %w %42
+         %46 = OpAccessChain %_ptr_Workgroup_v2half %w %int_1
+         %47 = OpAccessChain %_ptr_Uniform_v2half %u %uint_0
+         %48 = OpLoad %v2half %47
+               OpStore %46 %48
+         %49 = OpAccessChain %_ptr_Workgroup_v2half %w %int_1
+         %50 = OpAccessChain %_ptr_Uniform_v2half %u %uint_0
+         %51 = OpLoad %v2half %50
+         %52 = OpVectorShuffle %v2half %51 %51 1 0
+               OpStore %49 %52
+         %55 = OpAccessChain %_ptr_Workgroup_half %w %53 %int_1
+         %58 = OpAccessChain %_ptr_Uniform_half %u %uint_1 %56
+         %59 = OpLoad %half %58
+               OpStore %55 %59
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %60
+         %62 = OpLabel
+         %64 = OpLoad %uint %local_invocation_index_1
+         %63 = OpFunctionCall %void %f_inner %64
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_workgroup.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_workgroup.wgsl.expected.wgsl
new file mode 100644
index 0000000..75d4a9d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x2_f16/to_workgroup.wgsl.expected.wgsl

@@ -0,0 +1,13 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat4x2<f16>;
+
+var<workgroup> w : mat4x2<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+  w = u;
+  w[1] = u[0];
+  w[1] = u[0].yx;
+  w[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2/dynamic_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f32/dynamic_index_via_ptr.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat4x2/dynamic_index_via_ptr.wgsl
rename to test/tint/buffer/uniform/std140/unnested/mat4x2_f32/dynamic_index_via_ptr.wgsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat4x2/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
rename to test/tint/buffer/uniform/std140/unnested/mat4x2_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat4x2/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
rename to test/tint/buffer/uniform/std140/unnested/mat4x2_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2/dynamic_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f32/dynamic_index_via_ptr.wgsl.expected.glsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat4x2/dynamic_index_via_ptr.wgsl.expected.glsl
rename to test/tint/buffer/uniform/std140/unnested/mat4x2_f32/dynamic_index_via_ptr.wgsl.expected.glsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2/dynamic_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f32/dynamic_index_via_ptr.wgsl.expected.msl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat4x2/dynamic_index_via_ptr.wgsl.expected.msl
rename to test/tint/buffer/uniform/std140/unnested/mat4x2_f32/dynamic_index_via_ptr.wgsl.expected.msl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2/dynamic_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat4x2_f32/dynamic_index_via_ptr.wgsl.expected.spvasm
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat4x2/dynamic_index_via_ptr.wgsl.expected.spvasm
rename to test/tint/buffer/uniform/std140/unnested/mat4x2_f32/dynamic_index_via_ptr.wgsl.expected.spvasm


diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2/dynamic_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f32/dynamic_index_via_ptr.wgsl.expected.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat4x2/dynamic_index_via_ptr.wgsl.expected.wgsl
rename to test/tint/buffer/uniform/std140/unnested/mat4x2_f32/dynamic_index_via_ptr.wgsl.expected.wgsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2/static_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f32/static_index_via_ptr.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat4x2/static_index_via_ptr.wgsl
rename to test/tint/buffer/uniform/std140/unnested/mat4x2_f32/static_index_via_ptr.wgsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2/static_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat4x2/static_index_via_ptr.wgsl.expected.dxc.hlsl
rename to test/tint/buffer/uniform/std140/unnested/mat4x2_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2/static_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat4x2/static_index_via_ptr.wgsl.expected.fxc.hlsl
rename to test/tint/buffer/uniform/std140/unnested/mat4x2_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2/static_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f32/static_index_via_ptr.wgsl.expected.glsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat4x2/static_index_via_ptr.wgsl.expected.glsl
rename to test/tint/buffer/uniform/std140/unnested/mat4x2_f32/static_index_via_ptr.wgsl.expected.glsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2/static_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f32/static_index_via_ptr.wgsl.expected.msl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat4x2/static_index_via_ptr.wgsl.expected.msl
rename to test/tint/buffer/uniform/std140/unnested/mat4x2_f32/static_index_via_ptr.wgsl.expected.msl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2/static_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat4x2_f32/static_index_via_ptr.wgsl.expected.spvasm
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat4x2/static_index_via_ptr.wgsl.expected.spvasm
rename to test/tint/buffer/uniform/std140/unnested/mat4x2_f32/static_index_via_ptr.wgsl.expected.spvasm


diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2/static_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f32/static_index_via_ptr.wgsl.expected.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat4x2/static_index_via_ptr.wgsl.expected.wgsl
rename to test/tint/buffer/uniform/std140/unnested/mat4x2_f32/static_index_via_ptr.wgsl.expected.wgsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2/to_builtin.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_builtin.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat4x2/to_builtin.wgsl
rename to test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_builtin.wgsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2/to_builtin.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_builtin.wgsl.expected.dxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat4x2/to_builtin.wgsl.expected.dxc.hlsl
rename to test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_builtin.wgsl.expected.dxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2/to_builtin.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_builtin.wgsl.expected.fxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat4x2/to_builtin.wgsl.expected.fxc.hlsl
rename to test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_builtin.wgsl.expected.fxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2/to_builtin.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_builtin.wgsl.expected.glsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat4x2/to_builtin.wgsl.expected.glsl
rename to test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_builtin.wgsl.expected.glsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2/to_builtin.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_builtin.wgsl.expected.msl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat4x2/to_builtin.wgsl.expected.msl
rename to test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_builtin.wgsl.expected.msl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2/to_builtin.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_builtin.wgsl.expected.spvasm
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat4x2/to_builtin.wgsl.expected.spvasm
rename to test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_builtin.wgsl.expected.spvasm


diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2/to_builtin.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_builtin.wgsl.expected.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat4x2/to_builtin.wgsl.expected.wgsl
rename to test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_builtin.wgsl.expected.wgsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2/to_fn.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_fn.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat4x2/to_fn.wgsl
rename to test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_fn.wgsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2/to_fn.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_fn.wgsl.expected.dxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat4x2/to_fn.wgsl.expected.dxc.hlsl
rename to test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_fn.wgsl.expected.dxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2/to_fn.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_fn.wgsl.expected.fxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat4x2/to_fn.wgsl.expected.fxc.hlsl
rename to test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_fn.wgsl.expected.fxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2/to_fn.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_fn.wgsl.expected.glsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat4x2/to_fn.wgsl.expected.glsl
rename to test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_fn.wgsl.expected.glsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2/to_fn.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_fn.wgsl.expected.msl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat4x2/to_fn.wgsl.expected.msl
rename to test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_fn.wgsl.expected.msl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2/to_fn.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_fn.wgsl.expected.spvasm
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat4x2/to_fn.wgsl.expected.spvasm
rename to test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_fn.wgsl.expected.spvasm


diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2/to_fn.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_fn.wgsl.expected.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat4x2/to_fn.wgsl.expected.wgsl
rename to test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_fn.wgsl.expected.wgsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2/to_private.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_private.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat4x2/to_private.wgsl
rename to test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_private.wgsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2/to_private.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_private.wgsl.expected.dxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat4x2/to_private.wgsl.expected.dxc.hlsl
rename to test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_private.wgsl.expected.dxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2/to_private.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_private.wgsl.expected.fxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat4x2/to_private.wgsl.expected.fxc.hlsl
rename to test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_private.wgsl.expected.fxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2/to_private.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_private.wgsl.expected.glsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat4x2/to_private.wgsl.expected.glsl
rename to test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_private.wgsl.expected.glsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2/to_private.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_private.wgsl.expected.msl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat4x2/to_private.wgsl.expected.msl
rename to test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_private.wgsl.expected.msl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2/to_private.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_private.wgsl.expected.spvasm
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat4x2/to_private.wgsl.expected.spvasm
rename to test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_private.wgsl.expected.spvasm


diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2/to_private.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_private.wgsl.expected.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat4x2/to_private.wgsl.expected.wgsl
rename to test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_private.wgsl.expected.wgsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2/to_storage.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_storage.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat4x2/to_storage.wgsl
rename to test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_storage.wgsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2/to_storage.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_storage.wgsl.expected.dxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat4x2/to_storage.wgsl.expected.dxc.hlsl
rename to test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_storage.wgsl.expected.dxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2/to_storage.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_storage.wgsl.expected.fxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat4x2/to_storage.wgsl.expected.fxc.hlsl
rename to test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_storage.wgsl.expected.fxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2/to_storage.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_storage.wgsl.expected.glsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat4x2/to_storage.wgsl.expected.glsl
rename to test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_storage.wgsl.expected.glsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2/to_storage.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_storage.wgsl.expected.msl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat4x2/to_storage.wgsl.expected.msl
rename to test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_storage.wgsl.expected.msl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2/to_storage.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_storage.wgsl.expected.spvasm
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat4x2/to_storage.wgsl.expected.spvasm
rename to test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_storage.wgsl.expected.spvasm


diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2/to_storage.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_storage.wgsl.expected.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat4x2/to_storage.wgsl.expected.wgsl
rename to test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_storage.wgsl.expected.wgsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2/to_workgroup.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_workgroup.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat4x2/to_workgroup.wgsl
rename to test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_workgroup.wgsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2/to_workgroup.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_workgroup.wgsl.expected.dxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat4x2/to_workgroup.wgsl.expected.dxc.hlsl
rename to test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_workgroup.wgsl.expected.dxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2/to_workgroup.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_workgroup.wgsl.expected.fxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat4x2/to_workgroup.wgsl.expected.fxc.hlsl
rename to test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_workgroup.wgsl.expected.fxc.hlsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2/to_workgroup.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_workgroup.wgsl.expected.glsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat4x2/to_workgroup.wgsl.expected.glsl
rename to test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_workgroup.wgsl.expected.glsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_workgroup.wgsl.expected.msl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat4x2/to_workgroup.wgsl.expected.msl
rename to test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_workgroup.wgsl.expected.msl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2/to_workgroup.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_workgroup.wgsl.expected.spvasm
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat4x2/to_workgroup.wgsl.expected.spvasm
rename to test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_workgroup.wgsl.expected.spvasm


diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x2/to_workgroup.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_workgroup.wgsl.expected.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/std140/unnested/mat4x2/to_workgroup.wgsl.expected.wgsl
rename to test/tint/buffer/uniform/std140/unnested/mat4x2_f32/to_workgroup.wgsl.expected.wgsl


diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/dynamic_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/dynamic_index_via_ptr.wgsl
new file mode 100644
index 0000000..001dc40
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/dynamic_index_via_ptr.wgsl

@@ -0,0 +1,15 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> m : mat4x3<f16>;
+
+var<private> counter = 0;
+fn i() -> i32 { counter++; return counter; }
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_m   = &m;
+  let p_m_i = &((*p_m)[i()]);
+
+  let l_m   : mat4x3<f16> = *p_m;
+  let l_m_i : vec3<f16>   = *p_m_i;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..d4f64e3
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,46 @@
+cbuffer cbuffer_m : register(b0, space0) {
+  uint4 m[2];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+matrix<float16_t, 4, 3> tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  return matrix<float16_t, 4, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]), vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_m_i_save = i();
+  const matrix<float16_t, 4, 3> l_m = tint_symbol(m, 0u);
+  const uint scalar_offset_4 = ((8u * uint(p_m_i_save))) / 4;
+  uint4 ubo_load_9 = m[scalar_offset_4 / 4];
+  uint2 ubo_load_8 = ((scalar_offset_4 & 2) ? ubo_load_9.zw : ubo_load_9.xy);
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  float16_t ubo_load_8_y = f16tof32(ubo_load_8[0] >> 16);
+  const vector<float16_t, 3> l_m_i = vector<float16_t, 3>(ubo_load_8_xz[0], ubo_load_8_y, ubo_load_8_xz[1]);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..73d44dc
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,51 @@
+SKIP: FAILED
+
+cbuffer cbuffer_m : register(b0, space0) {
+  uint4 m[2];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+matrix<float16_t, 4, 3> tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  return matrix<float16_t, 4, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]), vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_m_i_save = i();
+  const matrix<float16_t, 4, 3> l_m = tint_symbol(m, 0u);
+  const uint scalar_offset_4 = ((8u * uint(p_m_i_save))) / 4;
+  uint4 ubo_load_9 = m[scalar_offset_4 / 4];
+  uint2 ubo_load_8 = ((scalar_offset_4 & 2) ? ubo_load_9.zw : ubo_load_9.xy);
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  float16_t ubo_load_8_y = f16tof32(ubo_load_8[0] >> 16);
+  const vector<float16_t, 3> l_m_i = vector<float16_t, 3>(ubo_load_8_xz[0], ubo_load_8_y, ubo_load_8_xz[1]);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000029A277BC3D0(11,8-16): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/dynamic_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/dynamic_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..09bc9f3
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/dynamic_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,58 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform m_block_std140_ubo {
+  f16vec3 inner_0;
+  f16vec3 inner_1;
+  f16vec3 inner_2;
+  f16vec3 inner_3;
+} m;
+
+int counter = 0;
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+f16mat4x3 load_m_inner() {
+  return f16mat4x3(m.inner_0, m.inner_1, m.inner_2, m.inner_3);
+}
+
+f16vec3 load_m_inner_p0(uint p0) {
+  switch(p0) {
+    case 0u: {
+      return m.inner_0;
+      break;
+    }
+    case 1u: {
+      return m.inner_1;
+      break;
+    }
+    case 2u: {
+      return m.inner_2;
+      break;
+    }
+    case 3u: {
+      return m.inner_3;
+      break;
+    }
+    default: {
+      return f16vec3(0.0hf);
+      break;
+    }
+  }
+}
+
+void f() {
+  f16mat4x3 p_m = load_m_inner();
+  int tint_symbol = i();
+  f16vec3 p_m_i = load_m_inner_p0(uint(tint_symbol));
+  f16mat4x3 l_m = load_m_inner();
+  f16vec3 l_m_i = load_m_inner_p0(uint(tint_symbol));
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/dynamic_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/dynamic_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..3e06cd5
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/dynamic_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,17 @@
+#include <metal_stdlib>
+
+using namespace metal;
+int i() {
+  thread int tint_symbol_1 = 0;
+  tint_symbol_1 = as_type<int>((as_type<uint>(tint_symbol_1) + as_type<uint>(1)));
+  return tint_symbol_1;
+}
+
+kernel void f(const constant half4x3* tint_symbol_2 [[buffer(0)]]) {
+  int const tint_symbol = i();
+  int const p_m_i_save = tint_symbol;
+  half4x3 const l_m = *(tint_symbol_2);
+  half3 const l_m_i = (*(tint_symbol_2))[p_m_i_save];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/dynamic_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/dynamic_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..06b5911
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/dynamic_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,111 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 68
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %m_block_std140 "m_block_std140"
+               OpMemberName %m_block_std140 0 "inner_0"
+               OpMemberName %m_block_std140 1 "inner_1"
+               OpMemberName %m_block_std140 2 "inner_2"
+               OpMemberName %m_block_std140 3 "inner_3"
+               OpName %m "m"
+               OpName %counter "counter"
+               OpName %i "i"
+               OpName %load_m_inner "load_m_inner"
+               OpName %load_m_inner_p0 "load_m_inner_p0"
+               OpName %p0 "p0"
+               OpName %f "f"
+               OpDecorate %m_block_std140 Block
+               OpMemberDecorate %m_block_std140 0 Offset 0
+               OpMemberDecorate %m_block_std140 1 Offset 8
+               OpMemberDecorate %m_block_std140 2 Offset 16
+               OpMemberDecorate %m_block_std140 3 Offset 24
+               OpDecorate %m NonWritable
+               OpDecorate %m DescriptorSet 0
+               OpDecorate %m Binding 0
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+%m_block_std140 = OpTypeStruct %v3half %v3half %v3half %v3half
+%_ptr_Uniform_m_block_std140 = OpTypePointer Uniform %m_block_std140
+          %m = OpVariable %_ptr_Uniform_m_block_std140 Uniform
+        %int = OpTypeInt 32 1
+          %7 = OpConstantNull %int
+%_ptr_Private_int = OpTypePointer Private %int
+    %counter = OpVariable %_ptr_Private_int Private %7
+         %10 = OpTypeFunction %int
+      %int_1 = OpConstant %int 1
+ %mat4v3half = OpTypeMatrix %v3half 4
+         %17 = OpTypeFunction %mat4v3half
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+     %uint_3 = OpConstant %uint 3
+         %41 = OpTypeFunction %v3half %uint
+         %59 = OpConstantNull %v3half
+       %void = OpTypeVoid
+         %60 = OpTypeFunction %void
+          %i = OpFunction %int None %10
+         %12 = OpLabel
+         %13 = OpLoad %int %counter
+         %15 = OpIAdd %int %13 %int_1
+               OpStore %counter %15
+         %16 = OpLoad %int %counter
+               OpReturnValue %16
+               OpFunctionEnd
+%load_m_inner = OpFunction %mat4v3half None %17
+         %20 = OpLabel
+         %26 = OpAccessChain %_ptr_Uniform_v3half %m %uint_0
+         %27 = OpLoad %v3half %26
+         %30 = OpAccessChain %_ptr_Uniform_v3half %m %uint_1
+         %31 = OpLoad %v3half %30
+         %34 = OpAccessChain %_ptr_Uniform_v3half %m %uint_2
+         %35 = OpLoad %v3half %34
+         %38 = OpAccessChain %_ptr_Uniform_v3half %m %uint_3
+         %39 = OpLoad %v3half %38
+         %40 = OpCompositeConstruct %mat4v3half %27 %31 %35 %39
+               OpReturnValue %40
+               OpFunctionEnd
+%load_m_inner_p0 = OpFunction %v3half None %41
+         %p0 = OpFunctionParameter %uint
+         %44 = OpLabel
+               OpSelectionMerge %45 None
+               OpSwitch %p0 %46 0 %47 1 %48 2 %49 3 %50
+         %47 = OpLabel
+         %51 = OpAccessChain %_ptr_Uniform_v3half %m %uint_0
+         %52 = OpLoad %v3half %51
+               OpReturnValue %52
+         %48 = OpLabel
+         %53 = OpAccessChain %_ptr_Uniform_v3half %m %uint_1
+         %54 = OpLoad %v3half %53
+               OpReturnValue %54
+         %49 = OpLabel
+         %55 = OpAccessChain %_ptr_Uniform_v3half %m %uint_2
+         %56 = OpLoad %v3half %55
+               OpReturnValue %56
+         %50 = OpLabel
+         %57 = OpAccessChain %_ptr_Uniform_v3half %m %uint_3
+         %58 = OpLoad %v3half %57
+               OpReturnValue %58
+         %46 = OpLabel
+               OpReturnValue %59
+         %45 = OpLabel
+               OpReturnValue %59
+               OpFunctionEnd
+          %f = OpFunction %void None %60
+         %63 = OpLabel
+         %64 = OpFunctionCall %int %i
+         %65 = OpFunctionCall %mat4v3half %load_m_inner
+         %67 = OpBitcast %uint %64
+         %66 = OpFunctionCall %v3half %load_m_inner_p0 %67
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/dynamic_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/dynamic_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..90b5ce7
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/dynamic_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,18 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> m : mat4x3<f16>;
+
+var<private> counter = 0;
+
+fn i() -> i32 {
+  counter++;
+  return counter;
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_m = &(m);
+  let p_m_i = &((*(p_m))[i()]);
+  let l_m : mat4x3<f16> = *(p_m);
+  let l_m_i : vec3<f16> = *(p_m_i);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/static_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/static_index_via_ptr.wgsl
new file mode 100644
index 0000000..63121b8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/static_index_via_ptr.wgsl

@@ -0,0 +1,15 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> m : mat4x3<f16>;
+
+var<private> counter = 0;
+fn i() -> i32 { counter++; return counter; }
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_m   = &m;
+  let p_m_1 = &((*p_m)[1]);
+
+  let l_m   : mat4x3<f16> = *p_m;
+  let l_m_1 : vec3<f16>   = *p_m_1;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..46a10de
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,43 @@
+cbuffer cbuffer_m : register(b0, space0) {
+  uint4 m[2];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+matrix<float16_t, 4, 3> tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  return matrix<float16_t, 4, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]), vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 4, 3> l_m = tint_symbol(m, 0u);
+  uint2 ubo_load_8 = m[0].zw;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  float16_t ubo_load_8_y = f16tof32(ubo_load_8[0] >> 16);
+  const vector<float16_t, 3> l_m_1 = vector<float16_t, 3>(ubo_load_8_xz[0], ubo_load_8_y, ubo_load_8_xz[1]);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..77207dc
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,48 @@
+SKIP: FAILED
+
+cbuffer cbuffer_m : register(b0, space0) {
+  uint4 m[2];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+matrix<float16_t, 4, 3> tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  return matrix<float16_t, 4, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]), vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 4, 3> l_m = tint_symbol(m, 0u);
+  uint2 ubo_load_8 = m[0].zw;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  float16_t ubo_load_8_y = f16tof32(ubo_load_8[0] >> 16);
+  const vector<float16_t, 3> l_m_1 = vector<float16_t, 3>(ubo_load_8_xz[0], ubo_load_8_y, ubo_load_8_xz[1]);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001D61ABCB8F0(11,8-16): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/static_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/static_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..f99841d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/static_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,26 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform m_block_std140_ubo {
+  f16vec3 inner_0;
+  f16vec3 inner_1;
+  f16vec3 inner_2;
+  f16vec3 inner_3;
+} m;
+
+f16mat4x3 load_m_inner() {
+  return f16mat4x3(m.inner_0, m.inner_1, m.inner_2, m.inner_3);
+}
+
+void f() {
+  f16mat4x3 p_m = load_m_inner();
+  f16vec3 p_m_1 = m.inner_1;
+  f16mat4x3 l_m = load_m_inner();
+  f16vec3 l_m_1 = m.inner_1;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/static_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/static_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..d08b0d9
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/static_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,15 @@
+#include <metal_stdlib>
+
+using namespace metal;
+int i() {
+  thread int tint_symbol = 0;
+  tint_symbol = as_type<int>((as_type<uint>(tint_symbol) + as_type<uint>(1)));
+  return tint_symbol;
+}
+
+kernel void f(const constant half4x3* tint_symbol_1 [[buffer(0)]]) {
+  half4x3 const l_m = *(tint_symbol_1);
+  half3 const l_m_1 = (*(tint_symbol_1))[1];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/static_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/static_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..1177f6a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/static_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,80 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 48
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %m_block_std140 "m_block_std140"
+               OpMemberName %m_block_std140 0 "inner_0"
+               OpMemberName %m_block_std140 1 "inner_1"
+               OpMemberName %m_block_std140 2 "inner_2"
+               OpMemberName %m_block_std140 3 "inner_3"
+               OpName %m "m"
+               OpName %counter "counter"
+               OpName %i "i"
+               OpName %load_m_inner "load_m_inner"
+               OpName %f "f"
+               OpDecorate %m_block_std140 Block
+               OpMemberDecorate %m_block_std140 0 Offset 0
+               OpMemberDecorate %m_block_std140 1 Offset 8
+               OpMemberDecorate %m_block_std140 2 Offset 16
+               OpMemberDecorate %m_block_std140 3 Offset 24
+               OpDecorate %m NonWritable
+               OpDecorate %m DescriptorSet 0
+               OpDecorate %m Binding 0
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+%m_block_std140 = OpTypeStruct %v3half %v3half %v3half %v3half
+%_ptr_Uniform_m_block_std140 = OpTypePointer Uniform %m_block_std140
+          %m = OpVariable %_ptr_Uniform_m_block_std140 Uniform
+        %int = OpTypeInt 32 1
+          %7 = OpConstantNull %int
+%_ptr_Private_int = OpTypePointer Private %int
+    %counter = OpVariable %_ptr_Private_int Private %7
+         %10 = OpTypeFunction %int
+      %int_1 = OpConstant %int 1
+ %mat4v3half = OpTypeMatrix %v3half 4
+         %17 = OpTypeFunction %mat4v3half
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+     %uint_3 = OpConstant %uint 3
+       %void = OpTypeVoid
+         %41 = OpTypeFunction %void
+          %i = OpFunction %int None %10
+         %12 = OpLabel
+         %13 = OpLoad %int %counter
+         %15 = OpIAdd %int %13 %int_1
+               OpStore %counter %15
+         %16 = OpLoad %int %counter
+               OpReturnValue %16
+               OpFunctionEnd
+%load_m_inner = OpFunction %mat4v3half None %17
+         %20 = OpLabel
+         %26 = OpAccessChain %_ptr_Uniform_v3half %m %uint_0
+         %27 = OpLoad %v3half %26
+         %30 = OpAccessChain %_ptr_Uniform_v3half %m %uint_1
+         %31 = OpLoad %v3half %30
+         %34 = OpAccessChain %_ptr_Uniform_v3half %m %uint_2
+         %35 = OpLoad %v3half %34
+         %38 = OpAccessChain %_ptr_Uniform_v3half %m %uint_3
+         %39 = OpLoad %v3half %38
+         %40 = OpCompositeConstruct %mat4v3half %27 %31 %35 %39
+               OpReturnValue %40
+               OpFunctionEnd
+          %f = OpFunction %void None %41
+         %44 = OpLabel
+         %45 = OpFunctionCall %mat4v3half %load_m_inner
+         %46 = OpAccessChain %_ptr_Uniform_v3half %m %uint_1
+         %47 = OpLoad %v3half %46
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/static_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/static_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..458602c
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/static_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,18 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> m : mat4x3<f16>;
+
+var<private> counter = 0;
+
+fn i() -> i32 {
+  counter++;
+  return counter;
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_m = &(m);
+  let p_m_1 = &((*(p_m))[1]);
+  let l_m : mat4x3<f16> = *(p_m);
+  let l_m_1 : vec3<f16> = *(p_m_1);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_builtin.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_builtin.wgsl
new file mode 100644
index 0000000..8e8a955
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_builtin.wgsl

@@ -0,0 +1,10 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat4x3<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+    let t = transpose(u);
+    let l = length(u[1]);
+    let a = abs(u[0].zxy.x);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_builtin.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_builtin.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..34e0598
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_builtin.wgsl.expected.dxc.hlsl

@@ -0,0 +1,41 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+
+matrix<float16_t, 4, 3> tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  return matrix<float16_t, 4, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]), vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 3, 4> t = transpose(tint_symbol(u, 0u));
+  uint2 ubo_load_8 = u[0].zw;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  float16_t ubo_load_8_y = f16tof32(ubo_load_8[0] >> 16);
+  const float16_t l = length(vector<float16_t, 3>(ubo_load_8_xz[0], ubo_load_8_y, ubo_load_8_xz[1]));
+  uint2 ubo_load_9 = u[0].xy;
+  vector<float16_t, 2> ubo_load_9_xz = vector<float16_t, 2>(f16tof32(ubo_load_9 & 0xFFFF));
+  float16_t ubo_load_9_y = f16tof32(ubo_load_9[0] >> 16);
+  const float16_t a = abs(vector<float16_t, 3>(ubo_load_9_xz[0], ubo_load_9_y, ubo_load_9_xz[1]).zxy.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_builtin.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_builtin.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..fa56e93
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_builtin.wgsl.expected.fxc.hlsl

@@ -0,0 +1,46 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+
+matrix<float16_t, 4, 3> tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  return matrix<float16_t, 4, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]), vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 3, 4> t = transpose(tint_symbol(u, 0u));
+  uint2 ubo_load_8 = u[0].zw;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  float16_t ubo_load_8_y = f16tof32(ubo_load_8[0] >> 16);
+  const float16_t l = length(vector<float16_t, 3>(ubo_load_8_xz[0], ubo_load_8_y, ubo_load_8_xz[1]));
+  uint2 ubo_load_9 = u[0].xy;
+  vector<float16_t, 2> ubo_load_9_xz = vector<float16_t, 2>(f16tof32(ubo_load_9 & 0xFFFF));
+  float16_t ubo_load_9_y = f16tof32(ubo_load_9[0] >> 16);
+  const float16_t a = abs(vector<float16_t, 3>(ubo_load_9_xz[0], ubo_load_9_y, ubo_load_9_xz[1]).zxy.x);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x00000214C6C4DE30(5,8-16): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_builtin.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_builtin.wgsl.expected.glsl
new file mode 100644
index 0000000..699e138
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_builtin.wgsl.expected.glsl

@@ -0,0 +1,25 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  f16vec3 inner_0;
+  f16vec3 inner_1;
+  f16vec3 inner_2;
+  f16vec3 inner_3;
+} u;
+
+f16mat4x3 load_u_inner() {
+  return f16mat4x3(u.inner_0, u.inner_1, u.inner_2, u.inner_3);
+}
+
+void f() {
+  f16mat3x4 t = transpose(load_u_inner());
+  float16_t l = length(u.inner_1);
+  float16_t a = abs(u.inner_0.zxy[0u]);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_builtin.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_builtin.wgsl.expected.msl
new file mode 100644
index 0000000..6bd3adc
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_builtin.wgsl.expected.msl

@@ -0,0 +1,10 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void f(const constant half4x3* tint_symbol [[buffer(0)]]) {
+  half3x4 const t = transpose(*(tint_symbol));
+  half const l = length((*(tint_symbol))[1]);
+  half const a = fabs(half3((*(tint_symbol))[0]).zxy[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_builtin.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_builtin.wgsl.expected.spvasm
new file mode 100644
index 0000000..a5c7161
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_builtin.wgsl.expected.spvasm

@@ -0,0 +1,75 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 48
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+         %39 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner_0"
+               OpMemberName %u_block_std140 1 "inner_1"
+               OpMemberName %u_block_std140 2 "inner_2"
+               OpMemberName %u_block_std140 3 "inner_3"
+               OpName %u "u"
+               OpName %load_u_inner "load_u_inner"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %u_block_std140 1 Offset 8
+               OpMemberDecorate %u_block_std140 2 Offset 16
+               OpMemberDecorate %u_block_std140 3 Offset 24
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+%u_block_std140 = OpTypeStruct %v3half %v3half %v3half %v3half
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat4v3half = OpTypeMatrix %v3half 4
+          %6 = OpTypeFunction %mat4v3half
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+     %uint_3 = OpConstant %uint 3
+       %void = OpTypeVoid
+         %30 = OpTypeFunction %void
+     %v4half = OpTypeVector %half 4
+ %mat3v4half = OpTypeMatrix %v4half 3
+         %46 = OpConstantNull %uint
+%load_u_inner = OpFunction %mat4v3half None %6
+          %9 = OpLabel
+         %15 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0
+         %16 = OpLoad %v3half %15
+         %19 = OpAccessChain %_ptr_Uniform_v3half %u %uint_1
+         %20 = OpLoad %v3half %19
+         %23 = OpAccessChain %_ptr_Uniform_v3half %u %uint_2
+         %24 = OpLoad %v3half %23
+         %27 = OpAccessChain %_ptr_Uniform_v3half %u %uint_3
+         %28 = OpLoad %v3half %27
+         %29 = OpCompositeConstruct %mat4v3half %16 %20 %24 %28
+               OpReturnValue %29
+               OpFunctionEnd
+          %f = OpFunction %void None %30
+         %33 = OpLabel
+         %37 = OpFunctionCall %mat4v3half %load_u_inner
+         %34 = OpTranspose %mat3v4half %37
+         %40 = OpAccessChain %_ptr_Uniform_v3half %u %uint_1
+         %41 = OpLoad %v3half %40
+         %38 = OpExtInst %half %39 Length %41
+         %43 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0
+         %44 = OpLoad %v3half %43
+         %45 = OpVectorShuffle %v3half %44 %44 2 0 1
+         %47 = OpCompositeExtract %half %45 0
+         %42 = OpExtInst %half %39 FAbs %47
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_builtin.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_builtin.wgsl.expected.wgsl
new file mode 100644
index 0000000..4443638
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_builtin.wgsl.expected.wgsl

@@ -0,0 +1,10 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat4x3<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let t = transpose(u);
+  let l = length(u[1]);
+  let a = abs(u[0].zxy.x);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_fn.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_fn.wgsl
new file mode 100644
index 0000000..9fd9ba4
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_fn.wgsl

@@ -0,0 +1,16 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat4x3<f16>;
+
+fn a(m : mat4x3<f16>) {}
+fn b(v : vec3<f16>) {}
+fn c(f : f16) {}
+
+@compute @workgroup_size(1)
+fn f() {
+    a(u);
+    b(u[1]);
+    b(u[1].zxy);
+    c(u[1].x);
+    c(u[1].zxy.x);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_fn.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_fn.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..778064a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_fn.wgsl.expected.dxc.hlsl

@@ -0,0 +1,55 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+
+void a(matrix<float16_t, 4, 3> m) {
+}
+
+void b(vector<float16_t, 3> v) {
+}
+
+void c(float16_t f_1) {
+}
+
+matrix<float16_t, 4, 3> tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  return matrix<float16_t, 4, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]), vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  uint2 ubo_load_8 = u[0].zw;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  float16_t ubo_load_8_y = f16tof32(ubo_load_8[0] >> 16);
+  b(vector<float16_t, 3>(ubo_load_8_xz[0], ubo_load_8_y, ubo_load_8_xz[1]));
+  uint2 ubo_load_9 = u[0].zw;
+  vector<float16_t, 2> ubo_load_9_xz = vector<float16_t, 2>(f16tof32(ubo_load_9 & 0xFFFF));
+  float16_t ubo_load_9_y = f16tof32(ubo_load_9[0] >> 16);
+  b(vector<float16_t, 3>(ubo_load_9_xz[0], ubo_load_9_y, ubo_load_9_xz[1]).zxy);
+  c(float16_t(f16tof32(((u[0].z) & 0xFFFF))));
+  uint2 ubo_load_10 = u[0].zw;
+  vector<float16_t, 2> ubo_load_10_xz = vector<float16_t, 2>(f16tof32(ubo_load_10 & 0xFFFF));
+  float16_t ubo_load_10_y = f16tof32(ubo_load_10[0] >> 16);
+  c(vector<float16_t, 3>(ubo_load_10_xz[0], ubo_load_10_y, ubo_load_10_xz[1]).zxy.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_fn.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_fn.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..3cff235
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_fn.wgsl.expected.fxc.hlsl

@@ -0,0 +1,62 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+
+void a(matrix<float16_t, 4, 3> m) {
+}
+
+void b(vector<float16_t, 3> v) {
+}
+
+void c(float16_t f_1) {
+}
+
+matrix<float16_t, 4, 3> tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  return matrix<float16_t, 4, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]), vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  uint2 ubo_load_8 = u[0].zw;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  float16_t ubo_load_8_y = f16tof32(ubo_load_8[0] >> 16);
+  b(vector<float16_t, 3>(ubo_load_8_xz[0], ubo_load_8_y, ubo_load_8_xz[1]));
+  uint2 ubo_load_9 = u[0].zw;
+  vector<float16_t, 2> ubo_load_9_xz = vector<float16_t, 2>(f16tof32(ubo_load_9 & 0xFFFF));
+  float16_t ubo_load_9_y = f16tof32(ubo_load_9[0] >> 16);
+  b(vector<float16_t, 3>(ubo_load_9_xz[0], ubo_load_9_y, ubo_load_9_xz[1]).zxy);
+  c(float16_t(f16tof32(((u[0].z) & 0xFFFF))));
+  uint2 ubo_load_10 = u[0].zw;
+  vector<float16_t, 2> ubo_load_10_xz = vector<float16_t, 2>(f16tof32(ubo_load_10 & 0xFFFF));
+  float16_t ubo_load_10_y = f16tof32(ubo_load_10[0] >> 16);
+  c(vector<float16_t, 3>(ubo_load_10_xz[0], ubo_load_10_y, ubo_load_10_xz[1]).zxy.x);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001F16275ECF0(5,15-23): error X3000: syntax error: unexpected token 'float16_t'
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001F16275ECF0(8,15-23): error X3000: syntax error: unexpected token 'float16_t'
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001F16275ECF0(11,8-16): error X3000: unrecognized identifier 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_fn.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_fn.wgsl.expected.glsl
new file mode 100644
index 0000000..a7cd45d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_fn.wgsl.expected.glsl

@@ -0,0 +1,36 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  f16vec3 inner_0;
+  f16vec3 inner_1;
+  f16vec3 inner_2;
+  f16vec3 inner_3;
+} u;
+
+void a(f16mat4x3 m) {
+}
+
+void b(f16vec3 v) {
+}
+
+void c(float16_t f_1) {
+}
+
+f16mat4x3 load_u_inner() {
+  return f16mat4x3(u.inner_0, u.inner_1, u.inner_2, u.inner_3);
+}
+
+void f() {
+  a(load_u_inner());
+  b(u.inner_1);
+  b(u.inner_1.zxy);
+  c(u.inner_1[0u]);
+  c(u.inner_1.zxy[0u]);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_fn.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_fn.wgsl.expected.msl
new file mode 100644
index 0000000..84e67e0
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_fn.wgsl.expected.msl

@@ -0,0 +1,21 @@
+#include <metal_stdlib>
+
+using namespace metal;
+void a(half4x3 m) {
+}
+
+void b(half3 v) {
+}
+
+void c(half f_1) {
+}
+
+kernel void f(const constant half4x3* tint_symbol [[buffer(0)]]) {
+  a(*(tint_symbol));
+  b((*(tint_symbol))[1]);
+  b(half3((*(tint_symbol))[1]).zxy);
+  c((*(tint_symbol))[1][0]);
+  c(half3((*(tint_symbol))[1]).zxy[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_fn.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_fn.wgsl.expected.spvasm
new file mode 100644
index 0000000..7ab21d9
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_fn.wgsl.expected.spvasm

@@ -0,0 +1,104 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 65
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner_0"
+               OpMemberName %u_block_std140 1 "inner_1"
+               OpMemberName %u_block_std140 2 "inner_2"
+               OpMemberName %u_block_std140 3 "inner_3"
+               OpName %u "u"
+               OpName %a "a"
+               OpName %m "m"
+               OpName %b "b"
+               OpName %v "v"
+               OpName %c "c"
+               OpName %f_1 "f_1"
+               OpName %load_u_inner "load_u_inner"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %u_block_std140 1 Offset 8
+               OpMemberDecorate %u_block_std140 2 Offset 16
+               OpMemberDecorate %u_block_std140 3 Offset 24
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+%u_block_std140 = OpTypeStruct %v3half %v3half %v3half %v3half
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+       %void = OpTypeVoid
+ %mat4v3half = OpTypeMatrix %v3half 4
+          %6 = OpTypeFunction %void %mat4v3half
+         %12 = OpTypeFunction %void %v3half
+         %16 = OpTypeFunction %void %half
+         %20 = OpTypeFunction %mat4v3half
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+     %uint_3 = OpConstant %uint 3
+         %43 = OpTypeFunction %void
+         %56 = OpConstantNull %uint
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+          %a = OpFunction %void None %6
+          %m = OpFunctionParameter %mat4v3half
+         %11 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %b = OpFunction %void None %12
+          %v = OpFunctionParameter %v3half
+         %15 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %c = OpFunction %void None %16
+        %f_1 = OpFunctionParameter %half
+         %19 = OpLabel
+               OpReturn
+               OpFunctionEnd
+%load_u_inner = OpFunction %mat4v3half None %20
+         %22 = OpLabel
+         %28 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0
+         %29 = OpLoad %v3half %28
+         %32 = OpAccessChain %_ptr_Uniform_v3half %u %uint_1
+         %33 = OpLoad %v3half %32
+         %36 = OpAccessChain %_ptr_Uniform_v3half %u %uint_2
+         %37 = OpLoad %v3half %36
+         %40 = OpAccessChain %_ptr_Uniform_v3half %u %uint_3
+         %41 = OpLoad %v3half %40
+         %42 = OpCompositeConstruct %mat4v3half %29 %33 %37 %41
+               OpReturnValue %42
+               OpFunctionEnd
+          %f = OpFunction %void None %43
+         %45 = OpLabel
+         %47 = OpFunctionCall %mat4v3half %load_u_inner
+         %46 = OpFunctionCall %void %a %47
+         %49 = OpAccessChain %_ptr_Uniform_v3half %u %uint_1
+         %50 = OpLoad %v3half %49
+         %48 = OpFunctionCall %void %b %50
+         %52 = OpAccessChain %_ptr_Uniform_v3half %u %uint_1
+         %53 = OpLoad %v3half %52
+         %54 = OpVectorShuffle %v3half %53 %53 2 0 1
+         %51 = OpFunctionCall %void %b %54
+         %58 = OpAccessChain %_ptr_Uniform_half %u %uint_1 %56
+         %59 = OpLoad %half %58
+         %55 = OpFunctionCall %void %c %59
+         %61 = OpAccessChain %_ptr_Uniform_v3half %u %uint_1
+         %62 = OpLoad %v3half %61
+         %63 = OpVectorShuffle %v3half %62 %62 2 0 1
+         %64 = OpCompositeExtract %half %63 0
+         %60 = OpFunctionCall %void %c %64
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_fn.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_fn.wgsl.expected.wgsl
new file mode 100644
index 0000000..5e553aa
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_fn.wgsl.expected.wgsl

@@ -0,0 +1,21 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat4x3<f16>;
+
+fn a(m : mat4x3<f16>) {
+}
+
+fn b(v : vec3<f16>) {
+}
+
+fn c(f : f16) {
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  a(u);
+  b(u[1]);
+  b(u[1].zxy);
+  c(u[1].x);
+  c(u[1].zxy.x);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_private.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_private.wgsl
new file mode 100644
index 0000000..d971a47
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_private.wgsl

@@ -0,0 +1,12 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat4x3<f16>;
+var<private> p : mat4x3<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+    p = u;
+    p[1] = u[0];
+    p[1] = u[0].zxy;
+    p[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_private.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_private.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..a448fe7
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_private.wgsl.expected.dxc.hlsl

@@ -0,0 +1,43 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+static matrix<float16_t, 4, 3> p = matrix<float16_t, 4, 3>(float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h));
+
+matrix<float16_t, 4, 3> tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  return matrix<float16_t, 4, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]), vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  uint2 ubo_load_8 = u[0].xy;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  float16_t ubo_load_8_y = f16tof32(ubo_load_8[0] >> 16);
+  p[1] = vector<float16_t, 3>(ubo_load_8_xz[0], ubo_load_8_y, ubo_load_8_xz[1]);
+  uint2 ubo_load_9 = u[0].xy;
+  vector<float16_t, 2> ubo_load_9_xz = vector<float16_t, 2>(f16tof32(ubo_load_9 & 0xFFFF));
+  float16_t ubo_load_9_y = f16tof32(ubo_load_9[0] >> 16);
+  p[1] = vector<float16_t, 3>(ubo_load_9_xz[0], ubo_load_9_y, ubo_load_9_xz[1]).zxy;
+  p[0][1] = float16_t(f16tof32(((u[0].z) & 0xFFFF)));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_private.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_private.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..d1183d2
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_private.wgsl.expected.fxc.hlsl

@@ -0,0 +1,48 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+static matrix<float16_t, 4, 3> p = matrix<float16_t, 4, 3>(float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h));
+
+matrix<float16_t, 4, 3> tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  return matrix<float16_t, 4, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]), vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  uint2 ubo_load_8 = u[0].xy;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  float16_t ubo_load_8_y = f16tof32(ubo_load_8[0] >> 16);
+  p[1] = vector<float16_t, 3>(ubo_load_8_xz[0], ubo_load_8_y, ubo_load_8_xz[1]);
+  uint2 ubo_load_9 = u[0].xy;
+  vector<float16_t, 2> ubo_load_9_xz = vector<float16_t, 2>(f16tof32(ubo_load_9 & 0xFFFF));
+  float16_t ubo_load_9_y = f16tof32(ubo_load_9[0] >> 16);
+  p[1] = vector<float16_t, 3>(ubo_load_9_xz[0], ubo_load_9_y, ubo_load_9_xz[1]).zxy;
+  p[0][1] = float16_t(f16tof32(((u[0].z) & 0xFFFF)));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001A61234DB60(4,15-23): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_private.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_private.wgsl.expected.glsl
new file mode 100644
index 0000000..b00a3ed
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_private.wgsl.expected.glsl

@@ -0,0 +1,27 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  f16vec3 inner_0;
+  f16vec3 inner_1;
+  f16vec3 inner_2;
+  f16vec3 inner_3;
+} u;
+
+f16mat4x3 p = f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf);
+f16mat4x3 load_u_inner() {
+  return f16mat4x3(u.inner_0, u.inner_1, u.inner_2, u.inner_3);
+}
+
+void f() {
+  p = load_u_inner();
+  p[1] = u.inner_0;
+  p[1] = u.inner_0.zxy;
+  p[0][1] = u.inner_1[0u];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_private.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_private.wgsl.expected.msl
new file mode 100644
index 0000000..5aa938f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_private.wgsl.expected.msl

@@ -0,0 +1,12 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void f(const constant half4x3* tint_symbol_1 [[buffer(0)]]) {
+  thread half4x3 tint_symbol = half4x3(0.0h);
+  tint_symbol = *(tint_symbol_1);
+  tint_symbol[1] = (*(tint_symbol_1))[0];
+  tint_symbol[1] = half3((*(tint_symbol_1))[0]).zxy;
+  tint_symbol[0][1] = (*(tint_symbol_1))[1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_private.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_private.wgsl.expected.spvasm
new file mode 100644
index 0000000..c733bfd
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_private.wgsl.expected.spvasm

@@ -0,0 +1,87 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 55
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner_0"
+               OpMemberName %u_block_std140 1 "inner_1"
+               OpMemberName %u_block_std140 2 "inner_2"
+               OpMemberName %u_block_std140 3 "inner_3"
+               OpName %u "u"
+               OpName %p "p"
+               OpName %load_u_inner "load_u_inner"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %u_block_std140 1 Offset 8
+               OpMemberDecorate %u_block_std140 2 Offset 16
+               OpMemberDecorate %u_block_std140 3 Offset 24
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+%u_block_std140 = OpTypeStruct %v3half %v3half %v3half %v3half
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat4v3half = OpTypeMatrix %v3half 4
+%_ptr_Private_mat4v3half = OpTypePointer Private %mat4v3half
+          %9 = OpConstantNull %mat4v3half
+          %p = OpVariable %_ptr_Private_mat4v3half Private %9
+         %10 = OpTypeFunction %mat4v3half
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+     %uint_3 = OpConstant %uint 3
+       %void = OpTypeVoid
+         %33 = OpTypeFunction %void
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Private_v3half = OpTypePointer Private %v3half
+         %48 = OpConstantNull %int
+%_ptr_Private_half = OpTypePointer Private %half
+         %51 = OpConstantNull %uint
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+%load_u_inner = OpFunction %mat4v3half None %10
+         %12 = OpLabel
+         %18 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0
+         %19 = OpLoad %v3half %18
+         %22 = OpAccessChain %_ptr_Uniform_v3half %u %uint_1
+         %23 = OpLoad %v3half %22
+         %26 = OpAccessChain %_ptr_Uniform_v3half %u %uint_2
+         %27 = OpLoad %v3half %26
+         %30 = OpAccessChain %_ptr_Uniform_v3half %u %uint_3
+         %31 = OpLoad %v3half %30
+         %32 = OpCompositeConstruct %mat4v3half %19 %23 %27 %31
+               OpReturnValue %32
+               OpFunctionEnd
+          %f = OpFunction %void None %33
+         %36 = OpLabel
+         %37 = OpFunctionCall %mat4v3half %load_u_inner
+               OpStore %p %37
+         %41 = OpAccessChain %_ptr_Private_v3half %p %int_1
+         %42 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0
+         %43 = OpLoad %v3half %42
+               OpStore %41 %43
+         %44 = OpAccessChain %_ptr_Private_v3half %p %int_1
+         %45 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0
+         %46 = OpLoad %v3half %45
+         %47 = OpVectorShuffle %v3half %46 %46 2 0 1
+               OpStore %44 %47
+         %50 = OpAccessChain %_ptr_Private_half %p %48 %int_1
+         %53 = OpAccessChain %_ptr_Uniform_half %u %uint_1 %51
+         %54 = OpLoad %half %53
+               OpStore %50 %54
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_private.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_private.wgsl.expected.wgsl
new file mode 100644
index 0000000..bf1dbf2
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_private.wgsl.expected.wgsl

@@ -0,0 +1,13 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat4x3<f16>;
+
+var<private> p : mat4x3<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+  p = u;
+  p[1] = u[0];
+  p[1] = u[0].zxy;
+  p[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_storage.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_storage.wgsl
new file mode 100644
index 0000000..10ee5e7
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_storage.wgsl

@@ -0,0 +1,12 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat4x3<f16>;
+@group(0) @binding(1) var<storage, read_write> s : mat4x3<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+    s = u;
+    s[1] = u[0];
+    s[1] = u[0].zxy;
+    s[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_storage.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_storage.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..7d09ac6
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_storage.wgsl.expected.dxc.hlsl

@@ -0,0 +1,50 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 4, 3> value) {
+  buffer.Store<vector<float16_t, 3> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 8u), value[1u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 16u), value[2u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 24u), value[3u]);
+}
+
+matrix<float16_t, 4, 3> tint_symbol_2(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  return matrix<float16_t, 4, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]), vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_2(u, 0u));
+  uint2 ubo_load_8 = u[0].xy;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  float16_t ubo_load_8_y = f16tof32(ubo_load_8[0] >> 16);
+  s.Store<vector<float16_t, 3> >(8u, vector<float16_t, 3>(ubo_load_8_xz[0], ubo_load_8_y, ubo_load_8_xz[1]));
+  uint2 ubo_load_9 = u[0].xy;
+  vector<float16_t, 2> ubo_load_9_xz = vector<float16_t, 2>(f16tof32(ubo_load_9 & 0xFFFF));
+  float16_t ubo_load_9_y = f16tof32(ubo_load_9[0] >> 16);
+  s.Store<vector<float16_t, 3> >(8u, vector<float16_t, 3>(ubo_load_9_xz[0], ubo_load_9_y, ubo_load_9_xz[1]).zxy);
+  s.Store<float16_t>(2u, float16_t(f16tof32(((u[0].z) & 0xFFFF))));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_storage.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_storage.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..fb2dab0
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_storage.wgsl.expected.fxc.hlsl

@@ -0,0 +1,56 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 4, 3> value) {
+  buffer.Store<vector<float16_t, 3> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 8u), value[1u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 16u), value[2u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 24u), value[3u]);
+}
+
+matrix<float16_t, 4, 3> tint_symbol_2(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  return matrix<float16_t, 4, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]), vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_2(u, 0u));
+  uint2 ubo_load_8 = u[0].xy;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  float16_t ubo_load_8_y = f16tof32(ubo_load_8[0] >> 16);
+  s.Store<vector<float16_t, 3> >(8u, vector<float16_t, 3>(ubo_load_8_xz[0], ubo_load_8_y, ubo_load_8_xz[1]));
+  uint2 ubo_load_9 = u[0].xy;
+  vector<float16_t, 2> ubo_load_9_xz = vector<float16_t, 2>(f16tof32(ubo_load_9 & 0xFFFF));
+  float16_t ubo_load_9_y = f16tof32(ubo_load_9[0] >> 16);
+  s.Store<vector<float16_t, 3> >(8u, vector<float16_t, 3>(ubo_load_9_xz[0], ubo_load_9_y, ubo_load_9_xz[1]).zxy);
+  s.Store<float16_t>(2u, float16_t(f16tof32(((u[0].z) & 0xFFFF))));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001FD3C2FBCC0(6,66-74): error X3000: syntax error: unexpected token 'float16_t'
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001FD3C2FBCC0(7,3-14): error X3018: invalid subscript 'Store'
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_storage.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_storage.wgsl.expected.glsl
new file mode 100644
index 0000000..dd054eb
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_storage.wgsl.expected.glsl

@@ -0,0 +1,30 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  f16vec3 inner_0;
+  f16vec3 inner_1;
+  f16vec3 inner_2;
+  f16vec3 inner_3;
+} u;
+
+layout(binding = 1, std430) buffer u_block_ssbo {
+  f16mat4x3 inner;
+} s;
+
+f16mat4x3 load_u_inner() {
+  return f16mat4x3(u.inner_0, u.inner_1, u.inner_2, u.inner_3);
+}
+
+void f() {
+  s.inner = load_u_inner();
+  s.inner[1] = u.inner_0;
+  s.inner[1] = u.inner_0.zxy;
+  s.inner[0][1] = u.inner_1[0u];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_storage.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_storage.wgsl.expected.msl
new file mode 100644
index 0000000..cab8651
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_storage.wgsl.expected.msl

@@ -0,0 +1,11 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void f(device half4x3* tint_symbol [[buffer(1)]], const constant half4x3* tint_symbol_1 [[buffer(0)]]) {
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[0];
+  (*(tint_symbol))[1] = half3((*(tint_symbol_1))[0]).zxy;
+  (*(tint_symbol))[0][1] = (*(tint_symbol_1))[1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_storage.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_storage.wgsl.expected.spvasm
new file mode 100644
index 0000000..d22bb08
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_storage.wgsl.expected.spvasm

@@ -0,0 +1,97 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 57
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner_0"
+               OpMemberName %u_block_std140 1 "inner_1"
+               OpMemberName %u_block_std140 2 "inner_2"
+               OpMemberName %u_block_std140 3 "inner_3"
+               OpName %u "u"
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %s "s"
+               OpName %load_u_inner "load_u_inner"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %u_block_std140 1 Offset 8
+               OpMemberDecorate %u_block_std140 2 Offset 16
+               OpMemberDecorate %u_block_std140 3 Offset 24
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 8
+               OpDecorate %s DescriptorSet 0
+               OpDecorate %s Binding 1
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+%u_block_std140 = OpTypeStruct %v3half %v3half %v3half %v3half
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat4v3half = OpTypeMatrix %v3half 4
+    %u_block = OpTypeStruct %mat4v3half
+%_ptr_StorageBuffer_u_block = OpTypePointer StorageBuffer %u_block
+          %s = OpVariable %_ptr_StorageBuffer_u_block StorageBuffer
+         %10 = OpTypeFunction %mat4v3half
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+     %uint_3 = OpConstant %uint 3
+       %void = OpTypeVoid
+         %33 = OpTypeFunction %void
+%_ptr_StorageBuffer_mat4v3half = OpTypePointer StorageBuffer %mat4v3half
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_StorageBuffer_v3half = OpTypePointer StorageBuffer %v3half
+         %50 = OpConstantNull %int
+%_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
+         %53 = OpConstantNull %uint
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+%load_u_inner = OpFunction %mat4v3half None %10
+         %12 = OpLabel
+         %18 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0
+         %19 = OpLoad %v3half %18
+         %22 = OpAccessChain %_ptr_Uniform_v3half %u %uint_1
+         %23 = OpLoad %v3half %22
+         %26 = OpAccessChain %_ptr_Uniform_v3half %u %uint_2
+         %27 = OpLoad %v3half %26
+         %30 = OpAccessChain %_ptr_Uniform_v3half %u %uint_3
+         %31 = OpLoad %v3half %30
+         %32 = OpCompositeConstruct %mat4v3half %19 %23 %27 %31
+               OpReturnValue %32
+               OpFunctionEnd
+          %f = OpFunction %void None %33
+         %36 = OpLabel
+         %38 = OpAccessChain %_ptr_StorageBuffer_mat4v3half %s %uint_0
+         %39 = OpFunctionCall %mat4v3half %load_u_inner
+               OpStore %38 %39
+         %43 = OpAccessChain %_ptr_StorageBuffer_v3half %s %uint_0 %int_1
+         %44 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0
+         %45 = OpLoad %v3half %44
+               OpStore %43 %45
+         %46 = OpAccessChain %_ptr_StorageBuffer_v3half %s %uint_0 %int_1
+         %47 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0
+         %48 = OpLoad %v3half %47
+         %49 = OpVectorShuffle %v3half %48 %48 2 0 1
+               OpStore %46 %49
+         %52 = OpAccessChain %_ptr_StorageBuffer_half %s %uint_0 %50 %int_1
+         %55 = OpAccessChain %_ptr_Uniform_half %u %uint_1 %53
+         %56 = OpLoad %half %55
+               OpStore %52 %56
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_storage.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_storage.wgsl.expected.wgsl
new file mode 100644
index 0000000..e556976
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_storage.wgsl.expected.wgsl

@@ -0,0 +1,13 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat4x3<f16>;
+
+@group(0) @binding(1) var<storage, read_write> s : mat4x3<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+  s = u;
+  s[1] = u[0];
+  s[1] = u[0].zxy;
+  s[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_workgroup.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_workgroup.wgsl
new file mode 100644
index 0000000..258704a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_workgroup.wgsl

@@ -0,0 +1,12 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat4x3<f16>;
+var<workgroup> w : mat4x3<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+    w = u;
+    w[1] = u[0];
+    w[1] = u[0].zxy;
+    w[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_workgroup.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_workgroup.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..d577f14
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_workgroup.wgsl.expected.dxc.hlsl

@@ -0,0 +1,55 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+groupshared matrix<float16_t, 4, 3> w;
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+matrix<float16_t, 4, 3> tint_symbol_2(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  return matrix<float16_t, 4, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]), vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    w = matrix<float16_t, 4, 3>((float16_t(0.0h)).xxx, (float16_t(0.0h)).xxx, (float16_t(0.0h)).xxx, (float16_t(0.0h)).xxx);
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  uint2 ubo_load_8 = u[0].xy;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  float16_t ubo_load_8_y = f16tof32(ubo_load_8[0] >> 16);
+  w[1] = vector<float16_t, 3>(ubo_load_8_xz[0], ubo_load_8_y, ubo_load_8_xz[1]);
+  uint2 ubo_load_9 = u[0].xy;
+  vector<float16_t, 2> ubo_load_9_xz = vector<float16_t, 2>(f16tof32(ubo_load_9 & 0xFFFF));
+  float16_t ubo_load_9_y = f16tof32(ubo_load_9[0] >> 16);
+  w[1] = vector<float16_t, 3>(ubo_load_9_xz[0], ubo_load_9_y, ubo_load_9_xz[1]).zxy;
+  w[0][1] = float16_t(f16tof32(((u[0].z) & 0xFFFF)));
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_workgroup.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_workgroup.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..b5fa2ba
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_workgroup.wgsl.expected.fxc.hlsl

@@ -0,0 +1,60 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+groupshared matrix<float16_t, 4, 3> w;
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+matrix<float16_t, 4, 3> tint_symbol_2(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  return matrix<float16_t, 4, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]), vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    w = matrix<float16_t, 4, 3>((float16_t(0.0h)).xxx, (float16_t(0.0h)).xxx, (float16_t(0.0h)).xxx, (float16_t(0.0h)).xxx);
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  uint2 ubo_load_8 = u[0].xy;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  float16_t ubo_load_8_y = f16tof32(ubo_load_8[0] >> 16);
+  w[1] = vector<float16_t, 3>(ubo_load_8_xz[0], ubo_load_8_y, ubo_load_8_xz[1]);
+  uint2 ubo_load_9 = u[0].xy;
+  vector<float16_t, 2> ubo_load_9_xz = vector<float16_t, 2>(f16tof32(ubo_load_9 & 0xFFFF));
+  float16_t ubo_load_9_y = f16tof32(ubo_load_9[0] >> 16);
+  w[1] = vector<float16_t, 3>(ubo_load_9_xz[0], ubo_load_9_y, ubo_load_9_xz[1]).zxy;
+  w[0][1] = float16_t(f16tof32(((u[0].z) & 0xFFFF)));
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000019BA46EC370(4,20-28): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_workgroup.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_workgroup.wgsl.expected.glsl
new file mode 100644
index 0000000..4829a28
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_workgroup.wgsl.expected.glsl

@@ -0,0 +1,31 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  f16vec3 inner_0;
+  f16vec3 inner_1;
+  f16vec3 inner_2;
+  f16vec3 inner_3;
+} u;
+
+shared f16mat4x3 w;
+f16mat4x3 load_u_inner() {
+  return f16mat4x3(u.inner_0, u.inner_1, u.inner_2, u.inner_3);
+}
+
+void f(uint local_invocation_index) {
+  {
+    w = f16mat4x3(f16vec3(0.0hf), f16vec3(0.0hf), f16vec3(0.0hf), f16vec3(0.0hf));
+  }
+  barrier();
+  w = load_u_inner();
+  w[1] = u.inner_0;
+  w[1] = u.inner_0.zxy;
+  w[0][1] = u.inner_1[0u];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f(gl_LocalInvocationIndex);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_workgroup.wgsl.expected.msl
new file mode 100644
index 0000000..6df9ce1
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_workgroup.wgsl.expected.msl

@@ -0,0 +1,24 @@
+#include <metal_stdlib>
+
+using namespace metal;
+struct tint_symbol_5 {
+  half4x3 w;
+};
+
+void f_inner(uint local_invocation_index, threadgroup half4x3* const tint_symbol, const constant half4x3* const tint_symbol_1) {
+  {
+    *(tint_symbol) = half4x3(half3(0.0h), half3(0.0h), half3(0.0h), half3(0.0h));
+  }
+  threadgroup_barrier(mem_flags::mem_threadgroup);
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[0];
+  (*(tint_symbol))[1] = half3((*(tint_symbol_1))[0]).zxy;
+  (*(tint_symbol))[0][1] = (*(tint_symbol_1))[1][0];
+}
+
+kernel void f(const constant half4x3* tint_symbol_4 [[buffer(0)]], threadgroup tint_symbol_5* tint_symbol_3 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
+  threadgroup half4x3* const tint_symbol_2 = &((*(tint_symbol_3)).w);
+  f_inner(local_invocation_index, tint_symbol_2, tint_symbol_4);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_workgroup.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_workgroup.wgsl.expected.spvasm
new file mode 100644
index 0000000..b29b7c7
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_workgroup.wgsl.expected.spvasm

@@ -0,0 +1,104 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 65
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f" %local_invocation_index_1
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %local_invocation_index_1 "local_invocation_index_1"
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner_0"
+               OpMemberName %u_block_std140 1 "inner_1"
+               OpMemberName %u_block_std140 2 "inner_2"
+               OpMemberName %u_block_std140 3 "inner_3"
+               OpName %u "u"
+               OpName %w "w"
+               OpName %load_u_inner "load_u_inner"
+               OpName %f_inner "f_inner"
+               OpName %local_invocation_index "local_invocation_index"
+               OpName %f "f"
+               OpDecorate %local_invocation_index_1 BuiltIn LocalInvocationIndex
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %u_block_std140 1 Offset 8
+               OpMemberDecorate %u_block_std140 2 Offset 16
+               OpMemberDecorate %u_block_std140 3 Offset 24
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %uint = OpTypeInt 32 0
+%_ptr_Input_uint = OpTypePointer Input %uint
+%local_invocation_index_1 = OpVariable %_ptr_Input_uint Input
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+%u_block_std140 = OpTypeStruct %v3half %v3half %v3half %v3half
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat4v3half = OpTypeMatrix %v3half 4
+%_ptr_Workgroup_mat4v3half = OpTypePointer Workgroup %mat4v3half
+          %w = OpVariable %_ptr_Workgroup_mat4v3half Workgroup
+         %12 = OpTypeFunction %mat4v3half
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+     %uint_3 = OpConstant %uint 3
+       %void = OpTypeVoid
+         %34 = OpTypeFunction %void %uint
+         %39 = OpConstantNull %mat4v3half
+   %uint_264 = OpConstant %uint 264
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Workgroup_v3half = OpTypePointer Workgroup %v3half
+         %53 = OpConstantNull %int
+%_ptr_Workgroup_half = OpTypePointer Workgroup %half
+         %56 = OpConstantNull %uint
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+         %60 = OpTypeFunction %void
+%load_u_inner = OpFunction %mat4v3half None %12
+         %14 = OpLabel
+         %19 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0
+         %20 = OpLoad %v3half %19
+         %23 = OpAccessChain %_ptr_Uniform_v3half %u %uint_1
+         %24 = OpLoad %v3half %23
+         %27 = OpAccessChain %_ptr_Uniform_v3half %u %uint_2
+         %28 = OpLoad %v3half %27
+         %31 = OpAccessChain %_ptr_Uniform_v3half %u %uint_3
+         %32 = OpLoad %v3half %31
+         %33 = OpCompositeConstruct %mat4v3half %20 %24 %28 %32
+               OpReturnValue %33
+               OpFunctionEnd
+    %f_inner = OpFunction %void None %34
+%local_invocation_index = OpFunctionParameter %uint
+         %38 = OpLabel
+               OpStore %w %39
+               OpControlBarrier %uint_2 %uint_2 %uint_264
+         %42 = OpFunctionCall %mat4v3half %load_u_inner
+               OpStore %w %42
+         %46 = OpAccessChain %_ptr_Workgroup_v3half %w %int_1
+         %47 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0
+         %48 = OpLoad %v3half %47
+               OpStore %46 %48
+         %49 = OpAccessChain %_ptr_Workgroup_v3half %w %int_1
+         %50 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0
+         %51 = OpLoad %v3half %50
+         %52 = OpVectorShuffle %v3half %51 %51 2 0 1
+               OpStore %49 %52
+         %55 = OpAccessChain %_ptr_Workgroup_half %w %53 %int_1
+         %58 = OpAccessChain %_ptr_Uniform_half %u %uint_1 %56
+         %59 = OpLoad %half %58
+               OpStore %55 %59
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %60
+         %62 = OpLabel
+         %64 = OpLoad %uint %local_invocation_index_1
+         %63 = OpFunctionCall %void %f_inner %64
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_workgroup.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_workgroup.wgsl.expected.wgsl
new file mode 100644
index 0000000..b6d3724
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_workgroup.wgsl.expected.wgsl

@@ -0,0 +1,13 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat4x3<f16>;
+
+var<workgroup> w : mat4x3<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+  w = u;
+  w[1] = u[0];
+  w[1] = u[0].zxy;
+  w[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/dynamic_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/dynamic_index_via_ptr.wgsl
new file mode 100644
index 0000000..c095f4c
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/dynamic_index_via_ptr.wgsl

@@ -0,0 +1,13 @@
+@group(0) @binding(0) var<uniform> m : mat4x3<f32>;
+
+var<private> counter = 0;
+fn i() -> i32 { counter++; return counter; }
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_m   = &m;
+  let p_m_i = &((*p_m)[i()]);
+
+  let l_m   : mat4x3<f32> = *p_m;
+  let l_m_i : vec3<f32>   = *p_m_i;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..4816aa7
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,26 @@
+cbuffer cbuffer_m : register(b0, space0) {
+  uint4 m[4];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+float4x3 tint_symbol(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_m_i_save = i();
+  const float4x3 l_m = tint_symbol(m, 0u);
+  const uint scalar_offset_4 = ((16u * uint(p_m_i_save))) / 4;
+  const float3 l_m_i = asfloat(m[scalar_offset_4 / 4].xyz);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..4816aa7
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,26 @@
+cbuffer cbuffer_m : register(b0, space0) {
+  uint4 m[4];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+float4x3 tint_symbol(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_m_i_save = i();
+  const float4x3 l_m = tint_symbol(m, 0u);
+  const uint scalar_offset_4 = ((16u * uint(p_m_i_save))) / 4;
+  const float3 l_m_i = asfloat(m[scalar_offset_4 / 4].xyz);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/dynamic_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/dynamic_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..0c54273
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/dynamic_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,24 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform m_block_ubo {
+  mat4x3 inner;
+} m;
+
+int counter = 0;
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+void f() {
+  int tint_symbol = i();
+  int p_m_i_save = tint_symbol;
+  mat4x3 l_m = m.inner;
+  vec3 l_m_i = m.inner[p_m_i_save];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/dynamic_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/dynamic_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..cb87975
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/dynamic_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,17 @@
+#include <metal_stdlib>
+
+using namespace metal;
+int i() {
+  thread int tint_symbol_1 = 0;
+  tint_symbol_1 = as_type<int>((as_type<uint>(tint_symbol_1) + as_type<uint>(1)));
+  return tint_symbol_1;
+}
+
+kernel void f(const constant float4x3* tint_symbol_2 [[buffer(0)]]) {
+  int const tint_symbol = i();
+  int const p_m_i_save = tint_symbol;
+  float4x3 const l_m = *(tint_symbol_2);
+  float3 const l_m_i = (*(tint_symbol_2))[p_m_i_save];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/dynamic_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/dynamic_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..8ef501b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/dynamic_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,57 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 31
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %m_block "m_block"
+               OpMemberName %m_block 0 "inner"
+               OpName %m "m"
+               OpName %counter "counter"
+               OpName %i "i"
+               OpName %f "f"
+               OpDecorate %m_block Block
+               OpMemberDecorate %m_block 0 Offset 0
+               OpMemberDecorate %m_block 0 ColMajor
+               OpMemberDecorate %m_block 0 MatrixStride 16
+               OpDecorate %m NonWritable
+               OpDecorate %m DescriptorSet 0
+               OpDecorate %m Binding 0
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat4v3float = OpTypeMatrix %v3float 4
+    %m_block = OpTypeStruct %mat4v3float
+%_ptr_Uniform_m_block = OpTypePointer Uniform %m_block
+          %m = OpVariable %_ptr_Uniform_m_block Uniform
+        %int = OpTypeInt 32 1
+          %8 = OpConstantNull %int
+%_ptr_Private_int = OpTypePointer Private %int
+    %counter = OpVariable %_ptr_Private_int Private %8
+         %11 = OpTypeFunction %int
+      %int_1 = OpConstant %int 1
+       %void = OpTypeVoid
+         %18 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_mat4v3float = OpTypePointer Uniform %mat4v3float
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+          %i = OpFunction %int None %11
+         %13 = OpLabel
+         %14 = OpLoad %int %counter
+         %16 = OpIAdd %int %14 %int_1
+               OpStore %counter %16
+         %17 = OpLoad %int %counter
+               OpReturnValue %17
+               OpFunctionEnd
+          %f = OpFunction %void None %18
+         %21 = OpLabel
+         %22 = OpFunctionCall %int %i
+         %26 = OpAccessChain %_ptr_Uniform_mat4v3float %m %uint_0
+         %27 = OpLoad %mat4v3float %26
+         %29 = OpAccessChain %_ptr_Uniform_v3float %m %uint_0 %22
+         %30 = OpLoad %v3float %29
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/dynamic_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/dynamic_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..7704f9d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/dynamic_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,16 @@
+@group(0) @binding(0) var<uniform> m : mat4x3<f32>;
+
+var<private> counter = 0;
+
+fn i() -> i32 {
+  counter++;
+  return counter;
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_m = &(m);
+  let p_m_i = &((*(p_m))[i()]);
+  let l_m : mat4x3<f32> = *(p_m);
+  let l_m_i : vec3<f32> = *(p_m_i);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/static_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/static_index_via_ptr.wgsl
new file mode 100644
index 0000000..6b1df2a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/static_index_via_ptr.wgsl

@@ -0,0 +1,13 @@
+@group(0) @binding(0) var<uniform> m : mat4x3<f32>;
+
+var<private> counter = 0;
+fn i() -> i32 { counter++; return counter; }
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_m   = &m;
+  let p_m_1 = &((*p_m)[1]);
+
+  let l_m   : mat4x3<f32> = *p_m;
+  let l_m_1 : vec3<f32>   = *p_m_1;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..8492882
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,24 @@
+cbuffer cbuffer_m : register(b0, space0) {
+  uint4 m[4];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+float4x3 tint_symbol(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float4x3 l_m = tint_symbol(m, 0u);
+  const float3 l_m_1 = asfloat(m[1].xyz);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..8492882
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,24 @@
+cbuffer cbuffer_m : register(b0, space0) {
+  uint4 m[4];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+float4x3 tint_symbol(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float4x3 l_m = tint_symbol(m, 0u);
+  const float3 l_m_1 = asfloat(m[1].xyz);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/static_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/static_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..6ceeadc
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/static_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,16 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform m_block_ubo {
+  mat4x3 inner;
+} m;
+
+void f() {
+  mat4x3 l_m = m.inner;
+  vec3 l_m_1 = m.inner[1];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/static_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/static_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..55af02e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/static_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,15 @@
+#include <metal_stdlib>
+
+using namespace metal;
+int i() {
+  thread int tint_symbol = 0;
+  tint_symbol = as_type<int>((as_type<uint>(tint_symbol) + as_type<uint>(1)));
+  return tint_symbol;
+}
+
+kernel void f(const constant float4x3* tint_symbol_1 [[buffer(0)]]) {
+  float4x3 const l_m = *(tint_symbol_1);
+  float3 const l_m_1 = (*(tint_symbol_1))[1];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/static_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/static_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..02ab5ed
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/static_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,56 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 30
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %m_block "m_block"
+               OpMemberName %m_block 0 "inner"
+               OpName %m "m"
+               OpName %counter "counter"
+               OpName %i "i"
+               OpName %f "f"
+               OpDecorate %m_block Block
+               OpMemberDecorate %m_block 0 Offset 0
+               OpMemberDecorate %m_block 0 ColMajor
+               OpMemberDecorate %m_block 0 MatrixStride 16
+               OpDecorate %m NonWritable
+               OpDecorate %m DescriptorSet 0
+               OpDecorate %m Binding 0
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat4v3float = OpTypeMatrix %v3float 4
+    %m_block = OpTypeStruct %mat4v3float
+%_ptr_Uniform_m_block = OpTypePointer Uniform %m_block
+          %m = OpVariable %_ptr_Uniform_m_block Uniform
+        %int = OpTypeInt 32 1
+          %8 = OpConstantNull %int
+%_ptr_Private_int = OpTypePointer Private %int
+    %counter = OpVariable %_ptr_Private_int Private %8
+         %11 = OpTypeFunction %int
+      %int_1 = OpConstant %int 1
+       %void = OpTypeVoid
+         %18 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_mat4v3float = OpTypePointer Uniform %mat4v3float
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+          %i = OpFunction %int None %11
+         %13 = OpLabel
+         %14 = OpLoad %int %counter
+         %16 = OpIAdd %int %14 %int_1
+               OpStore %counter %16
+         %17 = OpLoad %int %counter
+               OpReturnValue %17
+               OpFunctionEnd
+          %f = OpFunction %void None %18
+         %21 = OpLabel
+         %25 = OpAccessChain %_ptr_Uniform_mat4v3float %m %uint_0
+         %26 = OpLoad %mat4v3float %25
+         %28 = OpAccessChain %_ptr_Uniform_v3float %m %uint_0 %int_1
+         %29 = OpLoad %v3float %28
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/static_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/static_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..9e2d6bd
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/static_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,16 @@
+@group(0) @binding(0) var<uniform> m : mat4x3<f32>;
+
+var<private> counter = 0;
+
+fn i() -> i32 {
+  counter++;
+  return counter;
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_m = &(m);
+  let p_m_1 = &((*(p_m))[1]);
+  let l_m : mat4x3<f32> = *(p_m);
+  let l_m_1 : vec3<f32> = *(p_m_1);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_builtin.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_builtin.wgsl
new file mode 100644
index 0000000..1035979
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_builtin.wgsl

@@ -0,0 +1,8 @@
+@group(0) @binding(0) var<uniform> u : mat4x3<f32>;
+
+@compute @workgroup_size(1)
+fn f() {
+    let t = transpose(u);
+    let l = length(u[1]);
+    let a = abs(u[0].zxy.x);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_builtin.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_builtin.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..9e00a15
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_builtin.wgsl.expected.dxc.hlsl

@@ -0,0 +1,19 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[4];
+};
+
+float4x3 tint_symbol(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float3x4 t = transpose(tint_symbol(u, 0u));
+  const float l = length(asfloat(u[1].xyz));
+  const float a = abs(asfloat(u[0].xyz).zxy.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_builtin.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_builtin.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..9e00a15
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_builtin.wgsl.expected.fxc.hlsl

@@ -0,0 +1,19 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[4];
+};
+
+float4x3 tint_symbol(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float3x4 t = transpose(tint_symbol(u, 0u));
+  const float l = length(asfloat(u[1].xyz));
+  const float a = abs(asfloat(u[0].xyz).zxy.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_builtin.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_builtin.wgsl.expected.glsl
new file mode 100644
index 0000000..ab382b6
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_builtin.wgsl.expected.glsl

@@ -0,0 +1,17 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat4x3 inner;
+} u;
+
+void f() {
+  mat3x4 t = transpose(u.inner);
+  float l = length(u.inner[1]);
+  float a = abs(u.inner[0].zxy.x);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_builtin.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_builtin.wgsl.expected.msl
new file mode 100644
index 0000000..803b6d8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_builtin.wgsl.expected.msl

@@ -0,0 +1,10 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void f(const constant float4x3* tint_symbol [[buffer(0)]]) {
+  float3x4 const t = transpose(*(tint_symbol));
+  float const l = length((*(tint_symbol))[1]);
+  float const a = fabs(float3((*(tint_symbol))[0]).zxy[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_builtin.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_builtin.wgsl.expected.spvasm
new file mode 100644
index 0000000..d1dc513
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_builtin.wgsl.expected.spvasm

@@ -0,0 +1,53 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 32
+; Schema: 0
+               OpCapability Shader
+         %20 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat4v3float = OpTypeMatrix %v3float 4
+    %u_block = OpTypeStruct %mat4v3float
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+       %void = OpTypeVoid
+          %7 = OpTypeFunction %void
+    %v4float = OpTypeVector %float 4
+%mat3v4float = OpTypeMatrix %v4float 3
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_mat4v3float = OpTypePointer Uniform %mat4v3float
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+         %27 = OpConstantNull %int
+          %f = OpFunction %void None %7
+         %10 = OpLabel
+         %17 = OpAccessChain %_ptr_Uniform_mat4v3float %u %uint_0
+         %18 = OpLoad %mat4v3float %17
+         %11 = OpTranspose %mat3v4float %18
+         %24 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %int_1
+         %25 = OpLoad %v3float %24
+         %19 = OpExtInst %float %20 Length %25
+         %28 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %27
+         %29 = OpLoad %v3float %28
+         %30 = OpVectorShuffle %v3float %29 %29 2 0 1
+         %31 = OpCompositeExtract %float %30 0
+         %26 = OpExtInst %float %20 FAbs %31
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_builtin.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_builtin.wgsl.expected.wgsl
new file mode 100644
index 0000000..3ff3ff4
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_builtin.wgsl.expected.wgsl

@@ -0,0 +1,8 @@
+@group(0) @binding(0) var<uniform> u : mat4x3<f32>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let t = transpose(u);
+  let l = length(u[1]);
+  let a = abs(u[0].zxy.x);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_fn.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_fn.wgsl
new file mode 100644
index 0000000..58fb44b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_fn.wgsl

@@ -0,0 +1,14 @@
+@group(0) @binding(0) var<uniform> u : mat4x3<f32>;
+
+fn a(m : mat4x3<f32>) {}
+fn b(v : vec3<f32>) {}
+fn c(f : f32) {}
+
+@compute @workgroup_size(1)
+fn f() {
+    a(u);
+    b(u[1]);
+    b(u[1].zxy);
+    c(u[1].x);
+    c(u[1].zxy.x);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_fn.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_fn.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..67c4557
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_fn.wgsl.expected.dxc.hlsl

@@ -0,0 +1,30 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[4];
+};
+
+void a(float4x3 m) {
+}
+
+void b(float3 v) {
+}
+
+void c(float f_1) {
+}
+
+float4x3 tint_symbol(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(asfloat(u[1].xyz));
+  b(asfloat(u[1].xyz).zxy);
+  c(asfloat(u[1].x));
+  c(asfloat(u[1].xyz).zxy.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_fn.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_fn.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..67c4557
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_fn.wgsl.expected.fxc.hlsl

@@ -0,0 +1,30 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[4];
+};
+
+void a(float4x3 m) {
+}
+
+void b(float3 v) {
+}
+
+void c(float f_1) {
+}
+
+float4x3 tint_symbol(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(asfloat(u[1].xyz));
+  b(asfloat(u[1].xyz).zxy);
+  c(asfloat(u[1].x));
+  c(asfloat(u[1].xyz).zxy.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_fn.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_fn.wgsl.expected.glsl
new file mode 100644
index 0000000..ea4d739
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_fn.wgsl.expected.glsl

@@ -0,0 +1,28 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat4x3 inner;
+} u;
+
+void a(mat4x3 m) {
+}
+
+void b(vec3 v) {
+}
+
+void c(float f_1) {
+}
+
+void f() {
+  a(u.inner);
+  b(u.inner[1]);
+  b(u.inner[1].zxy);
+  c(u.inner[1].x);
+  c(u.inner[1].zxy.x);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_fn.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_fn.wgsl.expected.msl
new file mode 100644
index 0000000..2fdb2d1
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_fn.wgsl.expected.msl

@@ -0,0 +1,21 @@
+#include <metal_stdlib>
+
+using namespace metal;
+void a(float4x3 m) {
+}
+
+void b(float3 v) {
+}
+
+void c(float f_1) {
+}
+
+kernel void f(const constant float4x3* tint_symbol [[buffer(0)]]) {
+  a(*(tint_symbol));
+  b((*(tint_symbol))[1]);
+  b(float3((*(tint_symbol))[1]).zxy);
+  c((*(tint_symbol))[1][0]);
+  c(float3((*(tint_symbol))[1]).zxy[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_fn.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_fn.wgsl.expected.spvasm
new file mode 100644
index 0000000..6ae3ab6
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_fn.wgsl.expected.spvasm

@@ -0,0 +1,81 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 48
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %a "a"
+               OpName %m "m"
+               OpName %b "b"
+               OpName %v "v"
+               OpName %c "c"
+               OpName %f_1 "f_1"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat4v3float = OpTypeMatrix %v3float 4
+    %u_block = OpTypeStruct %mat4v3float
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+       %void = OpTypeVoid
+          %7 = OpTypeFunction %void %mat4v3float
+         %12 = OpTypeFunction %void %v3float
+         %16 = OpTypeFunction %void %float
+         %20 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_mat4v3float = OpTypePointer Uniform %mat4v3float
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+          %a = OpFunction %void None %7
+          %m = OpFunctionParameter %mat4v3float
+         %11 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %b = OpFunction %void None %12
+          %v = OpFunctionParameter %v3float
+         %15 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %c = OpFunction %void None %16
+        %f_1 = OpFunctionParameter %float
+         %19 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %20
+         %22 = OpLabel
+         %27 = OpAccessChain %_ptr_Uniform_mat4v3float %u %uint_0
+         %28 = OpLoad %mat4v3float %27
+         %23 = OpFunctionCall %void %a %28
+         %33 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %int_1
+         %34 = OpLoad %v3float %33
+         %29 = OpFunctionCall %void %b %34
+         %36 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %int_1
+         %37 = OpLoad %v3float %36
+         %38 = OpVectorShuffle %v3float %37 %37 2 0 1
+         %35 = OpFunctionCall %void %b %38
+         %41 = OpAccessChain %_ptr_Uniform_float %u %uint_0 %int_1 %uint_0
+         %42 = OpLoad %float %41
+         %39 = OpFunctionCall %void %c %42
+         %44 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %int_1
+         %45 = OpLoad %v3float %44
+         %46 = OpVectorShuffle %v3float %45 %45 2 0 1
+         %47 = OpCompositeExtract %float %46 0
+         %43 = OpFunctionCall %void %c %47
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_fn.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_fn.wgsl.expected.wgsl
new file mode 100644
index 0000000..04612d2
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_fn.wgsl.expected.wgsl

@@ -0,0 +1,19 @@
+@group(0) @binding(0) var<uniform> u : mat4x3<f32>;
+
+fn a(m : mat4x3<f32>) {
+}
+
+fn b(v : vec3<f32>) {
+}
+
+fn c(f : f32) {
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  a(u);
+  b(u[1]);
+  b(u[1].zxy);
+  c(u[1].x);
+  c(u[1].zxy.x);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_private.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_private.wgsl
new file mode 100644
index 0000000..891c736
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_private.wgsl

@@ -0,0 +1,10 @@
+@group(0) @binding(0) var<uniform> u : mat4x3<f32>;
+var<private> p : mat4x3<f32>;
+
+@compute @workgroup_size(1)
+fn f() {
+    p = u;
+    p[1] = u[0];
+    p[1] = u[0].zxy;
+    p[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_private.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_private.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..e0bec97
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_private.wgsl.expected.dxc.hlsl

@@ -0,0 +1,21 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[4];
+};
+static float4x3 p = float4x3(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f);
+
+float4x3 tint_symbol(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = asfloat(u[0].xyz);
+  p[1] = asfloat(u[0].xyz).zxy;
+  p[0][1] = asfloat(u[1].x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_private.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_private.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..e0bec97
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_private.wgsl.expected.fxc.hlsl

@@ -0,0 +1,21 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[4];
+};
+static float4x3 p = float4x3(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f);
+
+float4x3 tint_symbol(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = asfloat(u[0].xyz);
+  p[1] = asfloat(u[0].xyz).zxy;
+  p[0][1] = asfloat(u[1].x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_private.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_private.wgsl.expected.glsl
new file mode 100644
index 0000000..2a37a32
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_private.wgsl.expected.glsl

@@ -0,0 +1,19 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat4x3 inner;
+} u;
+
+mat4x3 p = mat4x3(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f);
+void f() {
+  p = u.inner;
+  p[1] = u.inner[0];
+  p[1] = u.inner[0].zxy;
+  p[0][1] = u.inner[1][0];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_private.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_private.wgsl.expected.msl
new file mode 100644
index 0000000..904c327
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_private.wgsl.expected.msl

@@ -0,0 +1,12 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void f(const constant float4x3* tint_symbol_1 [[buffer(0)]]) {
+  thread float4x3 tint_symbol = float4x3(0.0f);
+  tint_symbol = *(tint_symbol_1);
+  tint_symbol[1] = (*(tint_symbol_1))[0];
+  tint_symbol[1] = float3((*(tint_symbol_1))[0]).zxy;
+  tint_symbol[0][1] = (*(tint_symbol_1))[1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_private.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_private.wgsl.expected.spvasm
new file mode 100644
index 0000000..e7b0c0b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_private.wgsl.expected.spvasm

@@ -0,0 +1,62 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 36
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %p "p"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat4v3float = OpTypeMatrix %v3float 4
+    %u_block = OpTypeStruct %mat4v3float
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%_ptr_Private_mat4v3float = OpTypePointer Private %mat4v3float
+          %9 = OpConstantNull %mat4v3float
+          %p = OpVariable %_ptr_Private_mat4v3float Private %9
+       %void = OpTypeVoid
+         %10 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_mat4v3float = OpTypePointer Uniform %mat4v3float
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Private_v3float = OpTypePointer Private %v3float
+         %23 = OpConstantNull %int
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+%_ptr_Private_float = OpTypePointer Private %float
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+          %f = OpFunction %void None %10
+         %13 = OpLabel
+         %17 = OpAccessChain %_ptr_Uniform_mat4v3float %u %uint_0
+         %18 = OpLoad %mat4v3float %17
+               OpStore %p %18
+         %22 = OpAccessChain %_ptr_Private_v3float %p %int_1
+         %25 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %23
+         %26 = OpLoad %v3float %25
+               OpStore %22 %26
+         %27 = OpAccessChain %_ptr_Private_v3float %p %int_1
+         %28 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %23
+         %29 = OpLoad %v3float %28
+         %30 = OpVectorShuffle %v3float %29 %29 2 0 1
+               OpStore %27 %30
+         %32 = OpAccessChain %_ptr_Private_float %p %23 %int_1
+         %34 = OpAccessChain %_ptr_Uniform_float %u %uint_0 %int_1 %23
+         %35 = OpLoad %float %34
+               OpStore %32 %35
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_private.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_private.wgsl.expected.wgsl
new file mode 100644
index 0000000..7eebf15
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_private.wgsl.expected.wgsl

@@ -0,0 +1,11 @@
+@group(0) @binding(0) var<uniform> u : mat4x3<f32>;
+
+var<private> p : mat4x3<f32>;
+
+@compute @workgroup_size(1)
+fn f() {
+  p = u;
+  p[1] = u[0];
+  p[1] = u[0].zxy;
+  p[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_storage.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_storage.wgsl
new file mode 100644
index 0000000..64ba3f2
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_storage.wgsl

@@ -0,0 +1,10 @@
+@group(0) @binding(0) var<uniform> u : mat4x3<f32>;
+@group(0) @binding(1) var<storage, read_write> s : mat4x3<f32>;
+
+@compute @workgroup_size(1)
+fn f() {
+    s = u;
+    s[1] = u[0];
+    s[1] = u[0].zxy;
+    s[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_storage.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_storage.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..372d7c0
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_storage.wgsl.expected.dxc.hlsl

@@ -0,0 +1,28 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[4];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, float4x3 value) {
+  buffer.Store3((offset + 0u), asuint(value[0u]));
+  buffer.Store3((offset + 16u), asuint(value[1u]));
+  buffer.Store3((offset + 32u), asuint(value[2u]));
+  buffer.Store3((offset + 48u), asuint(value[3u]));
+}
+
+float4x3 tint_symbol_2(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_2(u, 0u));
+  s.Store3(16u, asuint(asfloat(u[0].xyz)));
+  s.Store3(16u, asuint(asfloat(u[0].xyz).zxy));
+  s.Store(4u, asuint(asfloat(u[1].x)));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_storage.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_storage.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..372d7c0
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_storage.wgsl.expected.fxc.hlsl

@@ -0,0 +1,28 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[4];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, float4x3 value) {
+  buffer.Store3((offset + 0u), asuint(value[0u]));
+  buffer.Store3((offset + 16u), asuint(value[1u]));
+  buffer.Store3((offset + 32u), asuint(value[2u]));
+  buffer.Store3((offset + 48u), asuint(value[3u]));
+}
+
+float4x3 tint_symbol_2(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_2(u, 0u));
+  s.Store3(16u, asuint(asfloat(u[0].xyz)));
+  s.Store3(16u, asuint(asfloat(u[0].xyz).zxy));
+  s.Store(4u, asuint(asfloat(u[1].x)));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_storage.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_storage.wgsl.expected.glsl
new file mode 100644
index 0000000..8a91538
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_storage.wgsl.expected.glsl

@@ -0,0 +1,22 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat4x3 inner;
+} u;
+
+layout(binding = 1, std430) buffer u_block_ssbo {
+  mat4x3 inner;
+} s;
+
+void f() {
+  s.inner = u.inner;
+  s.inner[1] = u.inner[0];
+  s.inner[1] = u.inner[0].zxy;
+  s.inner[0][1] = u.inner[1][0];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_storage.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_storage.wgsl.expected.msl
new file mode 100644
index 0000000..b3dcd87
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_storage.wgsl.expected.msl

@@ -0,0 +1,11 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void f(device float4x3* tint_symbol [[buffer(1)]], const constant float4x3* tint_symbol_1 [[buffer(0)]]) {
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[0];
+  (*(tint_symbol))[1] = float3((*(tint_symbol_1))[0]).zxy;
+  (*(tint_symbol))[0][1] = (*(tint_symbol_1))[1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_storage.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_storage.wgsl.expected.spvasm
new file mode 100644
index 0000000..c6b01ce
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_storage.wgsl.expected.spvasm

@@ -0,0 +1,65 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 37
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %s "s"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpDecorate %s DescriptorSet 0
+               OpDecorate %s Binding 1
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat4v3float = OpTypeMatrix %v3float 4
+    %u_block = OpTypeStruct %mat4v3float
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%_ptr_StorageBuffer_u_block = OpTypePointer StorageBuffer %u_block
+          %s = OpVariable %_ptr_StorageBuffer_u_block StorageBuffer
+       %void = OpTypeVoid
+          %9 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_StorageBuffer_mat4v3float = OpTypePointer StorageBuffer %mat4v3float
+%_ptr_Uniform_mat4v3float = OpTypePointer Uniform %mat4v3float
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_StorageBuffer_v3float = OpTypePointer StorageBuffer %v3float
+         %24 = OpConstantNull %int
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+          %f = OpFunction %void None %9
+         %12 = OpLabel
+         %16 = OpAccessChain %_ptr_StorageBuffer_mat4v3float %s %uint_0
+         %18 = OpAccessChain %_ptr_Uniform_mat4v3float %u %uint_0
+         %19 = OpLoad %mat4v3float %18
+               OpStore %16 %19
+         %23 = OpAccessChain %_ptr_StorageBuffer_v3float %s %uint_0 %int_1
+         %26 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %24
+         %27 = OpLoad %v3float %26
+               OpStore %23 %27
+         %28 = OpAccessChain %_ptr_StorageBuffer_v3float %s %uint_0 %int_1
+         %29 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %24
+         %30 = OpLoad %v3float %29
+         %31 = OpVectorShuffle %v3float %30 %30 2 0 1
+               OpStore %28 %31
+         %33 = OpAccessChain %_ptr_StorageBuffer_float %s %uint_0 %24 %int_1
+         %35 = OpAccessChain %_ptr_Uniform_float %u %uint_0 %int_1 %24
+         %36 = OpLoad %float %35
+               OpStore %33 %36
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_storage.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_storage.wgsl.expected.wgsl
new file mode 100644
index 0000000..73926e8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_storage.wgsl.expected.wgsl

@@ -0,0 +1,11 @@
+@group(0) @binding(0) var<uniform> u : mat4x3<f32>;
+
+@group(0) @binding(1) var<storage, read_write> s : mat4x3<f32>;
+
+@compute @workgroup_size(1)
+fn f() {
+  s = u;
+  s[1] = u[0];
+  s[1] = u[0].zxy;
+  s[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_workgroup.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_workgroup.wgsl
new file mode 100644
index 0000000..b5e5b2f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_workgroup.wgsl

@@ -0,0 +1,10 @@
+@group(0) @binding(0) var<uniform> u : mat4x3<f32>;
+var<workgroup> w : mat4x3<f32>;
+
+@compute @workgroup_size(1)
+fn f() {
+    w = u;
+    w[1] = u[0];
+    w[1] = u[0].zxy;
+    w[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_workgroup.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_workgroup.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..fab699a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_workgroup.wgsl.expected.dxc.hlsl

@@ -0,0 +1,33 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[4];
+};
+groupshared float4x3 w;
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+float4x3 tint_symbol_2(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    w = float4x3((0.0f).xxx, (0.0f).xxx, (0.0f).xxx, (0.0f).xxx);
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = asfloat(u[0].xyz);
+  w[1] = asfloat(u[0].xyz).zxy;
+  w[0][1] = asfloat(u[1].x);
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_workgroup.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_workgroup.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..fab699a
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_workgroup.wgsl.expected.fxc.hlsl

@@ -0,0 +1,33 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[4];
+};
+groupshared float4x3 w;
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+float4x3 tint_symbol_2(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    w = float4x3((0.0f).xxx, (0.0f).xxx, (0.0f).xxx, (0.0f).xxx);
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = asfloat(u[0].xyz);
+  w[1] = asfloat(u[0].xyz).zxy;
+  w[0][1] = asfloat(u[1].x);
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_workgroup.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_workgroup.wgsl.expected.glsl
new file mode 100644
index 0000000..d75121b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_workgroup.wgsl.expected.glsl

@@ -0,0 +1,23 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat4x3 inner;
+} u;
+
+shared mat4x3 w;
+void f(uint local_invocation_index) {
+  {
+    w = mat4x3(vec3(0.0f), vec3(0.0f), vec3(0.0f), vec3(0.0f));
+  }
+  barrier();
+  w = u.inner;
+  w[1] = u.inner[0];
+  w[1] = u.inner[0].zxy;
+  w[0][1] = u.inner[1][0];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f(gl_LocalInvocationIndex);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_workgroup.wgsl.expected.msl
new file mode 100644
index 0000000..ffc9775
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_workgroup.wgsl.expected.msl

@@ -0,0 +1,24 @@
+#include <metal_stdlib>
+
+using namespace metal;
+struct tint_symbol_5 {
+  float4x3 w;
+};
+
+void f_inner(uint local_invocation_index, threadgroup float4x3* const tint_symbol, const constant float4x3* const tint_symbol_1) {
+  {
+    *(tint_symbol) = float4x3(float3(0.0f), float3(0.0f), float3(0.0f), float3(0.0f));
+  }
+  threadgroup_barrier(mem_flags::mem_threadgroup);
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[0];
+  (*(tint_symbol))[1] = float3((*(tint_symbol_1))[0]).zxy;
+  (*(tint_symbol))[0][1] = (*(tint_symbol_1))[1][0];
+}
+
+kernel void f(const constant float4x3* tint_symbol_4 [[buffer(0)]], threadgroup tint_symbol_5* tint_symbol_3 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
+  threadgroup float4x3* const tint_symbol_2 = &((*(tint_symbol_3)).w);
+  f_inner(local_invocation_index, tint_symbol_2, tint_symbol_4);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_workgroup.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_workgroup.wgsl.expected.spvasm
new file mode 100644
index 0000000..1f5e90b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_workgroup.wgsl.expected.spvasm

@@ -0,0 +1,80 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 47
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f" %local_invocation_index_1
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %local_invocation_index_1 "local_invocation_index_1"
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %w "w"
+               OpName %f_inner "f_inner"
+               OpName %local_invocation_index "local_invocation_index"
+               OpName %f "f"
+               OpDecorate %local_invocation_index_1 BuiltIn LocalInvocationIndex
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %uint = OpTypeInt 32 0
+%_ptr_Input_uint = OpTypePointer Input %uint
+%local_invocation_index_1 = OpVariable %_ptr_Input_uint Input
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat4v3float = OpTypeMatrix %v3float 4
+    %u_block = OpTypeStruct %mat4v3float
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%_ptr_Workgroup_mat4v3float = OpTypePointer Workgroup %mat4v3float
+          %w = OpVariable %_ptr_Workgroup_mat4v3float Workgroup
+       %void = OpTypeVoid
+         %12 = OpTypeFunction %void %uint
+         %17 = OpConstantNull %mat4v3float
+     %uint_2 = OpConstant %uint 2
+   %uint_264 = OpConstant %uint 264
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_mat4v3float = OpTypePointer Uniform %mat4v3float
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Workgroup_v3float = OpTypePointer Workgroup %v3float
+         %29 = OpConstantNull %int
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+%_ptr_Workgroup_float = OpTypePointer Workgroup %float
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+         %42 = OpTypeFunction %void
+    %f_inner = OpFunction %void None %12
+%local_invocation_index = OpFunctionParameter %uint
+         %16 = OpLabel
+               OpStore %w %17
+               OpControlBarrier %uint_2 %uint_2 %uint_264
+         %23 = OpAccessChain %_ptr_Uniform_mat4v3float %u %uint_0
+         %24 = OpLoad %mat4v3float %23
+               OpStore %w %24
+         %28 = OpAccessChain %_ptr_Workgroup_v3float %w %int_1
+         %31 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %29
+         %32 = OpLoad %v3float %31
+               OpStore %28 %32
+         %33 = OpAccessChain %_ptr_Workgroup_v3float %w %int_1
+         %34 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %29
+         %35 = OpLoad %v3float %34
+         %36 = OpVectorShuffle %v3float %35 %35 2 0 1
+               OpStore %33 %36
+         %38 = OpAccessChain %_ptr_Workgroup_float %w %29 %int_1
+         %40 = OpAccessChain %_ptr_Uniform_float %u %uint_0 %int_1 %29
+         %41 = OpLoad %float %40
+               OpStore %38 %41
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %42
+         %44 = OpLabel
+         %46 = OpLoad %uint %local_invocation_index_1
+         %45 = OpFunctionCall %void %f_inner %46
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_workgroup.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_workgroup.wgsl.expected.wgsl
new file mode 100644
index 0000000..7099022
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_workgroup.wgsl.expected.wgsl

@@ -0,0 +1,11 @@
+@group(0) @binding(0) var<uniform> u : mat4x3<f32>;
+
+var<workgroup> w : mat4x3<f32>;
+
+@compute @workgroup_size(1)
+fn f() {
+  w = u;
+  w[1] = u[0];
+  w[1] = u[0].zxy;
+  w[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/dynamic_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/dynamic_index_via_ptr.wgsl
new file mode 100644
index 0000000..21e943d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/dynamic_index_via_ptr.wgsl

@@ -0,0 +1,15 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> m : mat4x4<f16>;
+
+var<private> counter = 0;
+fn i() -> i32 { counter++; return counter; }
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_m   = &m;
+  let p_m_i = &((*p_m)[i()]);
+
+  let l_m   : mat4x4<f16> = *p_m;
+  let l_m_i : vec4<f16>   = *p_m_i;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..16e4c4f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,46 @@
+cbuffer cbuffer_m : register(b0, space0) {
+  uint4 m[2];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+matrix<float16_t, 4, 4> tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  return matrix<float16_t, 4, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]), vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_m_i_save = i();
+  const matrix<float16_t, 4, 4> l_m = tint_symbol(m, 0u);
+  const uint scalar_offset_4 = ((8u * uint(p_m_i_save))) / 4;
+  uint4 ubo_load_9 = m[scalar_offset_4 / 4];
+  uint2 ubo_load_8 = ((scalar_offset_4 & 2) ? ubo_load_9.zw : ubo_load_9.xy);
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_8_yw = vector<float16_t, 2>(f16tof32(ubo_load_8 >> 16));
+  const vector<float16_t, 4> l_m_i = vector<float16_t, 4>(ubo_load_8_xz[0], ubo_load_8_yw[0], ubo_load_8_xz[1], ubo_load_8_yw[1]);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..a85dc45
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,51 @@
+SKIP: FAILED
+
+cbuffer cbuffer_m : register(b0, space0) {
+  uint4 m[2];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+matrix<float16_t, 4, 4> tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  return matrix<float16_t, 4, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]), vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_m_i_save = i();
+  const matrix<float16_t, 4, 4> l_m = tint_symbol(m, 0u);
+  const uint scalar_offset_4 = ((8u * uint(p_m_i_save))) / 4;
+  uint4 ubo_load_9 = m[scalar_offset_4 / 4];
+  uint2 ubo_load_8 = ((scalar_offset_4 & 2) ? ubo_load_9.zw : ubo_load_9.xy);
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_8_yw = vector<float16_t, 2>(f16tof32(ubo_load_8 >> 16));
+  const vector<float16_t, 4> l_m_i = vector<float16_t, 4>(ubo_load_8_xz[0], ubo_load_8_yw[0], ubo_load_8_xz[1], ubo_load_8_yw[1]);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000021737B8AE70(11,8-16): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/dynamic_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/dynamic_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..fd478c5
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/dynamic_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,58 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform m_block_std140_ubo {
+  f16vec4 inner_0;
+  f16vec4 inner_1;
+  f16vec4 inner_2;
+  f16vec4 inner_3;
+} m;
+
+int counter = 0;
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+f16mat4 load_m_inner() {
+  return f16mat4(m.inner_0, m.inner_1, m.inner_2, m.inner_3);
+}
+
+f16vec4 load_m_inner_p0(uint p0) {
+  switch(p0) {
+    case 0u: {
+      return m.inner_0;
+      break;
+    }
+    case 1u: {
+      return m.inner_1;
+      break;
+    }
+    case 2u: {
+      return m.inner_2;
+      break;
+    }
+    case 3u: {
+      return m.inner_3;
+      break;
+    }
+    default: {
+      return f16vec4(0.0hf);
+      break;
+    }
+  }
+}
+
+void f() {
+  f16mat4 p_m = load_m_inner();
+  int tint_symbol = i();
+  f16vec4 p_m_i = load_m_inner_p0(uint(tint_symbol));
+  f16mat4 l_m = load_m_inner();
+  f16vec4 l_m_i = load_m_inner_p0(uint(tint_symbol));
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/dynamic_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/dynamic_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..f8948a7
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/dynamic_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,17 @@
+#include <metal_stdlib>
+
+using namespace metal;
+int i() {
+  thread int tint_symbol_1 = 0;
+  tint_symbol_1 = as_type<int>((as_type<uint>(tint_symbol_1) + as_type<uint>(1)));
+  return tint_symbol_1;
+}
+
+kernel void f(const constant half4x4* tint_symbol_2 [[buffer(0)]]) {
+  int const tint_symbol = i();
+  int const p_m_i_save = tint_symbol;
+  half4x4 const l_m = *(tint_symbol_2);
+  half4 const l_m_i = (*(tint_symbol_2))[p_m_i_save];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/dynamic_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/dynamic_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..6e3ae0b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/dynamic_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,111 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 68
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %m_block_std140 "m_block_std140"
+               OpMemberName %m_block_std140 0 "inner_0"
+               OpMemberName %m_block_std140 1 "inner_1"
+               OpMemberName %m_block_std140 2 "inner_2"
+               OpMemberName %m_block_std140 3 "inner_3"
+               OpName %m "m"
+               OpName %counter "counter"
+               OpName %i "i"
+               OpName %load_m_inner "load_m_inner"
+               OpName %load_m_inner_p0 "load_m_inner_p0"
+               OpName %p0 "p0"
+               OpName %f "f"
+               OpDecorate %m_block_std140 Block
+               OpMemberDecorate %m_block_std140 0 Offset 0
+               OpMemberDecorate %m_block_std140 1 Offset 8
+               OpMemberDecorate %m_block_std140 2 Offset 16
+               OpMemberDecorate %m_block_std140 3 Offset 24
+               OpDecorate %m NonWritable
+               OpDecorate %m DescriptorSet 0
+               OpDecorate %m Binding 0
+       %half = OpTypeFloat 16
+     %v4half = OpTypeVector %half 4
+%m_block_std140 = OpTypeStruct %v4half %v4half %v4half %v4half
+%_ptr_Uniform_m_block_std140 = OpTypePointer Uniform %m_block_std140
+          %m = OpVariable %_ptr_Uniform_m_block_std140 Uniform
+        %int = OpTypeInt 32 1
+          %7 = OpConstantNull %int
+%_ptr_Private_int = OpTypePointer Private %int
+    %counter = OpVariable %_ptr_Private_int Private %7
+         %10 = OpTypeFunction %int
+      %int_1 = OpConstant %int 1
+ %mat4v4half = OpTypeMatrix %v4half 4
+         %17 = OpTypeFunction %mat4v4half
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+     %uint_3 = OpConstant %uint 3
+         %41 = OpTypeFunction %v4half %uint
+         %59 = OpConstantNull %v4half
+       %void = OpTypeVoid
+         %60 = OpTypeFunction %void
+          %i = OpFunction %int None %10
+         %12 = OpLabel
+         %13 = OpLoad %int %counter
+         %15 = OpIAdd %int %13 %int_1
+               OpStore %counter %15
+         %16 = OpLoad %int %counter
+               OpReturnValue %16
+               OpFunctionEnd
+%load_m_inner = OpFunction %mat4v4half None %17
+         %20 = OpLabel
+         %26 = OpAccessChain %_ptr_Uniform_v4half %m %uint_0
+         %27 = OpLoad %v4half %26
+         %30 = OpAccessChain %_ptr_Uniform_v4half %m %uint_1
+         %31 = OpLoad %v4half %30
+         %34 = OpAccessChain %_ptr_Uniform_v4half %m %uint_2
+         %35 = OpLoad %v4half %34
+         %38 = OpAccessChain %_ptr_Uniform_v4half %m %uint_3
+         %39 = OpLoad %v4half %38
+         %40 = OpCompositeConstruct %mat4v4half %27 %31 %35 %39
+               OpReturnValue %40
+               OpFunctionEnd
+%load_m_inner_p0 = OpFunction %v4half None %41
+         %p0 = OpFunctionParameter %uint
+         %44 = OpLabel
+               OpSelectionMerge %45 None
+               OpSwitch %p0 %46 0 %47 1 %48 2 %49 3 %50
+         %47 = OpLabel
+         %51 = OpAccessChain %_ptr_Uniform_v4half %m %uint_0
+         %52 = OpLoad %v4half %51
+               OpReturnValue %52
+         %48 = OpLabel
+         %53 = OpAccessChain %_ptr_Uniform_v4half %m %uint_1
+         %54 = OpLoad %v4half %53
+               OpReturnValue %54
+         %49 = OpLabel
+         %55 = OpAccessChain %_ptr_Uniform_v4half %m %uint_2
+         %56 = OpLoad %v4half %55
+               OpReturnValue %56
+         %50 = OpLabel
+         %57 = OpAccessChain %_ptr_Uniform_v4half %m %uint_3
+         %58 = OpLoad %v4half %57
+               OpReturnValue %58
+         %46 = OpLabel
+               OpReturnValue %59
+         %45 = OpLabel
+               OpReturnValue %59
+               OpFunctionEnd
+          %f = OpFunction %void None %60
+         %63 = OpLabel
+         %64 = OpFunctionCall %int %i
+         %65 = OpFunctionCall %mat4v4half %load_m_inner
+         %67 = OpBitcast %uint %64
+         %66 = OpFunctionCall %v4half %load_m_inner_p0 %67
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/dynamic_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/dynamic_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..0362728
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/dynamic_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,18 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> m : mat4x4<f16>;
+
+var<private> counter = 0;
+
+fn i() -> i32 {
+  counter++;
+  return counter;
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_m = &(m);
+  let p_m_i = &((*(p_m))[i()]);
+  let l_m : mat4x4<f16> = *(p_m);
+  let l_m_i : vec4<f16> = *(p_m_i);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/static_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/static_index_via_ptr.wgsl
new file mode 100644
index 0000000..a78172e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/static_index_via_ptr.wgsl

@@ -0,0 +1,15 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> m : mat4x4<f16>;
+
+var<private> counter = 0;
+fn i() -> i32 { counter++; return counter; }
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_m   = &m;
+  let p_m_1 = &((*p_m)[1]);
+
+  let l_m   : mat4x4<f16> = *p_m;
+  let l_m_1 : vec4<f16>   = *p_m_1;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..a841311
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,43 @@
+cbuffer cbuffer_m : register(b0, space0) {
+  uint4 m[2];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+matrix<float16_t, 4, 4> tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  return matrix<float16_t, 4, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]), vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 4, 4> l_m = tint_symbol(m, 0u);
+  uint2 ubo_load_8 = m[0].zw;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_8_yw = vector<float16_t, 2>(f16tof32(ubo_load_8 >> 16));
+  const vector<float16_t, 4> l_m_1 = vector<float16_t, 4>(ubo_load_8_xz[0], ubo_load_8_yw[0], ubo_load_8_xz[1], ubo_load_8_yw[1]);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..6d03956
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,48 @@
+SKIP: FAILED
+
+cbuffer cbuffer_m : register(b0, space0) {
+  uint4 m[2];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+matrix<float16_t, 4, 4> tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  return matrix<float16_t, 4, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]), vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 4, 4> l_m = tint_symbol(m, 0u);
+  uint2 ubo_load_8 = m[0].zw;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_8_yw = vector<float16_t, 2>(f16tof32(ubo_load_8 >> 16));
+  const vector<float16_t, 4> l_m_1 = vector<float16_t, 4>(ubo_load_8_xz[0], ubo_load_8_yw[0], ubo_load_8_xz[1], ubo_load_8_yw[1]);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000002055528A940(11,8-16): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/static_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/static_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..4175a0d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/static_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,26 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform m_block_std140_ubo {
+  f16vec4 inner_0;
+  f16vec4 inner_1;
+  f16vec4 inner_2;
+  f16vec4 inner_3;
+} m;
+
+f16mat4 load_m_inner() {
+  return f16mat4(m.inner_0, m.inner_1, m.inner_2, m.inner_3);
+}
+
+void f() {
+  f16mat4 p_m = load_m_inner();
+  f16vec4 p_m_1 = m.inner_1;
+  f16mat4 l_m = load_m_inner();
+  f16vec4 l_m_1 = m.inner_1;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/static_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/static_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..6e62a9c
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/static_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,15 @@
+#include <metal_stdlib>
+
+using namespace metal;
+int i() {
+  thread int tint_symbol = 0;
+  tint_symbol = as_type<int>((as_type<uint>(tint_symbol) + as_type<uint>(1)));
+  return tint_symbol;
+}
+
+kernel void f(const constant half4x4* tint_symbol_1 [[buffer(0)]]) {
+  half4x4 const l_m = *(tint_symbol_1);
+  half4 const l_m_1 = (*(tint_symbol_1))[1];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/static_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/static_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..1c9316c
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/static_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,80 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 48
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %m_block_std140 "m_block_std140"
+               OpMemberName %m_block_std140 0 "inner_0"
+               OpMemberName %m_block_std140 1 "inner_1"
+               OpMemberName %m_block_std140 2 "inner_2"
+               OpMemberName %m_block_std140 3 "inner_3"
+               OpName %m "m"
+               OpName %counter "counter"
+               OpName %i "i"
+               OpName %load_m_inner "load_m_inner"
+               OpName %f "f"
+               OpDecorate %m_block_std140 Block
+               OpMemberDecorate %m_block_std140 0 Offset 0
+               OpMemberDecorate %m_block_std140 1 Offset 8
+               OpMemberDecorate %m_block_std140 2 Offset 16
+               OpMemberDecorate %m_block_std140 3 Offset 24
+               OpDecorate %m NonWritable
+               OpDecorate %m DescriptorSet 0
+               OpDecorate %m Binding 0
+       %half = OpTypeFloat 16
+     %v4half = OpTypeVector %half 4
+%m_block_std140 = OpTypeStruct %v4half %v4half %v4half %v4half
+%_ptr_Uniform_m_block_std140 = OpTypePointer Uniform %m_block_std140
+          %m = OpVariable %_ptr_Uniform_m_block_std140 Uniform
+        %int = OpTypeInt 32 1
+          %7 = OpConstantNull %int
+%_ptr_Private_int = OpTypePointer Private %int
+    %counter = OpVariable %_ptr_Private_int Private %7
+         %10 = OpTypeFunction %int
+      %int_1 = OpConstant %int 1
+ %mat4v4half = OpTypeMatrix %v4half 4
+         %17 = OpTypeFunction %mat4v4half
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+     %uint_3 = OpConstant %uint 3
+       %void = OpTypeVoid
+         %41 = OpTypeFunction %void
+          %i = OpFunction %int None %10
+         %12 = OpLabel
+         %13 = OpLoad %int %counter
+         %15 = OpIAdd %int %13 %int_1
+               OpStore %counter %15
+         %16 = OpLoad %int %counter
+               OpReturnValue %16
+               OpFunctionEnd
+%load_m_inner = OpFunction %mat4v4half None %17
+         %20 = OpLabel
+         %26 = OpAccessChain %_ptr_Uniform_v4half %m %uint_0
+         %27 = OpLoad %v4half %26
+         %30 = OpAccessChain %_ptr_Uniform_v4half %m %uint_1
+         %31 = OpLoad %v4half %30
+         %34 = OpAccessChain %_ptr_Uniform_v4half %m %uint_2
+         %35 = OpLoad %v4half %34
+         %38 = OpAccessChain %_ptr_Uniform_v4half %m %uint_3
+         %39 = OpLoad %v4half %38
+         %40 = OpCompositeConstruct %mat4v4half %27 %31 %35 %39
+               OpReturnValue %40
+               OpFunctionEnd
+          %f = OpFunction %void None %41
+         %44 = OpLabel
+         %45 = OpFunctionCall %mat4v4half %load_m_inner
+         %46 = OpAccessChain %_ptr_Uniform_v4half %m %uint_1
+         %47 = OpLoad %v4half %46
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/static_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/static_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..5ee7934
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/static_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,18 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> m : mat4x4<f16>;
+
+var<private> counter = 0;
+
+fn i() -> i32 {
+  counter++;
+  return counter;
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_m = &(m);
+  let p_m_1 = &((*(p_m))[1]);
+  let l_m : mat4x4<f16> = *(p_m);
+  let l_m_1 : vec4<f16> = *(p_m_1);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_builtin.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_builtin.wgsl
new file mode 100644
index 0000000..2cd0d33
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_builtin.wgsl

@@ -0,0 +1,10 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat4x4<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+    let t = transpose(u);
+    let l = length(u[1]);
+    let a = abs(u[0].ywxz.x);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_builtin.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_builtin.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..cd3f135
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_builtin.wgsl.expected.dxc.hlsl

@@ -0,0 +1,41 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+
+matrix<float16_t, 4, 4> tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  return matrix<float16_t, 4, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]), vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 4, 4> t = transpose(tint_symbol(u, 0u));
+  uint2 ubo_load_8 = u[0].zw;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_8_yw = vector<float16_t, 2>(f16tof32(ubo_load_8 >> 16));
+  const float16_t l = length(vector<float16_t, 4>(ubo_load_8_xz[0], ubo_load_8_yw[0], ubo_load_8_xz[1], ubo_load_8_yw[1]));
+  uint2 ubo_load_9 = u[0].xy;
+  vector<float16_t, 2> ubo_load_9_xz = vector<float16_t, 2>(f16tof32(ubo_load_9 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_9_yw = vector<float16_t, 2>(f16tof32(ubo_load_9 >> 16));
+  const float16_t a = abs(vector<float16_t, 4>(ubo_load_9_xz[0], ubo_load_9_yw[0], ubo_load_9_xz[1], ubo_load_9_yw[1]).ywxz.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_builtin.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_builtin.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..cb0cc66
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_builtin.wgsl.expected.fxc.hlsl

@@ -0,0 +1,46 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+
+matrix<float16_t, 4, 4> tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  return matrix<float16_t, 4, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]), vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 4, 4> t = transpose(tint_symbol(u, 0u));
+  uint2 ubo_load_8 = u[0].zw;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_8_yw = vector<float16_t, 2>(f16tof32(ubo_load_8 >> 16));
+  const float16_t l = length(vector<float16_t, 4>(ubo_load_8_xz[0], ubo_load_8_yw[0], ubo_load_8_xz[1], ubo_load_8_yw[1]));
+  uint2 ubo_load_9 = u[0].xy;
+  vector<float16_t, 2> ubo_load_9_xz = vector<float16_t, 2>(f16tof32(ubo_load_9 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_9_yw = vector<float16_t, 2>(f16tof32(ubo_load_9 >> 16));
+  const float16_t a = abs(vector<float16_t, 4>(ubo_load_9_xz[0], ubo_load_9_yw[0], ubo_load_9_xz[1], ubo_load_9_yw[1]).ywxz.x);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000023EC1BAE110(5,8-16): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_builtin.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_builtin.wgsl.expected.glsl
new file mode 100644
index 0000000..bdf7c59
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_builtin.wgsl.expected.glsl

@@ -0,0 +1,25 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  f16vec4 inner_0;
+  f16vec4 inner_1;
+  f16vec4 inner_2;
+  f16vec4 inner_3;
+} u;
+
+f16mat4 load_u_inner() {
+  return f16mat4(u.inner_0, u.inner_1, u.inner_2, u.inner_3);
+}
+
+void f() {
+  f16mat4 t = transpose(load_u_inner());
+  float16_t l = length(u.inner_1);
+  float16_t a = abs(u.inner_0.ywxz[0u]);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_builtin.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_builtin.wgsl.expected.msl
new file mode 100644
index 0000000..f05f8a0
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_builtin.wgsl.expected.msl

@@ -0,0 +1,10 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void f(const constant half4x4* tint_symbol [[buffer(0)]]) {
+  half4x4 const t = transpose(*(tint_symbol));
+  half const l = length((*(tint_symbol))[1]);
+  half const a = fabs(half4((*(tint_symbol))[0]).ywxz[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_builtin.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_builtin.wgsl.expected.spvasm
new file mode 100644
index 0000000..8175c9c
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_builtin.wgsl.expected.spvasm

@@ -0,0 +1,73 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 46
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+         %37 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner_0"
+               OpMemberName %u_block_std140 1 "inner_1"
+               OpMemberName %u_block_std140 2 "inner_2"
+               OpMemberName %u_block_std140 3 "inner_3"
+               OpName %u "u"
+               OpName %load_u_inner "load_u_inner"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %u_block_std140 1 Offset 8
+               OpMemberDecorate %u_block_std140 2 Offset 16
+               OpMemberDecorate %u_block_std140 3 Offset 24
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %half = OpTypeFloat 16
+     %v4half = OpTypeVector %half 4
+%u_block_std140 = OpTypeStruct %v4half %v4half %v4half %v4half
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat4v4half = OpTypeMatrix %v4half 4
+          %6 = OpTypeFunction %mat4v4half
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+     %uint_3 = OpConstant %uint 3
+       %void = OpTypeVoid
+         %30 = OpTypeFunction %void
+         %44 = OpConstantNull %uint
+%load_u_inner = OpFunction %mat4v4half None %6
+          %9 = OpLabel
+         %15 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0
+         %16 = OpLoad %v4half %15
+         %19 = OpAccessChain %_ptr_Uniform_v4half %u %uint_1
+         %20 = OpLoad %v4half %19
+         %23 = OpAccessChain %_ptr_Uniform_v4half %u %uint_2
+         %24 = OpLoad %v4half %23
+         %27 = OpAccessChain %_ptr_Uniform_v4half %u %uint_3
+         %28 = OpLoad %v4half %27
+         %29 = OpCompositeConstruct %mat4v4half %16 %20 %24 %28
+               OpReturnValue %29
+               OpFunctionEnd
+          %f = OpFunction %void None %30
+         %33 = OpLabel
+         %35 = OpFunctionCall %mat4v4half %load_u_inner
+         %34 = OpTranspose %mat4v4half %35
+         %38 = OpAccessChain %_ptr_Uniform_v4half %u %uint_1
+         %39 = OpLoad %v4half %38
+         %36 = OpExtInst %half %37 Length %39
+         %41 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0
+         %42 = OpLoad %v4half %41
+         %43 = OpVectorShuffle %v4half %42 %42 1 3 0 2
+         %45 = OpCompositeExtract %half %43 0
+         %40 = OpExtInst %half %37 FAbs %45
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_builtin.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_builtin.wgsl.expected.wgsl
new file mode 100644
index 0000000..aafbef5
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_builtin.wgsl.expected.wgsl

@@ -0,0 +1,10 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat4x4<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let t = transpose(u);
+  let l = length(u[1]);
+  let a = abs(u[0].ywxz.x);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_fn.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_fn.wgsl
new file mode 100644
index 0000000..47a7a69
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_fn.wgsl

@@ -0,0 +1,16 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat4x4<f16>;
+
+fn a(m : mat4x4<f16>) {}
+fn b(v : vec4<f16>) {}
+fn c(f : f16) {}
+
+@compute @workgroup_size(1)
+fn f() {
+    a(u);
+    b(u[1]);
+    b(u[1].ywxz);
+    c(u[1].x);
+    c(u[1].ywxz.x);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_fn.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_fn.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..01d30aef
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_fn.wgsl.expected.dxc.hlsl

@@ -0,0 +1,55 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+
+void a(matrix<float16_t, 4, 4> m) {
+}
+
+void b(vector<float16_t, 4> v) {
+}
+
+void c(float16_t f_1) {
+}
+
+matrix<float16_t, 4, 4> tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  return matrix<float16_t, 4, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]), vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  uint2 ubo_load_8 = u[0].zw;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_8_yw = vector<float16_t, 2>(f16tof32(ubo_load_8 >> 16));
+  b(vector<float16_t, 4>(ubo_load_8_xz[0], ubo_load_8_yw[0], ubo_load_8_xz[1], ubo_load_8_yw[1]));
+  uint2 ubo_load_9 = u[0].zw;
+  vector<float16_t, 2> ubo_load_9_xz = vector<float16_t, 2>(f16tof32(ubo_load_9 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_9_yw = vector<float16_t, 2>(f16tof32(ubo_load_9 >> 16));
+  b(vector<float16_t, 4>(ubo_load_9_xz[0], ubo_load_9_yw[0], ubo_load_9_xz[1], ubo_load_9_yw[1]).ywxz);
+  c(float16_t(f16tof32(((u[0].z) & 0xFFFF))));
+  uint2 ubo_load_10 = u[0].zw;
+  vector<float16_t, 2> ubo_load_10_xz = vector<float16_t, 2>(f16tof32(ubo_load_10 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_10_yw = vector<float16_t, 2>(f16tof32(ubo_load_10 >> 16));
+  c(vector<float16_t, 4>(ubo_load_10_xz[0], ubo_load_10_yw[0], ubo_load_10_xz[1], ubo_load_10_yw[1]).ywxz.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_fn.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_fn.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..212ddb2
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_fn.wgsl.expected.fxc.hlsl

@@ -0,0 +1,62 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+
+void a(matrix<float16_t, 4, 4> m) {
+}
+
+void b(vector<float16_t, 4> v) {
+}
+
+void c(float16_t f_1) {
+}
+
+matrix<float16_t, 4, 4> tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  return matrix<float16_t, 4, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]), vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  uint2 ubo_load_8 = u[0].zw;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_8_yw = vector<float16_t, 2>(f16tof32(ubo_load_8 >> 16));
+  b(vector<float16_t, 4>(ubo_load_8_xz[0], ubo_load_8_yw[0], ubo_load_8_xz[1], ubo_load_8_yw[1]));
+  uint2 ubo_load_9 = u[0].zw;
+  vector<float16_t, 2> ubo_load_9_xz = vector<float16_t, 2>(f16tof32(ubo_load_9 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_9_yw = vector<float16_t, 2>(f16tof32(ubo_load_9 >> 16));
+  b(vector<float16_t, 4>(ubo_load_9_xz[0], ubo_load_9_yw[0], ubo_load_9_xz[1], ubo_load_9_yw[1]).ywxz);
+  c(float16_t(f16tof32(((u[0].z) & 0xFFFF))));
+  uint2 ubo_load_10 = u[0].zw;
+  vector<float16_t, 2> ubo_load_10_xz = vector<float16_t, 2>(f16tof32(ubo_load_10 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_10_yw = vector<float16_t, 2>(f16tof32(ubo_load_10 >> 16));
+  c(vector<float16_t, 4>(ubo_load_10_xz[0], ubo_load_10_yw[0], ubo_load_10_xz[1], ubo_load_10_yw[1]).ywxz.x);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001E1E337D1E0(5,15-23): error X3000: syntax error: unexpected token 'float16_t'
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001E1E337D1E0(8,15-23): error X3000: syntax error: unexpected token 'float16_t'
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001E1E337D1E0(11,8-16): error X3000: unrecognized identifier 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_fn.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_fn.wgsl.expected.glsl
new file mode 100644
index 0000000..113ee01
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_fn.wgsl.expected.glsl

@@ -0,0 +1,36 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  f16vec4 inner_0;
+  f16vec4 inner_1;
+  f16vec4 inner_2;
+  f16vec4 inner_3;
+} u;
+
+void a(f16mat4 m) {
+}
+
+void b(f16vec4 v) {
+}
+
+void c(float16_t f_1) {
+}
+
+f16mat4 load_u_inner() {
+  return f16mat4(u.inner_0, u.inner_1, u.inner_2, u.inner_3);
+}
+
+void f() {
+  a(load_u_inner());
+  b(u.inner_1);
+  b(u.inner_1.ywxz);
+  c(u.inner_1[0u]);
+  c(u.inner_1.ywxz[0u]);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_fn.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_fn.wgsl.expected.msl
new file mode 100644
index 0000000..d2d7153
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_fn.wgsl.expected.msl

@@ -0,0 +1,21 @@
+#include <metal_stdlib>
+
+using namespace metal;
+void a(half4x4 m) {
+}
+
+void b(half4 v) {
+}
+
+void c(half f_1) {
+}
+
+kernel void f(const constant half4x4* tint_symbol [[buffer(0)]]) {
+  a(*(tint_symbol));
+  b((*(tint_symbol))[1]);
+  b(half4((*(tint_symbol))[1]).ywxz);
+  c((*(tint_symbol))[1][0]);
+  c(half4((*(tint_symbol))[1]).ywxz[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_fn.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_fn.wgsl.expected.spvasm
new file mode 100644
index 0000000..34680c6
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_fn.wgsl.expected.spvasm

@@ -0,0 +1,104 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 65
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner_0"
+               OpMemberName %u_block_std140 1 "inner_1"
+               OpMemberName %u_block_std140 2 "inner_2"
+               OpMemberName %u_block_std140 3 "inner_3"
+               OpName %u "u"
+               OpName %a "a"
+               OpName %m "m"
+               OpName %b "b"
+               OpName %v "v"
+               OpName %c "c"
+               OpName %f_1 "f_1"
+               OpName %load_u_inner "load_u_inner"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %u_block_std140 1 Offset 8
+               OpMemberDecorate %u_block_std140 2 Offset 16
+               OpMemberDecorate %u_block_std140 3 Offset 24
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %half = OpTypeFloat 16
+     %v4half = OpTypeVector %half 4
+%u_block_std140 = OpTypeStruct %v4half %v4half %v4half %v4half
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+       %void = OpTypeVoid
+ %mat4v4half = OpTypeMatrix %v4half 4
+          %6 = OpTypeFunction %void %mat4v4half
+         %12 = OpTypeFunction %void %v4half
+         %16 = OpTypeFunction %void %half
+         %20 = OpTypeFunction %mat4v4half
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+     %uint_3 = OpConstant %uint 3
+         %43 = OpTypeFunction %void
+         %56 = OpConstantNull %uint
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+          %a = OpFunction %void None %6
+          %m = OpFunctionParameter %mat4v4half
+         %11 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %b = OpFunction %void None %12
+          %v = OpFunctionParameter %v4half
+         %15 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %c = OpFunction %void None %16
+        %f_1 = OpFunctionParameter %half
+         %19 = OpLabel
+               OpReturn
+               OpFunctionEnd
+%load_u_inner = OpFunction %mat4v4half None %20
+         %22 = OpLabel
+         %28 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0
+         %29 = OpLoad %v4half %28
+         %32 = OpAccessChain %_ptr_Uniform_v4half %u %uint_1
+         %33 = OpLoad %v4half %32
+         %36 = OpAccessChain %_ptr_Uniform_v4half %u %uint_2
+         %37 = OpLoad %v4half %36
+         %40 = OpAccessChain %_ptr_Uniform_v4half %u %uint_3
+         %41 = OpLoad %v4half %40
+         %42 = OpCompositeConstruct %mat4v4half %29 %33 %37 %41
+               OpReturnValue %42
+               OpFunctionEnd
+          %f = OpFunction %void None %43
+         %45 = OpLabel
+         %47 = OpFunctionCall %mat4v4half %load_u_inner
+         %46 = OpFunctionCall %void %a %47
+         %49 = OpAccessChain %_ptr_Uniform_v4half %u %uint_1
+         %50 = OpLoad %v4half %49
+         %48 = OpFunctionCall %void %b %50
+         %52 = OpAccessChain %_ptr_Uniform_v4half %u %uint_1
+         %53 = OpLoad %v4half %52
+         %54 = OpVectorShuffle %v4half %53 %53 1 3 0 2
+         %51 = OpFunctionCall %void %b %54
+         %58 = OpAccessChain %_ptr_Uniform_half %u %uint_1 %56
+         %59 = OpLoad %half %58
+         %55 = OpFunctionCall %void %c %59
+         %61 = OpAccessChain %_ptr_Uniform_v4half %u %uint_1
+         %62 = OpLoad %v4half %61
+         %63 = OpVectorShuffle %v4half %62 %62 1 3 0 2
+         %64 = OpCompositeExtract %half %63 0
+         %60 = OpFunctionCall %void %c %64
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_fn.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_fn.wgsl.expected.wgsl
new file mode 100644
index 0000000..c99345b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_fn.wgsl.expected.wgsl

@@ -0,0 +1,21 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat4x4<f16>;
+
+fn a(m : mat4x4<f16>) {
+}
+
+fn b(v : vec4<f16>) {
+}
+
+fn c(f : f16) {
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  a(u);
+  b(u[1]);
+  b(u[1].ywxz);
+  c(u[1].x);
+  c(u[1].ywxz.x);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_private.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_private.wgsl
new file mode 100644
index 0000000..964be33
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_private.wgsl

@@ -0,0 +1,12 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat4x4<f16>;
+var<private> p : mat4x4<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+    p = u;
+    p[1] = u[0];
+    p[1] = u[0].ywxz;
+    p[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_private.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_private.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..686f794
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_private.wgsl.expected.dxc.hlsl

@@ -0,0 +1,43 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+static matrix<float16_t, 4, 4> p = matrix<float16_t, 4, 4>(float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h));
+
+matrix<float16_t, 4, 4> tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  return matrix<float16_t, 4, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]), vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  uint2 ubo_load_8 = u[0].xy;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_8_yw = vector<float16_t, 2>(f16tof32(ubo_load_8 >> 16));
+  p[1] = vector<float16_t, 4>(ubo_load_8_xz[0], ubo_load_8_yw[0], ubo_load_8_xz[1], ubo_load_8_yw[1]);
+  uint2 ubo_load_9 = u[0].xy;
+  vector<float16_t, 2> ubo_load_9_xz = vector<float16_t, 2>(f16tof32(ubo_load_9 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_9_yw = vector<float16_t, 2>(f16tof32(ubo_load_9 >> 16));
+  p[1] = vector<float16_t, 4>(ubo_load_9_xz[0], ubo_load_9_yw[0], ubo_load_9_xz[1], ubo_load_9_yw[1]).ywxz;
+  p[0][1] = float16_t(f16tof32(((u[0].z) & 0xFFFF)));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_private.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_private.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..7bb4eda
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_private.wgsl.expected.fxc.hlsl

@@ -0,0 +1,48 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+static matrix<float16_t, 4, 4> p = matrix<float16_t, 4, 4>(float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h), float16_t(0.0h));
+
+matrix<float16_t, 4, 4> tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  return matrix<float16_t, 4, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]), vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  uint2 ubo_load_8 = u[0].xy;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_8_yw = vector<float16_t, 2>(f16tof32(ubo_load_8 >> 16));
+  p[1] = vector<float16_t, 4>(ubo_load_8_xz[0], ubo_load_8_yw[0], ubo_load_8_xz[1], ubo_load_8_yw[1]);
+  uint2 ubo_load_9 = u[0].xy;
+  vector<float16_t, 2> ubo_load_9_xz = vector<float16_t, 2>(f16tof32(ubo_load_9 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_9_yw = vector<float16_t, 2>(f16tof32(ubo_load_9 >> 16));
+  p[1] = vector<float16_t, 4>(ubo_load_9_xz[0], ubo_load_9_yw[0], ubo_load_9_xz[1], ubo_load_9_yw[1]).ywxz;
+  p[0][1] = float16_t(f16tof32(((u[0].z) & 0xFFFF)));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001F1E6D79B60(4,15-23): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_private.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_private.wgsl.expected.glsl
new file mode 100644
index 0000000..5d0c395
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_private.wgsl.expected.glsl

@@ -0,0 +1,27 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  f16vec4 inner_0;
+  f16vec4 inner_1;
+  f16vec4 inner_2;
+  f16vec4 inner_3;
+} u;
+
+f16mat4 p = f16mat4(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf);
+f16mat4 load_u_inner() {
+  return f16mat4(u.inner_0, u.inner_1, u.inner_2, u.inner_3);
+}
+
+void f() {
+  p = load_u_inner();
+  p[1] = u.inner_0;
+  p[1] = u.inner_0.ywxz;
+  p[0][1] = u.inner_1[0u];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_private.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_private.wgsl.expected.msl
new file mode 100644
index 0000000..7d4af8b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_private.wgsl.expected.msl

@@ -0,0 +1,12 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void f(const constant half4x4* tint_symbol_1 [[buffer(0)]]) {
+  thread half4x4 tint_symbol = half4x4(0.0h);
+  tint_symbol = *(tint_symbol_1);
+  tint_symbol[1] = (*(tint_symbol_1))[0];
+  tint_symbol[1] = half4((*(tint_symbol_1))[0]).ywxz;
+  tint_symbol[0][1] = (*(tint_symbol_1))[1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_private.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_private.wgsl.expected.spvasm
new file mode 100644
index 0000000..afb9628
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_private.wgsl.expected.spvasm

@@ -0,0 +1,87 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 55
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner_0"
+               OpMemberName %u_block_std140 1 "inner_1"
+               OpMemberName %u_block_std140 2 "inner_2"
+               OpMemberName %u_block_std140 3 "inner_3"
+               OpName %u "u"
+               OpName %p "p"
+               OpName %load_u_inner "load_u_inner"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %u_block_std140 1 Offset 8
+               OpMemberDecorate %u_block_std140 2 Offset 16
+               OpMemberDecorate %u_block_std140 3 Offset 24
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %half = OpTypeFloat 16
+     %v4half = OpTypeVector %half 4
+%u_block_std140 = OpTypeStruct %v4half %v4half %v4half %v4half
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat4v4half = OpTypeMatrix %v4half 4
+%_ptr_Private_mat4v4half = OpTypePointer Private %mat4v4half
+          %9 = OpConstantNull %mat4v4half
+          %p = OpVariable %_ptr_Private_mat4v4half Private %9
+         %10 = OpTypeFunction %mat4v4half
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+     %uint_3 = OpConstant %uint 3
+       %void = OpTypeVoid
+         %33 = OpTypeFunction %void
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Private_v4half = OpTypePointer Private %v4half
+         %48 = OpConstantNull %int
+%_ptr_Private_half = OpTypePointer Private %half
+         %51 = OpConstantNull %uint
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+%load_u_inner = OpFunction %mat4v4half None %10
+         %12 = OpLabel
+         %18 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0
+         %19 = OpLoad %v4half %18
+         %22 = OpAccessChain %_ptr_Uniform_v4half %u %uint_1
+         %23 = OpLoad %v4half %22
+         %26 = OpAccessChain %_ptr_Uniform_v4half %u %uint_2
+         %27 = OpLoad %v4half %26
+         %30 = OpAccessChain %_ptr_Uniform_v4half %u %uint_3
+         %31 = OpLoad %v4half %30
+         %32 = OpCompositeConstruct %mat4v4half %19 %23 %27 %31
+               OpReturnValue %32
+               OpFunctionEnd
+          %f = OpFunction %void None %33
+         %36 = OpLabel
+         %37 = OpFunctionCall %mat4v4half %load_u_inner
+               OpStore %p %37
+         %41 = OpAccessChain %_ptr_Private_v4half %p %int_1
+         %42 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0
+         %43 = OpLoad %v4half %42
+               OpStore %41 %43
+         %44 = OpAccessChain %_ptr_Private_v4half %p %int_1
+         %45 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0
+         %46 = OpLoad %v4half %45
+         %47 = OpVectorShuffle %v4half %46 %46 1 3 0 2
+               OpStore %44 %47
+         %50 = OpAccessChain %_ptr_Private_half %p %48 %int_1
+         %53 = OpAccessChain %_ptr_Uniform_half %u %uint_1 %51
+         %54 = OpLoad %half %53
+               OpStore %50 %54
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_private.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_private.wgsl.expected.wgsl
new file mode 100644
index 0000000..d5db396
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_private.wgsl.expected.wgsl

@@ -0,0 +1,13 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat4x4<f16>;
+
+var<private> p : mat4x4<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+  p = u;
+  p[1] = u[0];
+  p[1] = u[0].ywxz;
+  p[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_storage.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_storage.wgsl
new file mode 100644
index 0000000..4eeb003
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_storage.wgsl

@@ -0,0 +1,12 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat4x4<f16>;
+@group(0) @binding(1) var<storage, read_write> s : mat4x4<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+    s = u;
+    s[1] = u[0];
+    s[1] = u[0].ywxz;
+    s[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_storage.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_storage.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..ab84a62
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_storage.wgsl.expected.dxc.hlsl

@@ -0,0 +1,50 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 4, 4> value) {
+  buffer.Store<vector<float16_t, 4> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 8u), value[1u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 16u), value[2u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 24u), value[3u]);
+}
+
+matrix<float16_t, 4, 4> tint_symbol_2(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  return matrix<float16_t, 4, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]), vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_2(u, 0u));
+  uint2 ubo_load_8 = u[0].xy;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_8_yw = vector<float16_t, 2>(f16tof32(ubo_load_8 >> 16));
+  s.Store<vector<float16_t, 4> >(8u, vector<float16_t, 4>(ubo_load_8_xz[0], ubo_load_8_yw[0], ubo_load_8_xz[1], ubo_load_8_yw[1]));
+  uint2 ubo_load_9 = u[0].xy;
+  vector<float16_t, 2> ubo_load_9_xz = vector<float16_t, 2>(f16tof32(ubo_load_9 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_9_yw = vector<float16_t, 2>(f16tof32(ubo_load_9 >> 16));
+  s.Store<vector<float16_t, 4> >(8u, vector<float16_t, 4>(ubo_load_9_xz[0], ubo_load_9_yw[0], ubo_load_9_xz[1], ubo_load_9_yw[1]).ywxz);
+  s.Store<float16_t>(2u, float16_t(f16tof32(((u[0].z) & 0xFFFF))));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_storage.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_storage.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..f709acf
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_storage.wgsl.expected.fxc.hlsl

@@ -0,0 +1,56 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 4, 4> value) {
+  buffer.Store<vector<float16_t, 4> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 8u), value[1u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 16u), value[2u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 24u), value[3u]);
+}
+
+matrix<float16_t, 4, 4> tint_symbol_2(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  return matrix<float16_t, 4, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]), vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_2(u, 0u));
+  uint2 ubo_load_8 = u[0].xy;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_8_yw = vector<float16_t, 2>(f16tof32(ubo_load_8 >> 16));
+  s.Store<vector<float16_t, 4> >(8u, vector<float16_t, 4>(ubo_load_8_xz[0], ubo_load_8_yw[0], ubo_load_8_xz[1], ubo_load_8_yw[1]));
+  uint2 ubo_load_9 = u[0].xy;
+  vector<float16_t, 2> ubo_load_9_xz = vector<float16_t, 2>(f16tof32(ubo_load_9 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_9_yw = vector<float16_t, 2>(f16tof32(ubo_load_9 >> 16));
+  s.Store<vector<float16_t, 4> >(8u, vector<float16_t, 4>(ubo_load_9_xz[0], ubo_load_9_yw[0], ubo_load_9_xz[1], ubo_load_9_yw[1]).ywxz);
+  s.Store<float16_t>(2u, float16_t(f16tof32(((u[0].z) & 0xFFFF))));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000017A08509500(6,66-74): error X3000: syntax error: unexpected token 'float16_t'
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000017A08509500(7,3-14): error X3018: invalid subscript 'Store'
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_storage.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_storage.wgsl.expected.glsl
new file mode 100644
index 0000000..0d01706
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_storage.wgsl.expected.glsl

@@ -0,0 +1,30 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  f16vec4 inner_0;
+  f16vec4 inner_1;
+  f16vec4 inner_2;
+  f16vec4 inner_3;
+} u;
+
+layout(binding = 1, std430) buffer u_block_ssbo {
+  f16mat4 inner;
+} s;
+
+f16mat4 load_u_inner() {
+  return f16mat4(u.inner_0, u.inner_1, u.inner_2, u.inner_3);
+}
+
+void f() {
+  s.inner = load_u_inner();
+  s.inner[1] = u.inner_0;
+  s.inner[1] = u.inner_0.ywxz;
+  s.inner[0][1] = u.inner_1[0u];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_storage.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_storage.wgsl.expected.msl
new file mode 100644
index 0000000..4530643
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_storage.wgsl.expected.msl

@@ -0,0 +1,11 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void f(device half4x4* tint_symbol [[buffer(1)]], const constant half4x4* tint_symbol_1 [[buffer(0)]]) {
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[0];
+  (*(tint_symbol))[1] = half4((*(tint_symbol_1))[0]).ywxz;
+  (*(tint_symbol))[0][1] = (*(tint_symbol_1))[1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_storage.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_storage.wgsl.expected.spvasm
new file mode 100644
index 0000000..95d5fea
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_storage.wgsl.expected.spvasm

@@ -0,0 +1,97 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 57
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner_0"
+               OpMemberName %u_block_std140 1 "inner_1"
+               OpMemberName %u_block_std140 2 "inner_2"
+               OpMemberName %u_block_std140 3 "inner_3"
+               OpName %u "u"
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %s "s"
+               OpName %load_u_inner "load_u_inner"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %u_block_std140 1 Offset 8
+               OpMemberDecorate %u_block_std140 2 Offset 16
+               OpMemberDecorate %u_block_std140 3 Offset 24
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 8
+               OpDecorate %s DescriptorSet 0
+               OpDecorate %s Binding 1
+       %half = OpTypeFloat 16
+     %v4half = OpTypeVector %half 4
+%u_block_std140 = OpTypeStruct %v4half %v4half %v4half %v4half
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat4v4half = OpTypeMatrix %v4half 4
+    %u_block = OpTypeStruct %mat4v4half
+%_ptr_StorageBuffer_u_block = OpTypePointer StorageBuffer %u_block
+          %s = OpVariable %_ptr_StorageBuffer_u_block StorageBuffer
+         %10 = OpTypeFunction %mat4v4half
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+     %uint_3 = OpConstant %uint 3
+       %void = OpTypeVoid
+         %33 = OpTypeFunction %void
+%_ptr_StorageBuffer_mat4v4half = OpTypePointer StorageBuffer %mat4v4half
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_StorageBuffer_v4half = OpTypePointer StorageBuffer %v4half
+         %50 = OpConstantNull %int
+%_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
+         %53 = OpConstantNull %uint
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+%load_u_inner = OpFunction %mat4v4half None %10
+         %12 = OpLabel
+         %18 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0
+         %19 = OpLoad %v4half %18
+         %22 = OpAccessChain %_ptr_Uniform_v4half %u %uint_1
+         %23 = OpLoad %v4half %22
+         %26 = OpAccessChain %_ptr_Uniform_v4half %u %uint_2
+         %27 = OpLoad %v4half %26
+         %30 = OpAccessChain %_ptr_Uniform_v4half %u %uint_3
+         %31 = OpLoad %v4half %30
+         %32 = OpCompositeConstruct %mat4v4half %19 %23 %27 %31
+               OpReturnValue %32
+               OpFunctionEnd
+          %f = OpFunction %void None %33
+         %36 = OpLabel
+         %38 = OpAccessChain %_ptr_StorageBuffer_mat4v4half %s %uint_0
+         %39 = OpFunctionCall %mat4v4half %load_u_inner
+               OpStore %38 %39
+         %43 = OpAccessChain %_ptr_StorageBuffer_v4half %s %uint_0 %int_1
+         %44 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0
+         %45 = OpLoad %v4half %44
+               OpStore %43 %45
+         %46 = OpAccessChain %_ptr_StorageBuffer_v4half %s %uint_0 %int_1
+         %47 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0
+         %48 = OpLoad %v4half %47
+         %49 = OpVectorShuffle %v4half %48 %48 1 3 0 2
+               OpStore %46 %49
+         %52 = OpAccessChain %_ptr_StorageBuffer_half %s %uint_0 %50 %int_1
+         %55 = OpAccessChain %_ptr_Uniform_half %u %uint_1 %53
+         %56 = OpLoad %half %55
+               OpStore %52 %56
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_storage.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_storage.wgsl.expected.wgsl
new file mode 100644
index 0000000..e7a3faa
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_storage.wgsl.expected.wgsl

@@ -0,0 +1,13 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat4x4<f16>;
+
+@group(0) @binding(1) var<storage, read_write> s : mat4x4<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+  s = u;
+  s[1] = u[0];
+  s[1] = u[0].ywxz;
+  s[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_workgroup.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_workgroup.wgsl
new file mode 100644
index 0000000..3018a0f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_workgroup.wgsl

@@ -0,0 +1,12 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat4x4<f16>;
+var<workgroup> w : mat4x4<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+    w = u;
+    w[1] = u[0];
+    w[1] = u[0].ywxz;
+    w[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_workgroup.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_workgroup.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..3cf441e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_workgroup.wgsl.expected.dxc.hlsl

@@ -0,0 +1,55 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+groupshared matrix<float16_t, 4, 4> w;
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+matrix<float16_t, 4, 4> tint_symbol_2(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  return matrix<float16_t, 4, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]), vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]));
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    w = matrix<float16_t, 4, 4>((float16_t(0.0h)).xxxx, (float16_t(0.0h)).xxxx, (float16_t(0.0h)).xxxx, (float16_t(0.0h)).xxxx);
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  uint2 ubo_load_8 = u[0].xy;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_8_yw = vector<float16_t, 2>(f16tof32(ubo_load_8 >> 16));
+  w[1] = vector<float16_t, 4>(ubo_load_8_xz[0], ubo_load_8_yw[0], ubo_load_8_xz[1], ubo_load_8_yw[1]);
+  uint2 ubo_load_9 = u[0].xy;
+  vector<float16_t, 2> ubo_load_9_xz = vector<float16_t, 2>(f16tof32(ubo_load_9 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_9_yw = vector<float16_t, 2>(f16tof32(ubo_load_9 >> 16));
+  w[1] = vector<float16_t, 4>(ubo_load_9_xz[0], ubo_load_9_yw[0], ubo_load_9_xz[1], ubo_load_9_yw[1]).ywxz;
+  w[0][1] = float16_t(f16tof32(((u[0].z) & 0xFFFF)));
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_workgroup.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_workgroup.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..ca75021
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_workgroup.wgsl.expected.fxc.hlsl

@@ -0,0 +1,60 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+groupshared matrix<float16_t, 4, 4> w;
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+matrix<float16_t, 4, 4> tint_symbol_2(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  return matrix<float16_t, 4, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]), vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]));
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    w = matrix<float16_t, 4, 4>((float16_t(0.0h)).xxxx, (float16_t(0.0h)).xxxx, (float16_t(0.0h)).xxxx, (float16_t(0.0h)).xxxx);
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  uint2 ubo_load_8 = u[0].xy;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_8_yw = vector<float16_t, 2>(f16tof32(ubo_load_8 >> 16));
+  w[1] = vector<float16_t, 4>(ubo_load_8_xz[0], ubo_load_8_yw[0], ubo_load_8_xz[1], ubo_load_8_yw[1]);
+  uint2 ubo_load_9 = u[0].xy;
+  vector<float16_t, 2> ubo_load_9_xz = vector<float16_t, 2>(f16tof32(ubo_load_9 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_9_yw = vector<float16_t, 2>(f16tof32(ubo_load_9 >> 16));
+  w[1] = vector<float16_t, 4>(ubo_load_9_xz[0], ubo_load_9_yw[0], ubo_load_9_xz[1], ubo_load_9_yw[1]).ywxz;
+  w[0][1] = float16_t(f16tof32(((u[0].z) & 0xFFFF)));
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x00000234396001A0(4,20-28): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_workgroup.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_workgroup.wgsl.expected.glsl
new file mode 100644
index 0000000..18b9337
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_workgroup.wgsl.expected.glsl

@@ -0,0 +1,31 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  f16vec4 inner_0;
+  f16vec4 inner_1;
+  f16vec4 inner_2;
+  f16vec4 inner_3;
+} u;
+
+shared f16mat4 w;
+f16mat4 load_u_inner() {
+  return f16mat4(u.inner_0, u.inner_1, u.inner_2, u.inner_3);
+}
+
+void f(uint local_invocation_index) {
+  {
+    w = f16mat4(f16vec4(0.0hf), f16vec4(0.0hf), f16vec4(0.0hf), f16vec4(0.0hf));
+  }
+  barrier();
+  w = load_u_inner();
+  w[1] = u.inner_0;
+  w[1] = u.inner_0.ywxz;
+  w[0][1] = u.inner_1[0u];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f(gl_LocalInvocationIndex);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_workgroup.wgsl.expected.msl
new file mode 100644
index 0000000..57392de
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_workgroup.wgsl.expected.msl

@@ -0,0 +1,24 @@
+#include <metal_stdlib>
+
+using namespace metal;
+struct tint_symbol_5 {
+  half4x4 w;
+};
+
+void f_inner(uint local_invocation_index, threadgroup half4x4* const tint_symbol, const constant half4x4* const tint_symbol_1) {
+  {
+    *(tint_symbol) = half4x4(half4(0.0h), half4(0.0h), half4(0.0h), half4(0.0h));
+  }
+  threadgroup_barrier(mem_flags::mem_threadgroup);
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[0];
+  (*(tint_symbol))[1] = half4((*(tint_symbol_1))[0]).ywxz;
+  (*(tint_symbol))[0][1] = (*(tint_symbol_1))[1][0];
+}
+
+kernel void f(const constant half4x4* tint_symbol_4 [[buffer(0)]], threadgroup tint_symbol_5* tint_symbol_3 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
+  threadgroup half4x4* const tint_symbol_2 = &((*(tint_symbol_3)).w);
+  f_inner(local_invocation_index, tint_symbol_2, tint_symbol_4);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_workgroup.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_workgroup.wgsl.expected.spvasm
new file mode 100644
index 0000000..c26e2ab
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_workgroup.wgsl.expected.spvasm

@@ -0,0 +1,104 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 65
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f" %local_invocation_index_1
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %local_invocation_index_1 "local_invocation_index_1"
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner_0"
+               OpMemberName %u_block_std140 1 "inner_1"
+               OpMemberName %u_block_std140 2 "inner_2"
+               OpMemberName %u_block_std140 3 "inner_3"
+               OpName %u "u"
+               OpName %w "w"
+               OpName %load_u_inner "load_u_inner"
+               OpName %f_inner "f_inner"
+               OpName %local_invocation_index "local_invocation_index"
+               OpName %f "f"
+               OpDecorate %local_invocation_index_1 BuiltIn LocalInvocationIndex
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %u_block_std140 1 Offset 8
+               OpMemberDecorate %u_block_std140 2 Offset 16
+               OpMemberDecorate %u_block_std140 3 Offset 24
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %uint = OpTypeInt 32 0
+%_ptr_Input_uint = OpTypePointer Input %uint
+%local_invocation_index_1 = OpVariable %_ptr_Input_uint Input
+       %half = OpTypeFloat 16
+     %v4half = OpTypeVector %half 4
+%u_block_std140 = OpTypeStruct %v4half %v4half %v4half %v4half
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat4v4half = OpTypeMatrix %v4half 4
+%_ptr_Workgroup_mat4v4half = OpTypePointer Workgroup %mat4v4half
+          %w = OpVariable %_ptr_Workgroup_mat4v4half Workgroup
+         %12 = OpTypeFunction %mat4v4half
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+     %uint_3 = OpConstant %uint 3
+       %void = OpTypeVoid
+         %34 = OpTypeFunction %void %uint
+         %39 = OpConstantNull %mat4v4half
+   %uint_264 = OpConstant %uint 264
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Workgroup_v4half = OpTypePointer Workgroup %v4half
+         %53 = OpConstantNull %int
+%_ptr_Workgroup_half = OpTypePointer Workgroup %half
+         %56 = OpConstantNull %uint
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+         %60 = OpTypeFunction %void
+%load_u_inner = OpFunction %mat4v4half None %12
+         %14 = OpLabel
+         %19 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0
+         %20 = OpLoad %v4half %19
+         %23 = OpAccessChain %_ptr_Uniform_v4half %u %uint_1
+         %24 = OpLoad %v4half %23
+         %27 = OpAccessChain %_ptr_Uniform_v4half %u %uint_2
+         %28 = OpLoad %v4half %27
+         %31 = OpAccessChain %_ptr_Uniform_v4half %u %uint_3
+         %32 = OpLoad %v4half %31
+         %33 = OpCompositeConstruct %mat4v4half %20 %24 %28 %32
+               OpReturnValue %33
+               OpFunctionEnd
+    %f_inner = OpFunction %void None %34
+%local_invocation_index = OpFunctionParameter %uint
+         %38 = OpLabel
+               OpStore %w %39
+               OpControlBarrier %uint_2 %uint_2 %uint_264
+         %42 = OpFunctionCall %mat4v4half %load_u_inner
+               OpStore %w %42
+         %46 = OpAccessChain %_ptr_Workgroup_v4half %w %int_1
+         %47 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0
+         %48 = OpLoad %v4half %47
+               OpStore %46 %48
+         %49 = OpAccessChain %_ptr_Workgroup_v4half %w %int_1
+         %50 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0
+         %51 = OpLoad %v4half %50
+         %52 = OpVectorShuffle %v4half %51 %51 1 3 0 2
+               OpStore %49 %52
+         %55 = OpAccessChain %_ptr_Workgroup_half %w %53 %int_1
+         %58 = OpAccessChain %_ptr_Uniform_half %u %uint_1 %56
+         %59 = OpLoad %half %58
+               OpStore %55 %59
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %60
+         %62 = OpLabel
+         %64 = OpLoad %uint %local_invocation_index_1
+         %63 = OpFunctionCall %void %f_inner %64
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_workgroup.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_workgroup.wgsl.expected.wgsl
new file mode 100644
index 0000000..75f45fe
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f16/to_workgroup.wgsl.expected.wgsl

@@ -0,0 +1,13 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat4x4<f16>;
+
+var<workgroup> w : mat4x4<f16>;
+
+@compute @workgroup_size(1)
+fn f() {
+  w = u;
+  w[1] = u[0];
+  w[1] = u[0].ywxz;
+  w[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/dynamic_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/dynamic_index_via_ptr.wgsl
new file mode 100644
index 0000000..09d34f3
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/dynamic_index_via_ptr.wgsl

@@ -0,0 +1,13 @@
+@group(0) @binding(0) var<uniform> m : mat4x4<f32>;
+
+var<private> counter = 0;
+fn i() -> i32 { counter++; return counter; }
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_m   = &m;
+  let p_m_i = &((*p_m)[i()]);
+
+  let l_m   : mat4x4<f32> = *p_m;
+  let l_m_i : vec4<f32>   = *p_m_i;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..ea3e8b8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,26 @@
+cbuffer cbuffer_m : register(b0, space0) {
+  uint4 m[4];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+float4x4 tint_symbol(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]), asfloat(buffer[scalar_offset_3 / 4]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_m_i_save = i();
+  const float4x4 l_m = tint_symbol(m, 0u);
+  const uint scalar_offset_4 = ((16u * uint(p_m_i_save))) / 4;
+  const float4 l_m_i = asfloat(m[scalar_offset_4 / 4]);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..ea3e8b8
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,26 @@
+cbuffer cbuffer_m : register(b0, space0) {
+  uint4 m[4];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+float4x4 tint_symbol(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]), asfloat(buffer[scalar_offset_3 / 4]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_m_i_save = i();
+  const float4x4 l_m = tint_symbol(m, 0u);
+  const uint scalar_offset_4 = ((16u * uint(p_m_i_save))) / 4;
+  const float4 l_m_i = asfloat(m[scalar_offset_4 / 4]);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/dynamic_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/dynamic_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..37b6cda
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/dynamic_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,24 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform m_block_ubo {
+  mat4 inner;
+} m;
+
+int counter = 0;
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+void f() {
+  int tint_symbol = i();
+  int p_m_i_save = tint_symbol;
+  mat4 l_m = m.inner;
+  vec4 l_m_i = m.inner[p_m_i_save];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/dynamic_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/dynamic_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..084622b
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/dynamic_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,17 @@
+#include <metal_stdlib>
+
+using namespace metal;
+int i() {
+  thread int tint_symbol_1 = 0;
+  tint_symbol_1 = as_type<int>((as_type<uint>(tint_symbol_1) + as_type<uint>(1)));
+  return tint_symbol_1;
+}
+
+kernel void f(const constant float4x4* tint_symbol_2 [[buffer(0)]]) {
+  int const tint_symbol = i();
+  int const p_m_i_save = tint_symbol;
+  float4x4 const l_m = *(tint_symbol_2);
+  float4 const l_m_i = (*(tint_symbol_2))[p_m_i_save];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/dynamic_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/dynamic_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..c010265
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/dynamic_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,57 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 31
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %m_block "m_block"
+               OpMemberName %m_block 0 "inner"
+               OpName %m "m"
+               OpName %counter "counter"
+               OpName %i "i"
+               OpName %f "f"
+               OpDecorate %m_block Block
+               OpMemberDecorate %m_block 0 Offset 0
+               OpMemberDecorate %m_block 0 ColMajor
+               OpMemberDecorate %m_block 0 MatrixStride 16
+               OpDecorate %m NonWritable
+               OpDecorate %m DescriptorSet 0
+               OpDecorate %m Binding 0
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%mat4v4float = OpTypeMatrix %v4float 4
+    %m_block = OpTypeStruct %mat4v4float
+%_ptr_Uniform_m_block = OpTypePointer Uniform %m_block
+          %m = OpVariable %_ptr_Uniform_m_block Uniform
+        %int = OpTypeInt 32 1
+          %8 = OpConstantNull %int
+%_ptr_Private_int = OpTypePointer Private %int
+    %counter = OpVariable %_ptr_Private_int Private %8
+         %11 = OpTypeFunction %int
+      %int_1 = OpConstant %int 1
+       %void = OpTypeVoid
+         %18 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_mat4v4float = OpTypePointer Uniform %mat4v4float
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+          %i = OpFunction %int None %11
+         %13 = OpLabel
+         %14 = OpLoad %int %counter
+         %16 = OpIAdd %int %14 %int_1
+               OpStore %counter %16
+         %17 = OpLoad %int %counter
+               OpReturnValue %17
+               OpFunctionEnd
+          %f = OpFunction %void None %18
+         %21 = OpLabel
+         %22 = OpFunctionCall %int %i
+         %26 = OpAccessChain %_ptr_Uniform_mat4v4float %m %uint_0
+         %27 = OpLoad %mat4v4float %26
+         %29 = OpAccessChain %_ptr_Uniform_v4float %m %uint_0 %22
+         %30 = OpLoad %v4float %29
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/dynamic_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/dynamic_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..6654dab
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/dynamic_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,16 @@
+@group(0) @binding(0) var<uniform> m : mat4x4<f32>;
+
+var<private> counter = 0;
+
+fn i() -> i32 {
+  counter++;
+  return counter;
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_m = &(m);
+  let p_m_i = &((*(p_m))[i()]);
+  let l_m : mat4x4<f32> = *(p_m);
+  let l_m_i : vec4<f32> = *(p_m_i);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/static_index_via_ptr.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/static_index_via_ptr.wgsl
new file mode 100644
index 0000000..2c0c99d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/static_index_via_ptr.wgsl

@@ -0,0 +1,13 @@
+@group(0) @binding(0) var<uniform> m : mat4x4<f32>;
+
+var<private> counter = 0;
+fn i() -> i32 { counter++; return counter; }
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_m   = &m;
+  let p_m_1 = &((*p_m)[1]);
+
+  let l_m   : mat4x4<f32> = *p_m;
+  let l_m_1 : vec4<f32>   = *p_m_1;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..8ae2c10
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl

@@ -0,0 +1,24 @@
+cbuffer cbuffer_m : register(b0, space0) {
+  uint4 m[4];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+float4x4 tint_symbol(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]), asfloat(buffer[scalar_offset_3 / 4]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float4x4 l_m = tint_symbol(m, 0u);
+  const float4 l_m_1 = asfloat(m[1]);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..8ae2c10
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl

@@ -0,0 +1,24 @@
+cbuffer cbuffer_m : register(b0, space0) {
+  uint4 m[4];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+float4x4 tint_symbol(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]), asfloat(buffer[scalar_offset_3 / 4]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float4x4 l_m = tint_symbol(m, 0u);
+  const float4 l_m_1 = asfloat(m[1]);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/static_index_via_ptr.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/static_index_via_ptr.wgsl.expected.glsl
new file mode 100644
index 0000000..e9fc87c
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/static_index_via_ptr.wgsl.expected.glsl

@@ -0,0 +1,16 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform m_block_ubo {
+  mat4 inner;
+} m;
+
+void f() {
+  mat4 l_m = m.inner;
+  vec4 l_m_1 = m.inner[1];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/static_index_via_ptr.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/static_index_via_ptr.wgsl.expected.msl
new file mode 100644
index 0000000..8118193
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/static_index_via_ptr.wgsl.expected.msl

@@ -0,0 +1,15 @@
+#include <metal_stdlib>
+
+using namespace metal;
+int i() {
+  thread int tint_symbol = 0;
+  tint_symbol = as_type<int>((as_type<uint>(tint_symbol) + as_type<uint>(1)));
+  return tint_symbol;
+}
+
+kernel void f(const constant float4x4* tint_symbol_1 [[buffer(0)]]) {
+  float4x4 const l_m = *(tint_symbol_1);
+  float4 const l_m_1 = (*(tint_symbol_1))[1];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/static_index_via_ptr.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/static_index_via_ptr.wgsl.expected.spvasm
new file mode 100644
index 0000000..e4d3f30
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/static_index_via_ptr.wgsl.expected.spvasm

@@ -0,0 +1,56 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 30
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %m_block "m_block"
+               OpMemberName %m_block 0 "inner"
+               OpName %m "m"
+               OpName %counter "counter"
+               OpName %i "i"
+               OpName %f "f"
+               OpDecorate %m_block Block
+               OpMemberDecorate %m_block 0 Offset 0
+               OpMemberDecorate %m_block 0 ColMajor
+               OpMemberDecorate %m_block 0 MatrixStride 16
+               OpDecorate %m NonWritable
+               OpDecorate %m DescriptorSet 0
+               OpDecorate %m Binding 0
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%mat4v4float = OpTypeMatrix %v4float 4
+    %m_block = OpTypeStruct %mat4v4float
+%_ptr_Uniform_m_block = OpTypePointer Uniform %m_block
+          %m = OpVariable %_ptr_Uniform_m_block Uniform
+        %int = OpTypeInt 32 1
+          %8 = OpConstantNull %int
+%_ptr_Private_int = OpTypePointer Private %int
+    %counter = OpVariable %_ptr_Private_int Private %8
+         %11 = OpTypeFunction %int
+      %int_1 = OpConstant %int 1
+       %void = OpTypeVoid
+         %18 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_mat4v4float = OpTypePointer Uniform %mat4v4float
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+          %i = OpFunction %int None %11
+         %13 = OpLabel
+         %14 = OpLoad %int %counter
+         %16 = OpIAdd %int %14 %int_1
+               OpStore %counter %16
+         %17 = OpLoad %int %counter
+               OpReturnValue %17
+               OpFunctionEnd
+          %f = OpFunction %void None %18
+         %21 = OpLabel
+         %25 = OpAccessChain %_ptr_Uniform_mat4v4float %m %uint_0
+         %26 = OpLoad %mat4v4float %25
+         %28 = OpAccessChain %_ptr_Uniform_v4float %m %uint_0 %int_1
+         %29 = OpLoad %v4float %28
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/static_index_via_ptr.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/static_index_via_ptr.wgsl.expected.wgsl
new file mode 100644
index 0000000..7729038
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/static_index_via_ptr.wgsl.expected.wgsl

@@ -0,0 +1,16 @@
+@group(0) @binding(0) var<uniform> m : mat4x4<f32>;
+
+var<private> counter = 0;
+
+fn i() -> i32 {
+  counter++;
+  return counter;
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_m = &(m);
+  let p_m_1 = &((*(p_m))[1]);
+  let l_m : mat4x4<f32> = *(p_m);
+  let l_m_1 : vec4<f32> = *(p_m_1);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_builtin.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_builtin.wgsl
new file mode 100644
index 0000000..396c963
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_builtin.wgsl

@@ -0,0 +1,8 @@
+@group(0) @binding(0) var<uniform> u : mat4x4<f32>;
+
+@compute @workgroup_size(1)
+fn f() {
+    let t = transpose(u);
+    let l = length(u[1]);
+    let a = abs(u[0].ywxz.x);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_builtin.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_builtin.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..7642211
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_builtin.wgsl.expected.dxc.hlsl

@@ -0,0 +1,19 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[4];
+};
+
+float4x4 tint_symbol(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]), asfloat(buffer[scalar_offset_3 / 4]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float4x4 t = transpose(tint_symbol(u, 0u));
+  const float l = length(asfloat(u[1]));
+  const float a = abs(asfloat(u[0]).ywxz.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_builtin.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_builtin.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..7642211
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_builtin.wgsl.expected.fxc.hlsl

@@ -0,0 +1,19 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[4];
+};
+
+float4x4 tint_symbol(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]), asfloat(buffer[scalar_offset_3 / 4]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float4x4 t = transpose(tint_symbol(u, 0u));
+  const float l = length(asfloat(u[1]));
+  const float a = abs(asfloat(u[0]).ywxz.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_builtin.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_builtin.wgsl.expected.glsl
new file mode 100644
index 0000000..98ccccb
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_builtin.wgsl.expected.glsl

@@ -0,0 +1,17 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat4 inner;
+} u;
+
+void f() {
+  mat4 t = transpose(u.inner);
+  float l = length(u.inner[1]);
+  float a = abs(u.inner[0].ywxz.x);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_builtin.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_builtin.wgsl.expected.msl
new file mode 100644
index 0000000..61a809d
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_builtin.wgsl.expected.msl

@@ -0,0 +1,10 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void f(const constant float4x4* tint_symbol [[buffer(0)]]) {
+  float4x4 const t = transpose(*(tint_symbol));
+  float const l = length((*(tint_symbol))[1]);
+  float const a = fabs(float4((*(tint_symbol))[0]).ywxz[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_builtin.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_builtin.wgsl.expected.spvasm
new file mode 100644
index 0000000..c026f19
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_builtin.wgsl.expected.spvasm

@@ -0,0 +1,51 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 30
+; Schema: 0
+               OpCapability Shader
+         %18 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%mat4v4float = OpTypeMatrix %v4float 4
+    %u_block = OpTypeStruct %mat4v4float
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+       %void = OpTypeVoid
+          %7 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_mat4v4float = OpTypePointer Uniform %mat4v4float
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+         %25 = OpConstantNull %int
+          %f = OpFunction %void None %7
+         %10 = OpLabel
+         %15 = OpAccessChain %_ptr_Uniform_mat4v4float %u %uint_0
+         %16 = OpLoad %mat4v4float %15
+         %11 = OpTranspose %mat4v4float %16
+         %22 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %int_1
+         %23 = OpLoad %v4float %22
+         %17 = OpExtInst %float %18 Length %23
+         %26 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %25
+         %27 = OpLoad %v4float %26
+         %28 = OpVectorShuffle %v4float %27 %27 1 3 0 2
+         %29 = OpCompositeExtract %float %28 0
+         %24 = OpExtInst %float %18 FAbs %29
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_builtin.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_builtin.wgsl.expected.wgsl
new file mode 100644
index 0000000..2bf8fea
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_builtin.wgsl.expected.wgsl

@@ -0,0 +1,8 @@
+@group(0) @binding(0) var<uniform> u : mat4x4<f32>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let t = transpose(u);
+  let l = length(u[1]);
+  let a = abs(u[0].ywxz.x);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_fn.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_fn.wgsl
new file mode 100644
index 0000000..f149495
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_fn.wgsl

@@ -0,0 +1,14 @@
+@group(0) @binding(0) var<uniform> u : mat4x4<f32>;
+
+fn a(m : mat4x4<f32>) {}
+fn b(v : vec4<f32>) {}
+fn c(f : f32) {}
+
+@compute @workgroup_size(1)
+fn f() {
+    a(u);
+    b(u[1]);
+    b(u[1].ywxz);
+    c(u[1].x);
+    c(u[1].ywxz.x);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_fn.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_fn.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..b70e101
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_fn.wgsl.expected.dxc.hlsl

@@ -0,0 +1,30 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[4];
+};
+
+void a(float4x4 m) {
+}
+
+void b(float4 v) {
+}
+
+void c(float f_1) {
+}
+
+float4x4 tint_symbol(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]), asfloat(buffer[scalar_offset_3 / 4]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(asfloat(u[1]));
+  b(asfloat(u[1]).ywxz);
+  c(asfloat(u[1].x));
+  c(asfloat(u[1]).ywxz.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_fn.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_fn.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..b70e101
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_fn.wgsl.expected.fxc.hlsl

@@ -0,0 +1,30 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[4];
+};
+
+void a(float4x4 m) {
+}
+
+void b(float4 v) {
+}
+
+void c(float f_1) {
+}
+
+float4x4 tint_symbol(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]), asfloat(buffer[scalar_offset_3 / 4]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(asfloat(u[1]));
+  b(asfloat(u[1]).ywxz);
+  c(asfloat(u[1].x));
+  c(asfloat(u[1]).ywxz.x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_fn.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_fn.wgsl.expected.glsl
new file mode 100644
index 0000000..2e2bb64
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_fn.wgsl.expected.glsl

@@ -0,0 +1,28 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat4 inner;
+} u;
+
+void a(mat4 m) {
+}
+
+void b(vec4 v) {
+}
+
+void c(float f_1) {
+}
+
+void f() {
+  a(u.inner);
+  b(u.inner[1]);
+  b(u.inner[1].ywxz);
+  c(u.inner[1].x);
+  c(u.inner[1].ywxz.x);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_fn.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_fn.wgsl.expected.msl
new file mode 100644
index 0000000..8a08412
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_fn.wgsl.expected.msl

@@ -0,0 +1,21 @@
+#include <metal_stdlib>
+
+using namespace metal;
+void a(float4x4 m) {
+}
+
+void b(float4 v) {
+}
+
+void c(float f_1) {
+}
+
+kernel void f(const constant float4x4* tint_symbol [[buffer(0)]]) {
+  a(*(tint_symbol));
+  b((*(tint_symbol))[1]);
+  b(float4((*(tint_symbol))[1]).ywxz);
+  c((*(tint_symbol))[1][0]);
+  c(float4((*(tint_symbol))[1]).ywxz[0]);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_fn.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_fn.wgsl.expected.spvasm
new file mode 100644
index 0000000..ba20788
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_fn.wgsl.expected.spvasm

@@ -0,0 +1,81 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 48
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %a "a"
+               OpName %m "m"
+               OpName %b "b"
+               OpName %v "v"
+               OpName %c "c"
+               OpName %f_1 "f_1"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%mat4v4float = OpTypeMatrix %v4float 4
+    %u_block = OpTypeStruct %mat4v4float
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+       %void = OpTypeVoid
+          %7 = OpTypeFunction %void %mat4v4float
+         %12 = OpTypeFunction %void %v4float
+         %16 = OpTypeFunction %void %float
+         %20 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_mat4v4float = OpTypePointer Uniform %mat4v4float
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+          %a = OpFunction %void None %7
+          %m = OpFunctionParameter %mat4v4float
+         %11 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %b = OpFunction %void None %12
+          %v = OpFunctionParameter %v4float
+         %15 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %c = OpFunction %void None %16
+        %f_1 = OpFunctionParameter %float
+         %19 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %20
+         %22 = OpLabel
+         %27 = OpAccessChain %_ptr_Uniform_mat4v4float %u %uint_0
+         %28 = OpLoad %mat4v4float %27
+         %23 = OpFunctionCall %void %a %28
+         %33 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %int_1
+         %34 = OpLoad %v4float %33
+         %29 = OpFunctionCall %void %b %34
+         %36 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %int_1
+         %37 = OpLoad %v4float %36
+         %38 = OpVectorShuffle %v4float %37 %37 1 3 0 2
+         %35 = OpFunctionCall %void %b %38
+         %41 = OpAccessChain %_ptr_Uniform_float %u %uint_0 %int_1 %uint_0
+         %42 = OpLoad %float %41
+         %39 = OpFunctionCall %void %c %42
+         %44 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %int_1
+         %45 = OpLoad %v4float %44
+         %46 = OpVectorShuffle %v4float %45 %45 1 3 0 2
+         %47 = OpCompositeExtract %float %46 0
+         %43 = OpFunctionCall %void %c %47
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_fn.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_fn.wgsl.expected.wgsl
new file mode 100644
index 0000000..eb33f7c
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_fn.wgsl.expected.wgsl

@@ -0,0 +1,19 @@
+@group(0) @binding(0) var<uniform> u : mat4x4<f32>;
+
+fn a(m : mat4x4<f32>) {
+}
+
+fn b(v : vec4<f32>) {
+}
+
+fn c(f : f32) {
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  a(u);
+  b(u[1]);
+  b(u[1].ywxz);
+  c(u[1].x);
+  c(u[1].ywxz.x);
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_private.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_private.wgsl
new file mode 100644
index 0000000..36f4c15
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_private.wgsl

@@ -0,0 +1,10 @@
+@group(0) @binding(0) var<uniform> u : mat4x4<f32>;
+var<private> p : mat4x4<f32>;
+
+@compute @workgroup_size(1)
+fn f() {
+    p = u;
+    p[1] = u[0];
+    p[1] = u[0].ywxz;
+    p[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_private.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_private.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..7679686
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_private.wgsl.expected.dxc.hlsl

@@ -0,0 +1,21 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[4];
+};
+static float4x4 p = float4x4(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f);
+
+float4x4 tint_symbol(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]), asfloat(buffer[scalar_offset_3 / 4]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = asfloat(u[0]);
+  p[1] = asfloat(u[0]).ywxz;
+  p[0][1] = asfloat(u[1].x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_private.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_private.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..7679686
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_private.wgsl.expected.fxc.hlsl

@@ -0,0 +1,21 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[4];
+};
+static float4x4 p = float4x4(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f);
+
+float4x4 tint_symbol(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]), asfloat(buffer[scalar_offset_3 / 4]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = asfloat(u[0]);
+  p[1] = asfloat(u[0]).ywxz;
+  p[0][1] = asfloat(u[1].x);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_private.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_private.wgsl.expected.glsl
new file mode 100644
index 0000000..a45b304
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_private.wgsl.expected.glsl

@@ -0,0 +1,19 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat4 inner;
+} u;
+
+mat4 p = mat4(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f);
+void f() {
+  p = u.inner;
+  p[1] = u.inner[0];
+  p[1] = u.inner[0].ywxz;
+  p[0][1] = u.inner[1][0];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_private.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_private.wgsl.expected.msl
new file mode 100644
index 0000000..1230ff9
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_private.wgsl.expected.msl

@@ -0,0 +1,12 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void f(const constant float4x4* tint_symbol_1 [[buffer(0)]]) {
+  thread float4x4 tint_symbol = float4x4(0.0f);
+  tint_symbol = *(tint_symbol_1);
+  tint_symbol[1] = (*(tint_symbol_1))[0];
+  tint_symbol[1] = float4((*(tint_symbol_1))[0]).ywxz;
+  tint_symbol[0][1] = (*(tint_symbol_1))[1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_private.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_private.wgsl.expected.spvasm
new file mode 100644
index 0000000..ea0d486
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_private.wgsl.expected.spvasm

@@ -0,0 +1,62 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 36
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %p "p"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%mat4v4float = OpTypeMatrix %v4float 4
+    %u_block = OpTypeStruct %mat4v4float
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%_ptr_Private_mat4v4float = OpTypePointer Private %mat4v4float
+          %9 = OpConstantNull %mat4v4float
+          %p = OpVariable %_ptr_Private_mat4v4float Private %9
+       %void = OpTypeVoid
+         %10 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_mat4v4float = OpTypePointer Uniform %mat4v4float
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Private_v4float = OpTypePointer Private %v4float
+         %23 = OpConstantNull %int
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+%_ptr_Private_float = OpTypePointer Private %float
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+          %f = OpFunction %void None %10
+         %13 = OpLabel
+         %17 = OpAccessChain %_ptr_Uniform_mat4v4float %u %uint_0
+         %18 = OpLoad %mat4v4float %17
+               OpStore %p %18
+         %22 = OpAccessChain %_ptr_Private_v4float %p %int_1
+         %25 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %23
+         %26 = OpLoad %v4float %25
+               OpStore %22 %26
+         %27 = OpAccessChain %_ptr_Private_v4float %p %int_1
+         %28 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %23
+         %29 = OpLoad %v4float %28
+         %30 = OpVectorShuffle %v4float %29 %29 1 3 0 2
+               OpStore %27 %30
+         %32 = OpAccessChain %_ptr_Private_float %p %23 %int_1
+         %34 = OpAccessChain %_ptr_Uniform_float %u %uint_0 %int_1 %23
+         %35 = OpLoad %float %34
+               OpStore %32 %35
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_private.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_private.wgsl.expected.wgsl
new file mode 100644
index 0000000..7c0e888
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_private.wgsl.expected.wgsl

@@ -0,0 +1,11 @@
+@group(0) @binding(0) var<uniform> u : mat4x4<f32>;
+
+var<private> p : mat4x4<f32>;
+
+@compute @workgroup_size(1)
+fn f() {
+  p = u;
+  p[1] = u[0];
+  p[1] = u[0].ywxz;
+  p[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_storage.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_storage.wgsl
new file mode 100644
index 0000000..801c31f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_storage.wgsl

@@ -0,0 +1,10 @@
+@group(0) @binding(0) var<uniform> u : mat4x4<f32>;
+@group(0) @binding(1) var<storage, read_write> s : mat4x4<f32>;
+
+@compute @workgroup_size(1)
+fn f() {
+    s = u;
+    s[1] = u[0];
+    s[1] = u[0].ywxz;
+    s[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_storage.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_storage.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..cd27df0
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_storage.wgsl.expected.dxc.hlsl

@@ -0,0 +1,28 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[4];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, float4x4 value) {
+  buffer.Store4((offset + 0u), asuint(value[0u]));
+  buffer.Store4((offset + 16u), asuint(value[1u]));
+  buffer.Store4((offset + 32u), asuint(value[2u]));
+  buffer.Store4((offset + 48u), asuint(value[3u]));
+}
+
+float4x4 tint_symbol_2(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]), asfloat(buffer[scalar_offset_3 / 4]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_2(u, 0u));
+  s.Store4(16u, asuint(asfloat(u[0])));
+  s.Store4(16u, asuint(asfloat(u[0]).ywxz));
+  s.Store(4u, asuint(asfloat(u[1].x)));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_storage.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_storage.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..cd27df0
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_storage.wgsl.expected.fxc.hlsl

@@ -0,0 +1,28 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[4];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, float4x4 value) {
+  buffer.Store4((offset + 0u), asuint(value[0u]));
+  buffer.Store4((offset + 16u), asuint(value[1u]));
+  buffer.Store4((offset + 32u), asuint(value[2u]));
+  buffer.Store4((offset + 48u), asuint(value[3u]));
+}
+
+float4x4 tint_symbol_2(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]), asfloat(buffer[scalar_offset_3 / 4]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_2(u, 0u));
+  s.Store4(16u, asuint(asfloat(u[0])));
+  s.Store4(16u, asuint(asfloat(u[0]).ywxz));
+  s.Store(4u, asuint(asfloat(u[1].x)));
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_storage.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_storage.wgsl.expected.glsl
new file mode 100644
index 0000000..d8d07fb
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_storage.wgsl.expected.glsl

@@ -0,0 +1,22 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat4 inner;
+} u;
+
+layout(binding = 1, std430) buffer u_block_ssbo {
+  mat4 inner;
+} s;
+
+void f() {
+  s.inner = u.inner;
+  s.inner[1] = u.inner[0];
+  s.inner[1] = u.inner[0].ywxz;
+  s.inner[0][1] = u.inner[1][0];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_storage.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_storage.wgsl.expected.msl
new file mode 100644
index 0000000..c3e6062
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_storage.wgsl.expected.msl

@@ -0,0 +1,11 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void f(device float4x4* tint_symbol [[buffer(1)]], const constant float4x4* tint_symbol_1 [[buffer(0)]]) {
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[0];
+  (*(tint_symbol))[1] = float4((*(tint_symbol_1))[0]).ywxz;
+  (*(tint_symbol))[0][1] = (*(tint_symbol_1))[1][0];
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_storage.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_storage.wgsl.expected.spvasm
new file mode 100644
index 0000000..ade3b6f
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_storage.wgsl.expected.spvasm

@@ -0,0 +1,65 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 37
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %s "s"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpDecorate %s DescriptorSet 0
+               OpDecorate %s Binding 1
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%mat4v4float = OpTypeMatrix %v4float 4
+    %u_block = OpTypeStruct %mat4v4float
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%_ptr_StorageBuffer_u_block = OpTypePointer StorageBuffer %u_block
+          %s = OpVariable %_ptr_StorageBuffer_u_block StorageBuffer
+       %void = OpTypeVoid
+          %9 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_StorageBuffer_mat4v4float = OpTypePointer StorageBuffer %mat4v4float
+%_ptr_Uniform_mat4v4float = OpTypePointer Uniform %mat4v4float
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_StorageBuffer_v4float = OpTypePointer StorageBuffer %v4float
+         %24 = OpConstantNull %int
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+          %f = OpFunction %void None %9
+         %12 = OpLabel
+         %16 = OpAccessChain %_ptr_StorageBuffer_mat4v4float %s %uint_0
+         %18 = OpAccessChain %_ptr_Uniform_mat4v4float %u %uint_0
+         %19 = OpLoad %mat4v4float %18
+               OpStore %16 %19
+         %23 = OpAccessChain %_ptr_StorageBuffer_v4float %s %uint_0 %int_1
+         %26 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %24
+         %27 = OpLoad %v4float %26
+               OpStore %23 %27
+         %28 = OpAccessChain %_ptr_StorageBuffer_v4float %s %uint_0 %int_1
+         %29 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %24
+         %30 = OpLoad %v4float %29
+         %31 = OpVectorShuffle %v4float %30 %30 1 3 0 2
+               OpStore %28 %31
+         %33 = OpAccessChain %_ptr_StorageBuffer_float %s %uint_0 %24 %int_1
+         %35 = OpAccessChain %_ptr_Uniform_float %u %uint_0 %int_1 %24
+         %36 = OpLoad %float %35
+               OpStore %33 %36
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_storage.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_storage.wgsl.expected.wgsl
new file mode 100644
index 0000000..f21e4d9
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_storage.wgsl.expected.wgsl

@@ -0,0 +1,11 @@
+@group(0) @binding(0) var<uniform> u : mat4x4<f32>;
+
+@group(0) @binding(1) var<storage, read_write> s : mat4x4<f32>;
+
+@compute @workgroup_size(1)
+fn f() {
+  s = u;
+  s[1] = u[0];
+  s[1] = u[0].ywxz;
+  s[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_workgroup.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_workgroup.wgsl
new file mode 100644
index 0000000..b629c9e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_workgroup.wgsl

@@ -0,0 +1,10 @@
+@group(0) @binding(0) var<uniform> u : mat4x4<f32>;
+var<workgroup> w : mat4x4<f32>;
+
+@compute @workgroup_size(1)
+fn f() {
+    w = u;
+    w[1] = u[0];
+    w[1] = u[0].ywxz;
+    w[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_workgroup.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_workgroup.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..eb1a4f3
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_workgroup.wgsl.expected.dxc.hlsl

@@ -0,0 +1,33 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[4];
+};
+groupshared float4x4 w;
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+float4x4 tint_symbol_2(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]), asfloat(buffer[scalar_offset_3 / 4]));
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    w = float4x4((0.0f).xxxx, (0.0f).xxxx, (0.0f).xxxx, (0.0f).xxxx);
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = asfloat(u[0]);
+  w[1] = asfloat(u[0]).ywxz;
+  w[0][1] = asfloat(u[1].x);
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_workgroup.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_workgroup.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..eb1a4f3
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_workgroup.wgsl.expected.fxc.hlsl

@@ -0,0 +1,33 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[4];
+};
+groupshared float4x4 w;
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+float4x4 tint_symbol_2(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]), asfloat(buffer[scalar_offset_3 / 4]));
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    w = float4x4((0.0f).xxxx, (0.0f).xxxx, (0.0f).xxxx, (0.0f).xxxx);
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = asfloat(u[0]);
+  w[1] = asfloat(u[0]).ywxz;
+  w[0][1] = asfloat(u[1].x);
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_workgroup.wgsl.expected.glsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_workgroup.wgsl.expected.glsl
new file mode 100644
index 0000000..2039627
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_workgroup.wgsl.expected.glsl

@@ -0,0 +1,23 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat4 inner;
+} u;
+
+shared mat4 w;
+void f(uint local_invocation_index) {
+  {
+    w = mat4(vec4(0.0f), vec4(0.0f), vec4(0.0f), vec4(0.0f));
+  }
+  barrier();
+  w = u.inner;
+  w[1] = u.inner[0];
+  w[1] = u.inner[0].ywxz;
+  w[0][1] = u.inner[1][0];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f(gl_LocalInvocationIndex);
+  return;
+}

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_workgroup.wgsl.expected.msl
new file mode 100644
index 0000000..172975e
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_workgroup.wgsl.expected.msl

@@ -0,0 +1,24 @@
+#include <metal_stdlib>
+
+using namespace metal;
+struct tint_symbol_5 {
+  float4x4 w;
+};
+
+void f_inner(uint local_invocation_index, threadgroup float4x4* const tint_symbol, const constant float4x4* const tint_symbol_1) {
+  {
+    *(tint_symbol) = float4x4(float4(0.0f), float4(0.0f), float4(0.0f), float4(0.0f));
+  }
+  threadgroup_barrier(mem_flags::mem_threadgroup);
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[0];
+  (*(tint_symbol))[1] = float4((*(tint_symbol_1))[0]).ywxz;
+  (*(tint_symbol))[0][1] = (*(tint_symbol_1))[1][0];
+}
+
+kernel void f(const constant float4x4* tint_symbol_4 [[buffer(0)]], threadgroup tint_symbol_5* tint_symbol_3 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
+  threadgroup float4x4* const tint_symbol_2 = &((*(tint_symbol_3)).w);
+  f_inner(local_invocation_index, tint_symbol_2, tint_symbol_4);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_workgroup.wgsl.expected.spvasm b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_workgroup.wgsl.expected.spvasm
new file mode 100644
index 0000000..d8f1b17
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_workgroup.wgsl.expected.spvasm

@@ -0,0 +1,80 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 47
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f" %local_invocation_index_1
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %local_invocation_index_1 "local_invocation_index_1"
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %w "w"
+               OpName %f_inner "f_inner"
+               OpName %local_invocation_index "local_invocation_index"
+               OpName %f "f"
+               OpDecorate %local_invocation_index_1 BuiltIn LocalInvocationIndex
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %uint = OpTypeInt 32 0
+%_ptr_Input_uint = OpTypePointer Input %uint
+%local_invocation_index_1 = OpVariable %_ptr_Input_uint Input
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%mat4v4float = OpTypeMatrix %v4float 4
+    %u_block = OpTypeStruct %mat4v4float
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%_ptr_Workgroup_mat4v4float = OpTypePointer Workgroup %mat4v4float
+          %w = OpVariable %_ptr_Workgroup_mat4v4float Workgroup
+       %void = OpTypeVoid
+         %12 = OpTypeFunction %void %uint
+         %17 = OpConstantNull %mat4v4float
+     %uint_2 = OpConstant %uint 2
+   %uint_264 = OpConstant %uint 264
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_mat4v4float = OpTypePointer Uniform %mat4v4float
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Workgroup_v4float = OpTypePointer Workgroup %v4float
+         %29 = OpConstantNull %int
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+%_ptr_Workgroup_float = OpTypePointer Workgroup %float
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+         %42 = OpTypeFunction %void
+    %f_inner = OpFunction %void None %12
+%local_invocation_index = OpFunctionParameter %uint
+         %16 = OpLabel
+               OpStore %w %17
+               OpControlBarrier %uint_2 %uint_2 %uint_264
+         %23 = OpAccessChain %_ptr_Uniform_mat4v4float %u %uint_0
+         %24 = OpLoad %mat4v4float %23
+               OpStore %w %24
+         %28 = OpAccessChain %_ptr_Workgroup_v4float %w %int_1
+         %31 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %29
+         %32 = OpLoad %v4float %31
+               OpStore %28 %32
+         %33 = OpAccessChain %_ptr_Workgroup_v4float %w %int_1
+         %34 = OpAccessChain %_ptr_Uniform_v4float %u %uint_0 %29
+         %35 = OpLoad %v4float %34
+         %36 = OpVectorShuffle %v4float %35 %35 1 3 0 2
+               OpStore %33 %36
+         %38 = OpAccessChain %_ptr_Workgroup_float %w %29 %int_1
+         %40 = OpAccessChain %_ptr_Uniform_float %u %uint_0 %int_1 %29
+         %41 = OpLoad %float %40
+               OpStore %38 %41
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %42
+         %44 = OpLabel
+         %46 = OpLoad %uint %local_invocation_index_1
+         %45 = OpFunctionCall %void %f_inner %46
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_workgroup.wgsl.expected.wgsl b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_workgroup.wgsl.expected.wgsl
new file mode 100644
index 0000000..5bc17b2
--- /dev/null
+++ b/test/tint/buffer/uniform/std140/unnested/mat4x4_f32/to_workgroup.wgsl.expected.wgsl

@@ -0,0 +1,11 @@
+@group(0) @binding(0) var<uniform> u : mat4x4<f32>;
+
+var<workgroup> w : mat4x4<f32>;
+
+@compute @workgroup_size(1)
+fn f() {
+  w = u;
+  w[1] = u[0];
+  w[1] = u[0].ywxz;
+  w[0][1] = u[1][0];
+}

diff --git a/test/tint/buffer/uniform/types/array.wgsl b/test/tint/buffer/uniform/types/array.wgsl
deleted file mode 100644
index 90a400c..0000000
--- a/test/tint/buffer/uniform/types/array.wgsl
+++ /dev/null

@@ -1,7 +0,0 @@
-@group(0) @binding(0)
-var<uniform> u : array<vec4<f32>, 4>;
-
-@compute @workgroup_size(1)
-fn main() {
-  let x = u;
-}

diff --git a/test/tint/buffer/uniform/types/array.wgsl.expected.glsl b/test/tint/buffer/uniform/types/array.wgsl.expected.glsl
deleted file mode 100644
index c3f6dce..0000000
--- a/test/tint/buffer/uniform/types/array.wgsl.expected.glsl
+++ /dev/null

@@ -1,15 +0,0 @@
-#version 310 es
-
-layout(binding = 0, std140) uniform u_block_ubo {
-  vec4 inner[4];
-} u;
-
-void tint_symbol() {
-  vec4 x[4] = u.inner;
-}
-
-layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
-void main() {
-  tint_symbol();
-  return;
-}

diff --git a/test/tint/buffer/uniform/types/array.wgsl.expected.msl b/test/tint/buffer/uniform/types/array.wgsl.expected.msl
deleted file mode 100644
index 730f4b1..0000000
--- a/test/tint/buffer/uniform/types/array.wgsl.expected.msl
+++ /dev/null

@@ -1,21 +0,0 @@
-#include <metal_stdlib>
-
-using namespace metal;
-
-template<typename T, size_t N>
-struct tint_array {
-    const constant T& operator[](size_t i) const constant { return elements[i]; }
-    device T& operator[](size_t i) device { return elements[i]; }
-    const device T& operator[](size_t i) const device { return elements[i]; }
-    thread T& operator[](size_t i) thread { return elements[i]; }
-    const thread T& operator[](size_t i) const thread { return elements[i]; }
-    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
-    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
-    T elements[N];
-};
-
-kernel void tint_symbol(const constant tint_array<float4, 4>* tint_symbol_1 [[buffer(0)]]) {
-  tint_array<float4, 4> const x = *(tint_symbol_1);
-  return;
-}
-

diff --git a/test/tint/buffer/uniform/types/array.wgsl.expected.spvasm b/test/tint/buffer/uniform/types/array.wgsl.expected.spvasm
deleted file mode 100644
index ef26f1d..0000000
--- a/test/tint/buffer/uniform/types/array.wgsl.expected.spvasm
+++ /dev/null

@@ -1,37 +0,0 @@
-; SPIR-V
-; Version: 1.3
-; Generator: Google Tint Compiler; 0
-; Bound: 17
-; Schema: 0
-               OpCapability Shader
-               OpMemoryModel Logical GLSL450
-               OpEntryPoint GLCompute %main "main"
-               OpExecutionMode %main LocalSize 1 1 1
-               OpName %u_block "u_block"
-               OpMemberName %u_block 0 "inner"
-               OpName %u "u"
-               OpName %main "main"
-               OpDecorate %u_block Block
-               OpMemberDecorate %u_block 0 Offset 0
-               OpDecorate %_arr_v4float_uint_4 ArrayStride 16
-               OpDecorate %u NonWritable
-               OpDecorate %u DescriptorSet 0
-               OpDecorate %u Binding 0
-      %float = OpTypeFloat 32
-    %v4float = OpTypeVector %float 4
-       %uint = OpTypeInt 32 0
-     %uint_4 = OpConstant %uint 4
-%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4
-    %u_block = OpTypeStruct %_arr_v4float_uint_4
-%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
-          %u = OpVariable %_ptr_Uniform_u_block Uniform
-       %void = OpTypeVoid
-          %9 = OpTypeFunction %void
-     %uint_0 = OpConstant %uint 0
-%_ptr_Uniform__arr_v4float_uint_4 = OpTypePointer Uniform %_arr_v4float_uint_4
-       %main = OpFunction %void None %9
-         %12 = OpLabel
-         %15 = OpAccessChain %_ptr_Uniform__arr_v4float_uint_4 %u %uint_0
-         %16 = OpLoad %_arr_v4float_uint_4 %15
-               OpReturn
-               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/types/array.wgsl.expected.wgsl b/test/tint/buffer/uniform/types/array.wgsl.expected.wgsl
deleted file mode 100644
index 8c2b5f0..0000000
--- a/test/tint/buffer/uniform/types/array.wgsl.expected.wgsl
+++ /dev/null

@@ -1,6 +0,0 @@
-@group(0) @binding(0) var<uniform> u : array<vec4<f32>, 4>;
-
-@compute @workgroup_size(1)
-fn main() {
-  let x = u;
-}

diff --git a/test/tint/buffer/uniform/types/f16.wgsl b/test/tint/buffer/uniform/types/f16.wgsl
new file mode 100644
index 0000000..6b0d2c8
--- /dev/null
+++ b/test/tint/buffer/uniform/types/f16.wgsl

@@ -0,0 +1,9 @@
+enable f16;
+
+@group(0) @binding(0)
+var<uniform> u : f16;
+
+@compute @workgroup_size(1)
+fn main() {
+  let x = u;
+}

diff --git a/test/tint/buffer/uniform/types/f16.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/types/f16.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..5ced784
--- /dev/null
+++ b/test/tint/buffer/uniform/types/f16.wgsl.expected.dxc.hlsl

@@ -0,0 +1,9 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+
+[numthreads(1, 1, 1)]
+void main() {
+  const float16_t x = float16_t(f16tof32(((u[0].x) & 0xFFFF)));
+  return;
+}

diff --git a/test/tint/buffer/uniform/types/f16.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/types/f16.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..063ca53
--- /dev/null
+++ b/test/tint/buffer/uniform/types/f16.wgsl.expected.fxc.hlsl

@@ -0,0 +1,14 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+
+[numthreads(1, 1, 1)]
+void main() {
+  const float16_t x = float16_t(f16tof32(((u[0].x) & 0xFFFF)));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001B3B36433A0(7,9-17): error X3000: unrecognized identifier 'float16_t'
+

diff --git a/test/tint/buffer/uniform/types/f16.wgsl.expected.glsl b/test/tint/buffer/uniform/types/f16.wgsl.expected.glsl
new file mode 100644
index 0000000..079f7c1
--- /dev/null
+++ b/test/tint/buffer/uniform/types/f16.wgsl.expected.glsl

@@ -0,0 +1,16 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  float16_t inner;
+} u;
+
+void tint_symbol() {
+  float16_t x = u.inner;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  tint_symbol();
+  return;
+}

diff --git a/test/tint/buffer/uniform/types/f16.wgsl.expected.msl b/test/tint/buffer/uniform/types/f16.wgsl.expected.msl
new file mode 100644
index 0000000..e48371e
--- /dev/null
+++ b/test/tint/buffer/uniform/types/f16.wgsl.expected.msl

@@ -0,0 +1,8 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void tint_symbol(const constant half* tint_symbol_1 [[buffer(0)]]) {
+  half const x = *(tint_symbol_1);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/types/f16.wgsl.expected.spvasm b/test/tint/buffer/uniform/types/f16.wgsl.expected.spvasm
new file mode 100644
index 0000000..1765e3b
--- /dev/null
+++ b/test/tint/buffer/uniform/types/f16.wgsl.expected.spvasm

@@ -0,0 +1,37 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 14
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %main "main"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %half = OpTypeFloat 16
+    %u_block = OpTypeStruct %half
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+       %void = OpTypeVoid
+          %5 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+       %main = OpFunction %void None %5
+          %8 = OpLabel
+         %12 = OpAccessChain %_ptr_Uniform_half %u %uint_0
+         %13 = OpLoad %half %12
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/types/f16.wgsl.expected.wgsl b/test/tint/buffer/uniform/types/f16.wgsl.expected.wgsl
new file mode 100644
index 0000000..671d079
--- /dev/null
+++ b/test/tint/buffer/uniform/types/f16.wgsl.expected.wgsl

@@ -0,0 +1,8 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : f16;
+
+@compute @workgroup_size(1)
+fn main() {
+  let x = u;
+}

diff --git a/test/tint/buffer/uniform/types/mat2x2_f16.wgsl b/test/tint/buffer/uniform/types/mat2x2_f16.wgsl
new file mode 100644
index 0000000..78602cc
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat2x2_f16.wgsl

@@ -0,0 +1,9 @@
+enable f16;
+
+@group(0) @binding(0)
+var<uniform> u : mat2x2<f16>;
+
+@compute @workgroup_size(1)
+fn main() {
+  let x = u;
+}

diff --git a/test/tint/buffer/uniform/types/mat2x2_f16.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/types/mat2x2_f16.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..cb72d73
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat2x2_f16.wgsl.expected.dxc.hlsl

@@ -0,0 +1,17 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+
+matrix<float16_t, 2, 2> tint_symbol(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  return matrix<float16_t, 2, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))));
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  const matrix<float16_t, 2, 2> x = tint_symbol(u, 0u);
+  return;
+}

diff --git a/test/tint/buffer/uniform/types/mat2x2_f16.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/types/mat2x2_f16.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..f5360c9
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat2x2_f16.wgsl.expected.fxc.hlsl

@@ -0,0 +1,22 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+
+matrix<float16_t, 2, 2> tint_symbol(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  return matrix<float16_t, 2, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))));
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  const matrix<float16_t, 2, 2> x = tint_symbol(u, 0u);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000017F74BE8F50(5,8-16): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/types/mat2x2_f16.wgsl.expected.glsl b/test/tint/buffer/uniform/types/mat2x2_f16.wgsl.expected.glsl
new file mode 100644
index 0000000..2bfde1c
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat2x2_f16.wgsl.expected.glsl

@@ -0,0 +1,21 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  f16vec2 inner_0;
+  f16vec2 inner_1;
+} u;
+
+f16mat2 load_u_inner() {
+  return f16mat2(u.inner_0, u.inner_1);
+}
+
+void tint_symbol() {
+  f16mat2 x = load_u_inner();
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  tint_symbol();
+  return;
+}

diff --git a/test/tint/buffer/uniform/types/mat2x2_f16.wgsl.expected.msl b/test/tint/buffer/uniform/types/mat2x2_f16.wgsl.expected.msl
new file mode 100644
index 0000000..ada512c
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat2x2_f16.wgsl.expected.msl

@@ -0,0 +1,8 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void tint_symbol(const constant half2x2* tint_symbol_1 [[buffer(0)]]) {
+  half2x2 const x = *(tint_symbol_1);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/types/mat2x2_f16.wgsl.expected.spvasm b/test/tint/buffer/uniform/types/mat2x2_f16.wgsl.expected.spvasm
new file mode 100644
index 0000000..a350fc2
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat2x2_f16.wgsl.expected.spvasm

@@ -0,0 +1,52 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 27
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner_0"
+               OpMemberName %u_block_std140 1 "inner_1"
+               OpName %u "u"
+               OpName %load_u_inner "load_u_inner"
+               OpName %main "main"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %u_block_std140 1 Offset 4
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %half = OpTypeFloat 16
+     %v2half = OpTypeVector %half 2
+%u_block_std140 = OpTypeStruct %v2half %v2half
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat2v2half = OpTypeMatrix %v2half 2
+          %6 = OpTypeFunction %mat2v2half
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half
+     %uint_1 = OpConstant %uint 1
+       %void = OpTypeVoid
+         %22 = OpTypeFunction %void
+%load_u_inner = OpFunction %mat2v2half None %6
+          %9 = OpLabel
+         %15 = OpAccessChain %_ptr_Uniform_v2half %u %uint_0
+         %16 = OpLoad %v2half %15
+         %19 = OpAccessChain %_ptr_Uniform_v2half %u %uint_1
+         %20 = OpLoad %v2half %19
+         %21 = OpCompositeConstruct %mat2v2half %16 %20
+               OpReturnValue %21
+               OpFunctionEnd
+       %main = OpFunction %void None %22
+         %25 = OpLabel
+         %26 = OpFunctionCall %mat2v2half %load_u_inner
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/types/mat2x2_f16.wgsl.expected.wgsl b/test/tint/buffer/uniform/types/mat2x2_f16.wgsl.expected.wgsl
new file mode 100644
index 0000000..b6eac28
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat2x2_f16.wgsl.expected.wgsl

@@ -0,0 +1,8 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat2x2<f16>;
+
+@compute @workgroup_size(1)
+fn main() {
+  let x = u;
+}

diff --git a/test/tint/buffer/uniform/types/mat2x2.wgsl b/test/tint/buffer/uniform/types/mat2x2_f32.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/types/mat2x2.wgsl
rename to test/tint/buffer/uniform/types/mat2x2_f32.wgsl


diff --git a/test/tint/buffer/uniform/types/mat2x2.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/types/mat2x2_f32.wgsl.expected.dxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/types/mat2x2.wgsl.expected.dxc.hlsl
rename to test/tint/buffer/uniform/types/mat2x2_f32.wgsl.expected.dxc.hlsl


diff --git a/test/tint/buffer/uniform/types/mat2x2.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/types/mat2x2_f32.wgsl.expected.fxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/types/mat2x2.wgsl.expected.fxc.hlsl
rename to test/tint/buffer/uniform/types/mat2x2_f32.wgsl.expected.fxc.hlsl


diff --git a/test/tint/buffer/uniform/types/mat2x2.wgsl.expected.glsl b/test/tint/buffer/uniform/types/mat2x2_f32.wgsl.expected.glsl
similarity index 100%
rename from test/tint/buffer/uniform/types/mat2x2.wgsl.expected.glsl
rename to test/tint/buffer/uniform/types/mat2x2_f32.wgsl.expected.glsl


diff --git a/test/tint/buffer/uniform/types/mat2x2.wgsl.expected.msl b/test/tint/buffer/uniform/types/mat2x2_f32.wgsl.expected.msl
similarity index 100%
rename from test/tint/buffer/uniform/types/mat2x2.wgsl.expected.msl
rename to test/tint/buffer/uniform/types/mat2x2_f32.wgsl.expected.msl


diff --git a/test/tint/buffer/uniform/types/mat2x2.wgsl.expected.spvasm b/test/tint/buffer/uniform/types/mat2x2_f32.wgsl.expected.spvasm
similarity index 100%
rename from test/tint/buffer/uniform/types/mat2x2.wgsl.expected.spvasm
rename to test/tint/buffer/uniform/types/mat2x2_f32.wgsl.expected.spvasm


diff --git a/test/tint/buffer/uniform/types/mat2x2.wgsl.expected.wgsl b/test/tint/buffer/uniform/types/mat2x2_f32.wgsl.expected.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/types/mat2x2.wgsl.expected.wgsl
rename to test/tint/buffer/uniform/types/mat2x2_f32.wgsl.expected.wgsl


diff --git a/test/tint/buffer/uniform/types/mat2x3_f16.wgsl b/test/tint/buffer/uniform/types/mat2x3_f16.wgsl
new file mode 100644
index 0000000..9b92a38
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat2x3_f16.wgsl

@@ -0,0 +1,9 @@
+enable f16;
+
+@group(0) @binding(0)
+var<uniform> u : mat2x3<f16>;
+
+@compute @workgroup_size(1)
+fn main() {
+  let x = u;
+}

diff --git a/test/tint/buffer/uniform/types/mat2x3_f16.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/types/mat2x3_f16.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..ee623a0
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat2x3_f16.wgsl.expected.dxc.hlsl

@@ -0,0 +1,23 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+
+matrix<float16_t, 2, 3> tint_symbol(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  return matrix<float16_t, 2, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]));
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  const matrix<float16_t, 2, 3> x = tint_symbol(u, 0u);
+  return;
+}

diff --git a/test/tint/buffer/uniform/types/mat2x3_f16.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/types/mat2x3_f16.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..cda8557
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat2x3_f16.wgsl.expected.fxc.hlsl

@@ -0,0 +1,28 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+
+matrix<float16_t, 2, 3> tint_symbol(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  return matrix<float16_t, 2, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]));
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  const matrix<float16_t, 2, 3> x = tint_symbol(u, 0u);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x00000248395CA260(5,8-16): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/types/mat2x3_f16.wgsl.expected.glsl b/test/tint/buffer/uniform/types/mat2x3_f16.wgsl.expected.glsl
new file mode 100644
index 0000000..4e6f41a
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat2x3_f16.wgsl.expected.glsl

@@ -0,0 +1,21 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  f16vec3 inner_0;
+  f16vec3 inner_1;
+} u;
+
+f16mat2x3 load_u_inner() {
+  return f16mat2x3(u.inner_0, u.inner_1);
+}
+
+void tint_symbol() {
+  f16mat2x3 x = load_u_inner();
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  tint_symbol();
+  return;
+}

diff --git a/test/tint/buffer/uniform/types/mat2x3_f16.wgsl.expected.msl b/test/tint/buffer/uniform/types/mat2x3_f16.wgsl.expected.msl
new file mode 100644
index 0000000..58a4bff
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat2x3_f16.wgsl.expected.msl

@@ -0,0 +1,8 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void tint_symbol(const constant half2x3* tint_symbol_1 [[buffer(0)]]) {
+  half2x3 const x = *(tint_symbol_1);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/types/mat2x3_f16.wgsl.expected.spvasm b/test/tint/buffer/uniform/types/mat2x3_f16.wgsl.expected.spvasm
new file mode 100644
index 0000000..0ed7fbe
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat2x3_f16.wgsl.expected.spvasm

@@ -0,0 +1,52 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 27
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner_0"
+               OpMemberName %u_block_std140 1 "inner_1"
+               OpName %u "u"
+               OpName %load_u_inner "load_u_inner"
+               OpName %main "main"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %u_block_std140 1 Offset 8
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+%u_block_std140 = OpTypeStruct %v3half %v3half
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat2v3half = OpTypeMatrix %v3half 2
+          %6 = OpTypeFunction %mat2v3half
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+     %uint_1 = OpConstant %uint 1
+       %void = OpTypeVoid
+         %22 = OpTypeFunction %void
+%load_u_inner = OpFunction %mat2v3half None %6
+          %9 = OpLabel
+         %15 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0
+         %16 = OpLoad %v3half %15
+         %19 = OpAccessChain %_ptr_Uniform_v3half %u %uint_1
+         %20 = OpLoad %v3half %19
+         %21 = OpCompositeConstruct %mat2v3half %16 %20
+               OpReturnValue %21
+               OpFunctionEnd
+       %main = OpFunction %void None %22
+         %25 = OpLabel
+         %26 = OpFunctionCall %mat2v3half %load_u_inner
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/types/mat2x3_f16.wgsl.expected.wgsl b/test/tint/buffer/uniform/types/mat2x3_f16.wgsl.expected.wgsl
new file mode 100644
index 0000000..73c789b
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat2x3_f16.wgsl.expected.wgsl

@@ -0,0 +1,8 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat2x3<f16>;
+
+@compute @workgroup_size(1)
+fn main() {
+  let x = u;
+}

diff --git a/test/tint/buffer/uniform/types/mat2x3.wgsl b/test/tint/buffer/uniform/types/mat2x3_f32.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/types/mat2x3.wgsl
rename to test/tint/buffer/uniform/types/mat2x3_f32.wgsl


diff --git a/test/tint/buffer/uniform/types/mat2x3.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/types/mat2x3_f32.wgsl.expected.dxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/types/mat2x3.wgsl.expected.dxc.hlsl
rename to test/tint/buffer/uniform/types/mat2x3_f32.wgsl.expected.dxc.hlsl


diff --git a/test/tint/buffer/uniform/types/mat2x3.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/types/mat2x3_f32.wgsl.expected.fxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/types/mat2x3.wgsl.expected.fxc.hlsl
rename to test/tint/buffer/uniform/types/mat2x3_f32.wgsl.expected.fxc.hlsl


diff --git a/test/tint/buffer/uniform/types/mat2x3.wgsl.expected.glsl b/test/tint/buffer/uniform/types/mat2x3_f32.wgsl.expected.glsl
similarity index 100%
rename from test/tint/buffer/uniform/types/mat2x3.wgsl.expected.glsl
rename to test/tint/buffer/uniform/types/mat2x3_f32.wgsl.expected.glsl


diff --git a/test/tint/buffer/uniform/types/mat2x3.wgsl.expected.msl b/test/tint/buffer/uniform/types/mat2x3_f32.wgsl.expected.msl
similarity index 100%
rename from test/tint/buffer/uniform/types/mat2x3.wgsl.expected.msl
rename to test/tint/buffer/uniform/types/mat2x3_f32.wgsl.expected.msl


diff --git a/test/tint/buffer/uniform/types/mat2x3.wgsl.expected.spvasm b/test/tint/buffer/uniform/types/mat2x3_f32.wgsl.expected.spvasm
similarity index 100%
rename from test/tint/buffer/uniform/types/mat2x3.wgsl.expected.spvasm
rename to test/tint/buffer/uniform/types/mat2x3_f32.wgsl.expected.spvasm


diff --git a/test/tint/buffer/uniform/types/mat2x3.wgsl.expected.wgsl b/test/tint/buffer/uniform/types/mat2x3_f32.wgsl.expected.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/types/mat2x3.wgsl.expected.wgsl
rename to test/tint/buffer/uniform/types/mat2x3_f32.wgsl.expected.wgsl


diff --git a/test/tint/buffer/uniform/types/mat2x4_f16.wgsl b/test/tint/buffer/uniform/types/mat2x4_f16.wgsl
new file mode 100644
index 0000000..b6b79c5
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat2x4_f16.wgsl

@@ -0,0 +1,9 @@
+enable f16;
+
+@group(0) @binding(0)
+var<uniform> u : mat2x4<f16>;
+
+@compute @workgroup_size(1)
+fn main() {
+  let x = u;
+}

diff --git a/test/tint/buffer/uniform/types/mat2x4_f16.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/types/mat2x4_f16.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..c891507
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat2x4_f16.wgsl.expected.dxc.hlsl

@@ -0,0 +1,23 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+
+matrix<float16_t, 2, 4> tint_symbol(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  return matrix<float16_t, 2, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]));
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  const matrix<float16_t, 2, 4> x = tint_symbol(u, 0u);
+  return;
+}

diff --git a/test/tint/buffer/uniform/types/mat2x4_f16.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/types/mat2x4_f16.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..8f306c1
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat2x4_f16.wgsl.expected.fxc.hlsl

@@ -0,0 +1,28 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+
+matrix<float16_t, 2, 4> tint_symbol(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  return matrix<float16_t, 2, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]));
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  const matrix<float16_t, 2, 4> x = tint_symbol(u, 0u);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000010C86099F60(5,8-16): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/types/mat2x4_f16.wgsl.expected.glsl b/test/tint/buffer/uniform/types/mat2x4_f16.wgsl.expected.glsl
new file mode 100644
index 0000000..5a472a1
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat2x4_f16.wgsl.expected.glsl

@@ -0,0 +1,21 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  f16vec4 inner_0;
+  f16vec4 inner_1;
+} u;
+
+f16mat2x4 load_u_inner() {
+  return f16mat2x4(u.inner_0, u.inner_1);
+}
+
+void tint_symbol() {
+  f16mat2x4 x = load_u_inner();
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  tint_symbol();
+  return;
+}

diff --git a/test/tint/buffer/uniform/types/mat2x4_f16.wgsl.expected.msl b/test/tint/buffer/uniform/types/mat2x4_f16.wgsl.expected.msl
new file mode 100644
index 0000000..932aa83
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat2x4_f16.wgsl.expected.msl

@@ -0,0 +1,8 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void tint_symbol(const constant half2x4* tint_symbol_1 [[buffer(0)]]) {
+  half2x4 const x = *(tint_symbol_1);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/types/mat2x4_f16.wgsl.expected.spvasm b/test/tint/buffer/uniform/types/mat2x4_f16.wgsl.expected.spvasm
new file mode 100644
index 0000000..6ffde9e
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat2x4_f16.wgsl.expected.spvasm

@@ -0,0 +1,52 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 27
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner_0"
+               OpMemberName %u_block_std140 1 "inner_1"
+               OpName %u "u"
+               OpName %load_u_inner "load_u_inner"
+               OpName %main "main"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %u_block_std140 1 Offset 8
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %half = OpTypeFloat 16
+     %v4half = OpTypeVector %half 4
+%u_block_std140 = OpTypeStruct %v4half %v4half
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat2v4half = OpTypeMatrix %v4half 2
+          %6 = OpTypeFunction %mat2v4half
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half
+     %uint_1 = OpConstant %uint 1
+       %void = OpTypeVoid
+         %22 = OpTypeFunction %void
+%load_u_inner = OpFunction %mat2v4half None %6
+          %9 = OpLabel
+         %15 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0
+         %16 = OpLoad %v4half %15
+         %19 = OpAccessChain %_ptr_Uniform_v4half %u %uint_1
+         %20 = OpLoad %v4half %19
+         %21 = OpCompositeConstruct %mat2v4half %16 %20
+               OpReturnValue %21
+               OpFunctionEnd
+       %main = OpFunction %void None %22
+         %25 = OpLabel
+         %26 = OpFunctionCall %mat2v4half %load_u_inner
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/types/mat2x4_f16.wgsl.expected.wgsl b/test/tint/buffer/uniform/types/mat2x4_f16.wgsl.expected.wgsl
new file mode 100644
index 0000000..d6ff764
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat2x4_f16.wgsl.expected.wgsl

@@ -0,0 +1,8 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat2x4<f16>;
+
+@compute @workgroup_size(1)
+fn main() {
+  let x = u;
+}

diff --git a/test/tint/buffer/uniform/types/mat2x4_f32.wgsl b/test/tint/buffer/uniform/types/mat2x4_f32.wgsl
new file mode 100644
index 0000000..1847020
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat2x4_f32.wgsl

@@ -0,0 +1,7 @@
+@group(0) @binding(0)
+var<uniform> u : mat2x4<f32>;
+
+@compute @workgroup_size(1)
+fn main() {
+  let x = u;
+}

diff --git a/test/tint/buffer/uniform/types/mat2x4_f32.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/types/mat2x4_f32.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..cead0e2
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat2x4_f32.wgsl.expected.dxc.hlsl

@@ -0,0 +1,15 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+
+float2x4 tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]));
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  const float2x4 x = tint_symbol(u, 0u);
+  return;
+}

diff --git a/test/tint/buffer/uniform/types/mat2x4_f32.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/types/mat2x4_f32.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..cead0e2
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat2x4_f32.wgsl.expected.fxc.hlsl

@@ -0,0 +1,15 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+
+float2x4 tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]));
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  const float2x4 x = tint_symbol(u, 0u);
+  return;
+}

diff --git a/test/tint/buffer/uniform/types/mat2x4_f32.wgsl.expected.glsl b/test/tint/buffer/uniform/types/mat2x4_f32.wgsl.expected.glsl
new file mode 100644
index 0000000..c476a7f
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat2x4_f32.wgsl.expected.glsl

@@ -0,0 +1,15 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat2x4 inner;
+} u;
+
+void tint_symbol() {
+  mat2x4 x = u.inner;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  tint_symbol();
+  return;
+}

diff --git a/test/tint/buffer/uniform/types/mat2x4_f32.wgsl.expected.msl b/test/tint/buffer/uniform/types/mat2x4_f32.wgsl.expected.msl
new file mode 100644
index 0000000..b5c1099
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat2x4_f32.wgsl.expected.msl

@@ -0,0 +1,8 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void tint_symbol(const constant float2x4* tint_symbol_1 [[buffer(0)]]) {
+  float2x4 const x = *(tint_symbol_1);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/types/mat2x4_f32.wgsl.expected.spvasm b/test/tint/buffer/uniform/types/mat2x4_f32.wgsl.expected.spvasm
new file mode 100644
index 0000000..9da8a2f
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat2x4_f32.wgsl.expected.spvasm

@@ -0,0 +1,37 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 16
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %main "main"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%mat2v4float = OpTypeMatrix %v4float 2
+    %u_block = OpTypeStruct %mat2v4float
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+       %void = OpTypeVoid
+          %7 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_mat2v4float = OpTypePointer Uniform %mat2v4float
+       %main = OpFunction %void None %7
+         %10 = OpLabel
+         %14 = OpAccessChain %_ptr_Uniform_mat2v4float %u %uint_0
+         %15 = OpLoad %mat2v4float %14
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/types/mat2x4_f32.wgsl.expected.wgsl b/test/tint/buffer/uniform/types/mat2x4_f32.wgsl.expected.wgsl
new file mode 100644
index 0000000..368e8b0
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat2x4_f32.wgsl.expected.wgsl

@@ -0,0 +1,6 @@
+@group(0) @binding(0) var<uniform> u : mat2x4<f32>;
+
+@compute @workgroup_size(1)
+fn main() {
+  let x = u;
+}

diff --git a/test/tint/buffer/uniform/types/mat3x2_f16.wgsl b/test/tint/buffer/uniform/types/mat3x2_f16.wgsl
new file mode 100644
index 0000000..5f158a3
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat3x2_f16.wgsl

@@ -0,0 +1,9 @@
+enable f16;
+
+@group(0) @binding(0)
+var<uniform> u : mat3x2<f16>;
+
+@compute @workgroup_size(1)
+fn main() {
+  let x = u;
+}

diff --git a/test/tint/buffer/uniform/types/mat3x2_f16.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/types/mat3x2_f16.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..704945f
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat3x2_f16.wgsl.expected.dxc.hlsl

@@ -0,0 +1,19 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+
+matrix<float16_t, 3, 2> tint_symbol(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  return matrix<float16_t, 3, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))));
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  const matrix<float16_t, 3, 2> x = tint_symbol(u, 0u);
+  return;
+}

diff --git a/test/tint/buffer/uniform/types/mat3x2_f16.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/types/mat3x2_f16.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..32c5415
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat3x2_f16.wgsl.expected.fxc.hlsl

@@ -0,0 +1,24 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+
+matrix<float16_t, 3, 2> tint_symbol(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  return matrix<float16_t, 3, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))));
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  const matrix<float16_t, 3, 2> x = tint_symbol(u, 0u);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000014294788E00(5,8-16): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/types/mat3x2_f16.wgsl.expected.glsl b/test/tint/buffer/uniform/types/mat3x2_f16.wgsl.expected.glsl
new file mode 100644
index 0000000..766b477
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat3x2_f16.wgsl.expected.glsl

@@ -0,0 +1,22 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  f16vec2 inner_0;
+  f16vec2 inner_1;
+  f16vec2 inner_2;
+} u;
+
+f16mat3x2 load_u_inner() {
+  return f16mat3x2(u.inner_0, u.inner_1, u.inner_2);
+}
+
+void tint_symbol() {
+  f16mat3x2 x = load_u_inner();
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  tint_symbol();
+  return;
+}

diff --git a/test/tint/buffer/uniform/types/mat3x2_f16.wgsl.expected.msl b/test/tint/buffer/uniform/types/mat3x2_f16.wgsl.expected.msl
new file mode 100644
index 0000000..f5c897d
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat3x2_f16.wgsl.expected.msl

@@ -0,0 +1,8 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void tint_symbol(const constant half3x2* tint_symbol_1 [[buffer(0)]]) {
+  half3x2 const x = *(tint_symbol_1);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/types/mat3x2_f16.wgsl.expected.spvasm b/test/tint/buffer/uniform/types/mat3x2_f16.wgsl.expected.spvasm
new file mode 100644
index 0000000..a365da4
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat3x2_f16.wgsl.expected.spvasm

@@ -0,0 +1,57 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 31
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner_0"
+               OpMemberName %u_block_std140 1 "inner_1"
+               OpMemberName %u_block_std140 2 "inner_2"
+               OpName %u "u"
+               OpName %load_u_inner "load_u_inner"
+               OpName %main "main"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %u_block_std140 1 Offset 4
+               OpMemberDecorate %u_block_std140 2 Offset 8
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %half = OpTypeFloat 16
+     %v2half = OpTypeVector %half 2
+%u_block_std140 = OpTypeStruct %v2half %v2half %v2half
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat3v2half = OpTypeMatrix %v2half 3
+          %6 = OpTypeFunction %mat3v2half
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+       %void = OpTypeVoid
+         %26 = OpTypeFunction %void
+%load_u_inner = OpFunction %mat3v2half None %6
+          %9 = OpLabel
+         %15 = OpAccessChain %_ptr_Uniform_v2half %u %uint_0
+         %16 = OpLoad %v2half %15
+         %19 = OpAccessChain %_ptr_Uniform_v2half %u %uint_1
+         %20 = OpLoad %v2half %19
+         %23 = OpAccessChain %_ptr_Uniform_v2half %u %uint_2
+         %24 = OpLoad %v2half %23
+         %25 = OpCompositeConstruct %mat3v2half %16 %20 %24
+               OpReturnValue %25
+               OpFunctionEnd
+       %main = OpFunction %void None %26
+         %29 = OpLabel
+         %30 = OpFunctionCall %mat3v2half %load_u_inner
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/types/mat3x2_f16.wgsl.expected.wgsl b/test/tint/buffer/uniform/types/mat3x2_f16.wgsl.expected.wgsl
new file mode 100644
index 0000000..512b9ac
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat3x2_f16.wgsl.expected.wgsl

@@ -0,0 +1,8 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat3x2<f16>;
+
+@compute @workgroup_size(1)
+fn main() {
+  let x = u;
+}

diff --git a/test/tint/buffer/uniform/types/mat3x2.wgsl b/test/tint/buffer/uniform/types/mat3x2_f32.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/types/mat3x2.wgsl
rename to test/tint/buffer/uniform/types/mat3x2_f32.wgsl


diff --git a/test/tint/buffer/uniform/types/mat3x2.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/types/mat3x2_f32.wgsl.expected.dxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/types/mat3x2.wgsl.expected.dxc.hlsl
rename to test/tint/buffer/uniform/types/mat3x2_f32.wgsl.expected.dxc.hlsl


diff --git a/test/tint/buffer/uniform/types/mat3x2.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/types/mat3x2_f32.wgsl.expected.fxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/types/mat3x2.wgsl.expected.fxc.hlsl
rename to test/tint/buffer/uniform/types/mat3x2_f32.wgsl.expected.fxc.hlsl


diff --git a/test/tint/buffer/uniform/types/mat3x2.wgsl.expected.glsl b/test/tint/buffer/uniform/types/mat3x2_f32.wgsl.expected.glsl
similarity index 100%
rename from test/tint/buffer/uniform/types/mat3x2.wgsl.expected.glsl
rename to test/tint/buffer/uniform/types/mat3x2_f32.wgsl.expected.glsl


diff --git a/test/tint/buffer/uniform/types/mat3x2.wgsl.expected.msl b/test/tint/buffer/uniform/types/mat3x2_f32.wgsl.expected.msl
similarity index 100%
rename from test/tint/buffer/uniform/types/mat3x2.wgsl.expected.msl
rename to test/tint/buffer/uniform/types/mat3x2_f32.wgsl.expected.msl


diff --git a/test/tint/buffer/uniform/types/mat3x2.wgsl.expected.spvasm b/test/tint/buffer/uniform/types/mat3x2_f32.wgsl.expected.spvasm
similarity index 100%
rename from test/tint/buffer/uniform/types/mat3x2.wgsl.expected.spvasm
rename to test/tint/buffer/uniform/types/mat3x2_f32.wgsl.expected.spvasm


diff --git a/test/tint/buffer/uniform/types/mat3x2.wgsl.expected.wgsl b/test/tint/buffer/uniform/types/mat3x2_f32.wgsl.expected.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/types/mat3x2.wgsl.expected.wgsl
rename to test/tint/buffer/uniform/types/mat3x2_f32.wgsl.expected.wgsl


diff --git a/test/tint/buffer/uniform/types/mat3x3_f16.wgsl b/test/tint/buffer/uniform/types/mat3x3_f16.wgsl
new file mode 100644
index 0000000..967f0cd
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat3x3_f16.wgsl

@@ -0,0 +1,9 @@
+enable f16;
+
+@group(0) @binding(0)
+var<uniform> u : mat3x3<f16>;
+
+@compute @workgroup_size(1)
+fn main() {
+  let x = u;
+}

diff --git a/test/tint/buffer/uniform/types/mat3x3_f16.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/types/mat3x3_f16.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..04db7e6
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat3x3_f16.wgsl.expected.dxc.hlsl

@@ -0,0 +1,28 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+
+matrix<float16_t, 3, 3> tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  return matrix<float16_t, 3, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]));
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  const matrix<float16_t, 3, 3> x = tint_symbol(u, 0u);
+  return;
+}

diff --git a/test/tint/buffer/uniform/types/mat3x3_f16.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/types/mat3x3_f16.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..6b5696d
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat3x3_f16.wgsl.expected.fxc.hlsl

@@ -0,0 +1,33 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+
+matrix<float16_t, 3, 3> tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  return matrix<float16_t, 3, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]));
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  const matrix<float16_t, 3, 3> x = tint_symbol(u, 0u);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x00000231800429A0(5,8-16): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/types/mat3x3_f16.wgsl.expected.glsl b/test/tint/buffer/uniform/types/mat3x3_f16.wgsl.expected.glsl
new file mode 100644
index 0000000..17d202d
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat3x3_f16.wgsl.expected.glsl

@@ -0,0 +1,22 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  f16vec3 inner_0;
+  f16vec3 inner_1;
+  f16vec3 inner_2;
+} u;
+
+f16mat3 load_u_inner() {
+  return f16mat3(u.inner_0, u.inner_1, u.inner_2);
+}
+
+void tint_symbol() {
+  f16mat3 x = load_u_inner();
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  tint_symbol();
+  return;
+}

diff --git a/test/tint/buffer/uniform/types/mat3x3_f16.wgsl.expected.msl b/test/tint/buffer/uniform/types/mat3x3_f16.wgsl.expected.msl
new file mode 100644
index 0000000..060a4ff
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat3x3_f16.wgsl.expected.msl

@@ -0,0 +1,8 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void tint_symbol(const constant half3x3* tint_symbol_1 [[buffer(0)]]) {
+  half3x3 const x = *(tint_symbol_1);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/types/mat3x3_f16.wgsl.expected.spvasm b/test/tint/buffer/uniform/types/mat3x3_f16.wgsl.expected.spvasm
new file mode 100644
index 0000000..044aa6c
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat3x3_f16.wgsl.expected.spvasm

@@ -0,0 +1,57 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 31
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner_0"
+               OpMemberName %u_block_std140 1 "inner_1"
+               OpMemberName %u_block_std140 2 "inner_2"
+               OpName %u "u"
+               OpName %load_u_inner "load_u_inner"
+               OpName %main "main"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %u_block_std140 1 Offset 8
+               OpMemberDecorate %u_block_std140 2 Offset 16
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+%u_block_std140 = OpTypeStruct %v3half %v3half %v3half
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat3v3half = OpTypeMatrix %v3half 3
+          %6 = OpTypeFunction %mat3v3half
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+       %void = OpTypeVoid
+         %26 = OpTypeFunction %void
+%load_u_inner = OpFunction %mat3v3half None %6
+          %9 = OpLabel
+         %15 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0
+         %16 = OpLoad %v3half %15
+         %19 = OpAccessChain %_ptr_Uniform_v3half %u %uint_1
+         %20 = OpLoad %v3half %19
+         %23 = OpAccessChain %_ptr_Uniform_v3half %u %uint_2
+         %24 = OpLoad %v3half %23
+         %25 = OpCompositeConstruct %mat3v3half %16 %20 %24
+               OpReturnValue %25
+               OpFunctionEnd
+       %main = OpFunction %void None %26
+         %29 = OpLabel
+         %30 = OpFunctionCall %mat3v3half %load_u_inner
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/types/mat3x3_f16.wgsl.expected.wgsl b/test/tint/buffer/uniform/types/mat3x3_f16.wgsl.expected.wgsl
new file mode 100644
index 0000000..c7b7675
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat3x3_f16.wgsl.expected.wgsl

@@ -0,0 +1,8 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat3x3<f16>;
+
+@compute @workgroup_size(1)
+fn main() {
+  let x = u;
+}

diff --git a/test/tint/buffer/uniform/types/mat3x3_f32.wgsl b/test/tint/buffer/uniform/types/mat3x3_f32.wgsl
new file mode 100644
index 0000000..a67f2fa
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat3x3_f32.wgsl

@@ -0,0 +1,7 @@
+@group(0) @binding(0)
+var<uniform> u : mat3x3<f32>;
+
+@compute @workgroup_size(1)
+fn main() {
+  let x = u;
+}

diff --git a/test/tint/buffer/uniform/types/mat3x3_f32.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/types/mat3x3_f32.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..310ff04
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat3x3_f32.wgsl.expected.dxc.hlsl

@@ -0,0 +1,16 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[3];
+};
+
+float3x3 tint_symbol(uint4 buffer[3], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz));
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  const float3x3 x = tint_symbol(u, 0u);
+  return;
+}

diff --git a/test/tint/buffer/uniform/types/mat3x3_f32.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/types/mat3x3_f32.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..310ff04
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat3x3_f32.wgsl.expected.fxc.hlsl

@@ -0,0 +1,16 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[3];
+};
+
+float3x3 tint_symbol(uint4 buffer[3], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz));
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  const float3x3 x = tint_symbol(u, 0u);
+  return;
+}

diff --git a/test/tint/buffer/uniform/types/mat3x3_f32.wgsl.expected.glsl b/test/tint/buffer/uniform/types/mat3x3_f32.wgsl.expected.glsl
new file mode 100644
index 0000000..5b15b84
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat3x3_f32.wgsl.expected.glsl

@@ -0,0 +1,15 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat3 inner;
+} u;
+
+void tint_symbol() {
+  mat3 x = u.inner;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  tint_symbol();
+  return;
+}

diff --git a/test/tint/buffer/uniform/types/mat3x3_f32.wgsl.expected.msl b/test/tint/buffer/uniform/types/mat3x3_f32.wgsl.expected.msl
new file mode 100644
index 0000000..328708c
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat3x3_f32.wgsl.expected.msl

@@ -0,0 +1,8 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void tint_symbol(const constant float3x3* tint_symbol_1 [[buffer(0)]]) {
+  float3x3 const x = *(tint_symbol_1);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/types/mat3x3_f32.wgsl.expected.spvasm b/test/tint/buffer/uniform/types/mat3x3_f32.wgsl.expected.spvasm
new file mode 100644
index 0000000..18970bc
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat3x3_f32.wgsl.expected.spvasm

@@ -0,0 +1,37 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 16
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %main "main"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat3v3float = OpTypeMatrix %v3float 3
+    %u_block = OpTypeStruct %mat3v3float
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+       %void = OpTypeVoid
+          %7 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_mat3v3float = OpTypePointer Uniform %mat3v3float
+       %main = OpFunction %void None %7
+         %10 = OpLabel
+         %14 = OpAccessChain %_ptr_Uniform_mat3v3float %u %uint_0
+         %15 = OpLoad %mat3v3float %14
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/types/mat3x3_f32.wgsl.expected.wgsl b/test/tint/buffer/uniform/types/mat3x3_f32.wgsl.expected.wgsl
new file mode 100644
index 0000000..e56c0b6
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat3x3_f32.wgsl.expected.wgsl

@@ -0,0 +1,6 @@
+@group(0) @binding(0) var<uniform> u : mat3x3<f32>;
+
+@compute @workgroup_size(1)
+fn main() {
+  let x = u;
+}

diff --git a/test/tint/buffer/uniform/types/mat3x4_f16.wgsl b/test/tint/buffer/uniform/types/mat3x4_f16.wgsl
new file mode 100644
index 0000000..3830f33
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat3x4_f16.wgsl

@@ -0,0 +1,9 @@
+enable f16;
+
+@group(0) @binding(0)
+var<uniform> u : mat3x4<f16>;
+
+@compute @workgroup_size(1)
+fn main() {
+  let x = u;
+}

diff --git a/test/tint/buffer/uniform/types/mat3x4_f16.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/types/mat3x4_f16.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..a13d689
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat3x4_f16.wgsl.expected.dxc.hlsl

@@ -0,0 +1,28 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+
+matrix<float16_t, 3, 4> tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  return matrix<float16_t, 3, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]));
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  const matrix<float16_t, 3, 4> x = tint_symbol(u, 0u);
+  return;
+}

diff --git a/test/tint/buffer/uniform/types/mat3x4_f16.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/types/mat3x4_f16.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..b410283
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat3x4_f16.wgsl.expected.fxc.hlsl

@@ -0,0 +1,33 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+
+matrix<float16_t, 3, 4> tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  return matrix<float16_t, 3, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]));
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  const matrix<float16_t, 3, 4> x = tint_symbol(u, 0u);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001E049208950(5,8-16): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/types/mat3x4_f16.wgsl.expected.glsl b/test/tint/buffer/uniform/types/mat3x4_f16.wgsl.expected.glsl
new file mode 100644
index 0000000..2cb7a29
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat3x4_f16.wgsl.expected.glsl

@@ -0,0 +1,22 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  f16vec4 inner_0;
+  f16vec4 inner_1;
+  f16vec4 inner_2;
+} u;
+
+f16mat3x4 load_u_inner() {
+  return f16mat3x4(u.inner_0, u.inner_1, u.inner_2);
+}
+
+void tint_symbol() {
+  f16mat3x4 x = load_u_inner();
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  tint_symbol();
+  return;
+}

diff --git a/test/tint/buffer/uniform/types/mat3x4_f16.wgsl.expected.msl b/test/tint/buffer/uniform/types/mat3x4_f16.wgsl.expected.msl
new file mode 100644
index 0000000..e28cf69
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat3x4_f16.wgsl.expected.msl

@@ -0,0 +1,8 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void tint_symbol(const constant half3x4* tint_symbol_1 [[buffer(0)]]) {
+  half3x4 const x = *(tint_symbol_1);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/types/mat3x4_f16.wgsl.expected.spvasm b/test/tint/buffer/uniform/types/mat3x4_f16.wgsl.expected.spvasm
new file mode 100644
index 0000000..22a1399
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat3x4_f16.wgsl.expected.spvasm

@@ -0,0 +1,57 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 31
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner_0"
+               OpMemberName %u_block_std140 1 "inner_1"
+               OpMemberName %u_block_std140 2 "inner_2"
+               OpName %u "u"
+               OpName %load_u_inner "load_u_inner"
+               OpName %main "main"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %u_block_std140 1 Offset 8
+               OpMemberDecorate %u_block_std140 2 Offset 16
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %half = OpTypeFloat 16
+     %v4half = OpTypeVector %half 4
+%u_block_std140 = OpTypeStruct %v4half %v4half %v4half
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat3v4half = OpTypeMatrix %v4half 3
+          %6 = OpTypeFunction %mat3v4half
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+       %void = OpTypeVoid
+         %26 = OpTypeFunction %void
+%load_u_inner = OpFunction %mat3v4half None %6
+          %9 = OpLabel
+         %15 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0
+         %16 = OpLoad %v4half %15
+         %19 = OpAccessChain %_ptr_Uniform_v4half %u %uint_1
+         %20 = OpLoad %v4half %19
+         %23 = OpAccessChain %_ptr_Uniform_v4half %u %uint_2
+         %24 = OpLoad %v4half %23
+         %25 = OpCompositeConstruct %mat3v4half %16 %20 %24
+               OpReturnValue %25
+               OpFunctionEnd
+       %main = OpFunction %void None %26
+         %29 = OpLabel
+         %30 = OpFunctionCall %mat3v4half %load_u_inner
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/types/mat3x4_f16.wgsl.expected.wgsl b/test/tint/buffer/uniform/types/mat3x4_f16.wgsl.expected.wgsl
new file mode 100644
index 0000000..0047e5a
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat3x4_f16.wgsl.expected.wgsl

@@ -0,0 +1,8 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat3x4<f16>;
+
+@compute @workgroup_size(1)
+fn main() {
+  let x = u;
+}

diff --git a/test/tint/buffer/uniform/types/mat3x4_f32.wgsl b/test/tint/buffer/uniform/types/mat3x4_f32.wgsl
new file mode 100644
index 0000000..ab5dda1
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat3x4_f32.wgsl

@@ -0,0 +1,7 @@
+@group(0) @binding(0)
+var<uniform> u : mat3x4<f32>;
+
+@compute @workgroup_size(1)
+fn main() {
+  let x = u;
+}

diff --git a/test/tint/buffer/uniform/types/mat3x4_f32.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/types/mat3x4_f32.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..4a409e5
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat3x4_f32.wgsl.expected.dxc.hlsl

@@ -0,0 +1,16 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[3];
+};
+
+float3x4 tint_symbol(uint4 buffer[3], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]));
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  const float3x4 x = tint_symbol(u, 0u);
+  return;
+}

diff --git a/test/tint/buffer/uniform/types/mat3x4_f32.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/types/mat3x4_f32.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..4a409e5
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat3x4_f32.wgsl.expected.fxc.hlsl

@@ -0,0 +1,16 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[3];
+};
+
+float3x4 tint_symbol(uint4 buffer[3], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]));
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  const float3x4 x = tint_symbol(u, 0u);
+  return;
+}

diff --git a/test/tint/buffer/uniform/types/mat3x4_f32.wgsl.expected.glsl b/test/tint/buffer/uniform/types/mat3x4_f32.wgsl.expected.glsl
new file mode 100644
index 0000000..318b5f4
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat3x4_f32.wgsl.expected.glsl

@@ -0,0 +1,15 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat3x4 inner;
+} u;
+
+void tint_symbol() {
+  mat3x4 x = u.inner;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  tint_symbol();
+  return;
+}

diff --git a/test/tint/buffer/uniform/types/mat3x4_f32.wgsl.expected.msl b/test/tint/buffer/uniform/types/mat3x4_f32.wgsl.expected.msl
new file mode 100644
index 0000000..5cebcd4
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat3x4_f32.wgsl.expected.msl

@@ -0,0 +1,8 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void tint_symbol(const constant float3x4* tint_symbol_1 [[buffer(0)]]) {
+  float3x4 const x = *(tint_symbol_1);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/types/mat3x4_f32.wgsl.expected.spvasm b/test/tint/buffer/uniform/types/mat3x4_f32.wgsl.expected.spvasm
new file mode 100644
index 0000000..6d7f26a
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat3x4_f32.wgsl.expected.spvasm

@@ -0,0 +1,37 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 16
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %main "main"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%mat3v4float = OpTypeMatrix %v4float 3
+    %u_block = OpTypeStruct %mat3v4float
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+       %void = OpTypeVoid
+          %7 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_mat3v4float = OpTypePointer Uniform %mat3v4float
+       %main = OpFunction %void None %7
+         %10 = OpLabel
+         %14 = OpAccessChain %_ptr_Uniform_mat3v4float %u %uint_0
+         %15 = OpLoad %mat3v4float %14
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/types/mat3x4_f32.wgsl.expected.wgsl b/test/tint/buffer/uniform/types/mat3x4_f32.wgsl.expected.wgsl
new file mode 100644
index 0000000..97fea4f
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat3x4_f32.wgsl.expected.wgsl

@@ -0,0 +1,6 @@
+@group(0) @binding(0) var<uniform> u : mat3x4<f32>;
+
+@compute @workgroup_size(1)
+fn main() {
+  let x = u;
+}

diff --git a/test/tint/buffer/uniform/types/mat4x2_f16.wgsl b/test/tint/buffer/uniform/types/mat4x2_f16.wgsl
new file mode 100644
index 0000000..04f5a9a
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat4x2_f16.wgsl

@@ -0,0 +1,9 @@
+enable f16;
+
+@group(0) @binding(0)
+var<uniform> u : mat4x2<f16>;
+
+@compute @workgroup_size(1)
+fn main() {
+  let x = u;
+}

diff --git a/test/tint/buffer/uniform/types/mat4x2_f16.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/types/mat4x2_f16.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..4f03c89
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat4x2_f16.wgsl.expected.dxc.hlsl

@@ -0,0 +1,21 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+
+matrix<float16_t, 4, 2> tint_symbol(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  const uint scalar_offset_3 = ((offset + 12u)) / 4;
+  uint ubo_load_3 = buffer[scalar_offset_3 / 4][scalar_offset_3 % 4];
+  return matrix<float16_t, 4, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))));
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  const matrix<float16_t, 4, 2> x = tint_symbol(u, 0u);
+  return;
+}

diff --git a/test/tint/buffer/uniform/types/mat4x2_f16.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/types/mat4x2_f16.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..a5f251c
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat4x2_f16.wgsl.expected.fxc.hlsl

@@ -0,0 +1,26 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+
+matrix<float16_t, 4, 2> tint_symbol(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  const uint scalar_offset_3 = ((offset + 12u)) / 4;
+  uint ubo_load_3 = buffer[scalar_offset_3 / 4][scalar_offset_3 % 4];
+  return matrix<float16_t, 4, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))));
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  const matrix<float16_t, 4, 2> x = tint_symbol(u, 0u);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000017761DC8170(5,8-16): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/types/mat4x2_f16.wgsl.expected.glsl b/test/tint/buffer/uniform/types/mat4x2_f16.wgsl.expected.glsl
new file mode 100644
index 0000000..faa76d2
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat4x2_f16.wgsl.expected.glsl

@@ -0,0 +1,23 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  f16vec2 inner_0;
+  f16vec2 inner_1;
+  f16vec2 inner_2;
+  f16vec2 inner_3;
+} u;
+
+f16mat4x2 load_u_inner() {
+  return f16mat4x2(u.inner_0, u.inner_1, u.inner_2, u.inner_3);
+}
+
+void tint_symbol() {
+  f16mat4x2 x = load_u_inner();
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  tint_symbol();
+  return;
+}

diff --git a/test/tint/buffer/uniform/types/mat4x2_f16.wgsl.expected.msl b/test/tint/buffer/uniform/types/mat4x2_f16.wgsl.expected.msl
new file mode 100644
index 0000000..37a617d
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat4x2_f16.wgsl.expected.msl

@@ -0,0 +1,8 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void tint_symbol(const constant half4x2* tint_symbol_1 [[buffer(0)]]) {
+  half4x2 const x = *(tint_symbol_1);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/types/mat4x2_f16.wgsl.expected.spvasm b/test/tint/buffer/uniform/types/mat4x2_f16.wgsl.expected.spvasm
new file mode 100644
index 0000000..61763c4
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat4x2_f16.wgsl.expected.spvasm

@@ -0,0 +1,62 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 35
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner_0"
+               OpMemberName %u_block_std140 1 "inner_1"
+               OpMemberName %u_block_std140 2 "inner_2"
+               OpMemberName %u_block_std140 3 "inner_3"
+               OpName %u "u"
+               OpName %load_u_inner "load_u_inner"
+               OpName %main "main"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %u_block_std140 1 Offset 4
+               OpMemberDecorate %u_block_std140 2 Offset 8
+               OpMemberDecorate %u_block_std140 3 Offset 12
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %half = OpTypeFloat 16
+     %v2half = OpTypeVector %half 2
+%u_block_std140 = OpTypeStruct %v2half %v2half %v2half %v2half
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat4v2half = OpTypeMatrix %v2half 4
+          %6 = OpTypeFunction %mat4v2half
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+     %uint_3 = OpConstant %uint 3
+       %void = OpTypeVoid
+         %30 = OpTypeFunction %void
+%load_u_inner = OpFunction %mat4v2half None %6
+          %9 = OpLabel
+         %15 = OpAccessChain %_ptr_Uniform_v2half %u %uint_0
+         %16 = OpLoad %v2half %15
+         %19 = OpAccessChain %_ptr_Uniform_v2half %u %uint_1
+         %20 = OpLoad %v2half %19
+         %23 = OpAccessChain %_ptr_Uniform_v2half %u %uint_2
+         %24 = OpLoad %v2half %23
+         %27 = OpAccessChain %_ptr_Uniform_v2half %u %uint_3
+         %28 = OpLoad %v2half %27
+         %29 = OpCompositeConstruct %mat4v2half %16 %20 %24 %28
+               OpReturnValue %29
+               OpFunctionEnd
+       %main = OpFunction %void None %30
+         %33 = OpLabel
+         %34 = OpFunctionCall %mat4v2half %load_u_inner
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/types/mat4x2_f16.wgsl.expected.wgsl b/test/tint/buffer/uniform/types/mat4x2_f16.wgsl.expected.wgsl
new file mode 100644
index 0000000..fe2c66b
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat4x2_f16.wgsl.expected.wgsl

@@ -0,0 +1,8 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat4x2<f16>;
+
+@compute @workgroup_size(1)
+fn main() {
+  let x = u;
+}

diff --git a/test/tint/buffer/uniform/types/mat4x2_f32.wgsl b/test/tint/buffer/uniform/types/mat4x2_f32.wgsl
new file mode 100644
index 0000000..94291b1
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat4x2_f32.wgsl

@@ -0,0 +1,7 @@
+@group(0) @binding(0)
+var<uniform> u : mat4x2<f32>;
+
+@compute @workgroup_size(1)
+fn main() {
+  let x = u;
+}

diff --git a/test/tint/buffer/uniform/types/mat4x2_f32.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/types/mat4x2_f32.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..a6278e9
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat4x2_f32.wgsl.expected.dxc.hlsl

@@ -0,0 +1,21 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+
+float4x2 tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_3 / 4];
+  return float4x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)), asfloat(((scalar_offset_3 & 2) ? ubo_load_3.zw : ubo_load_3.xy)));
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  const float4x2 x = tint_symbol(u, 0u);
+  return;
+}

diff --git a/test/tint/buffer/uniform/types/mat4x2_f32.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/types/mat4x2_f32.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..a6278e9
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat4x2_f32.wgsl.expected.fxc.hlsl

@@ -0,0 +1,21 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+
+float4x2 tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_3 / 4];
+  return float4x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)), asfloat(((scalar_offset_3 & 2) ? ubo_load_3.zw : ubo_load_3.xy)));
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  const float4x2 x = tint_symbol(u, 0u);
+  return;
+}

diff --git a/test/tint/buffer/uniform/types/mat4x2_f32.wgsl.expected.glsl b/test/tint/buffer/uniform/types/mat4x2_f32.wgsl.expected.glsl
new file mode 100644
index 0000000..aaaf7e5
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat4x2_f32.wgsl.expected.glsl

@@ -0,0 +1,22 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  vec2 inner_0;
+  vec2 inner_1;
+  vec2 inner_2;
+  vec2 inner_3;
+} u;
+
+mat4x2 load_u_inner() {
+  return mat4x2(u.inner_0, u.inner_1, u.inner_2, u.inner_3);
+}
+
+void tint_symbol() {
+  mat4x2 x = load_u_inner();
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  tint_symbol();
+  return;
+}

diff --git a/test/tint/buffer/uniform/types/mat4x2_f32.wgsl.expected.msl b/test/tint/buffer/uniform/types/mat4x2_f32.wgsl.expected.msl
new file mode 100644
index 0000000..9c68fb1
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat4x2_f32.wgsl.expected.msl

@@ -0,0 +1,8 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void tint_symbol(const constant float4x2* tint_symbol_1 [[buffer(0)]]) {
+  float4x2 const x = *(tint_symbol_1);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/types/mat4x2_f32.wgsl.expected.spvasm b/test/tint/buffer/uniform/types/mat4x2_f32.wgsl.expected.spvasm
new file mode 100644
index 0000000..c9e3140
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat4x2_f32.wgsl.expected.spvasm

@@ -0,0 +1,58 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 35
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner_0"
+               OpMemberName %u_block_std140 1 "inner_1"
+               OpMemberName %u_block_std140 2 "inner_2"
+               OpMemberName %u_block_std140 3 "inner_3"
+               OpName %u "u"
+               OpName %load_u_inner "load_u_inner"
+               OpName %main "main"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %u_block_std140 1 Offset 8
+               OpMemberDecorate %u_block_std140 2 Offset 16
+               OpMemberDecorate %u_block_std140 3 Offset 24
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+      %float = OpTypeFloat 32
+    %v2float = OpTypeVector %float 2
+%u_block_std140 = OpTypeStruct %v2float %v2float %v2float %v2float
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+%mat4v2float = OpTypeMatrix %v2float 4
+          %6 = OpTypeFunction %mat4v2float
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+     %uint_3 = OpConstant %uint 3
+       %void = OpTypeVoid
+         %30 = OpTypeFunction %void
+%load_u_inner = OpFunction %mat4v2float None %6
+          %9 = OpLabel
+         %15 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0
+         %16 = OpLoad %v2float %15
+         %19 = OpAccessChain %_ptr_Uniform_v2float %u %uint_1
+         %20 = OpLoad %v2float %19
+         %23 = OpAccessChain %_ptr_Uniform_v2float %u %uint_2
+         %24 = OpLoad %v2float %23
+         %27 = OpAccessChain %_ptr_Uniform_v2float %u %uint_3
+         %28 = OpLoad %v2float %27
+         %29 = OpCompositeConstruct %mat4v2float %16 %20 %24 %28
+               OpReturnValue %29
+               OpFunctionEnd
+       %main = OpFunction %void None %30
+         %33 = OpLabel
+         %34 = OpFunctionCall %mat4v2float %load_u_inner
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/types/mat4x2_f32.wgsl.expected.wgsl b/test/tint/buffer/uniform/types/mat4x2_f32.wgsl.expected.wgsl
new file mode 100644
index 0000000..4dee7ee
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat4x2_f32.wgsl.expected.wgsl

@@ -0,0 +1,6 @@
+@group(0) @binding(0) var<uniform> u : mat4x2<f32>;
+
+@compute @workgroup_size(1)
+fn main() {
+  let x = u;
+}

diff --git a/test/tint/buffer/uniform/types/mat4x3_f16.wgsl b/test/tint/buffer/uniform/types/mat4x3_f16.wgsl
new file mode 100644
index 0000000..526ef36
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat4x3_f16.wgsl

@@ -0,0 +1,9 @@
+enable f16;
+
+@group(0) @binding(0)
+var<uniform> u : mat4x3<f16>;
+
+@compute @workgroup_size(1)
+fn main() {
+  let x = u;
+}

diff --git a/test/tint/buffer/uniform/types/mat4x3_f16.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/types/mat4x3_f16.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..5acbef9
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat4x3_f16.wgsl.expected.dxc.hlsl

@@ -0,0 +1,33 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+
+matrix<float16_t, 4, 3> tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  return matrix<float16_t, 4, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]), vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  const matrix<float16_t, 4, 3> x = tint_symbol(u, 0u);
+  return;
+}

diff --git a/test/tint/buffer/uniform/types/mat4x3_f16.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/types/mat4x3_f16.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..804c81e
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat4x3_f16.wgsl.expected.fxc.hlsl

@@ -0,0 +1,38 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+
+matrix<float16_t, 4, 3> tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  return matrix<float16_t, 4, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]), vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  const matrix<float16_t, 4, 3> x = tint_symbol(u, 0u);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x00000184D1079D40(5,8-16): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/types/mat4x3_f16.wgsl.expected.glsl b/test/tint/buffer/uniform/types/mat4x3_f16.wgsl.expected.glsl
new file mode 100644
index 0000000..03ff053
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat4x3_f16.wgsl.expected.glsl

@@ -0,0 +1,23 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  f16vec3 inner_0;
+  f16vec3 inner_1;
+  f16vec3 inner_2;
+  f16vec3 inner_3;
+} u;
+
+f16mat4x3 load_u_inner() {
+  return f16mat4x3(u.inner_0, u.inner_1, u.inner_2, u.inner_3);
+}
+
+void tint_symbol() {
+  f16mat4x3 x = load_u_inner();
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  tint_symbol();
+  return;
+}

diff --git a/test/tint/buffer/uniform/types/mat4x3_f16.wgsl.expected.msl b/test/tint/buffer/uniform/types/mat4x3_f16.wgsl.expected.msl
new file mode 100644
index 0000000..199ceee
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat4x3_f16.wgsl.expected.msl

@@ -0,0 +1,8 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void tint_symbol(const constant half4x3* tint_symbol_1 [[buffer(0)]]) {
+  half4x3 const x = *(tint_symbol_1);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/types/mat4x3_f16.wgsl.expected.spvasm b/test/tint/buffer/uniform/types/mat4x3_f16.wgsl.expected.spvasm
new file mode 100644
index 0000000..1d0fe82
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat4x3_f16.wgsl.expected.spvasm

@@ -0,0 +1,62 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 35
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner_0"
+               OpMemberName %u_block_std140 1 "inner_1"
+               OpMemberName %u_block_std140 2 "inner_2"
+               OpMemberName %u_block_std140 3 "inner_3"
+               OpName %u "u"
+               OpName %load_u_inner "load_u_inner"
+               OpName %main "main"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %u_block_std140 1 Offset 8
+               OpMemberDecorate %u_block_std140 2 Offset 16
+               OpMemberDecorate %u_block_std140 3 Offset 24
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+%u_block_std140 = OpTypeStruct %v3half %v3half %v3half %v3half
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat4v3half = OpTypeMatrix %v3half 4
+          %6 = OpTypeFunction %mat4v3half
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+     %uint_3 = OpConstant %uint 3
+       %void = OpTypeVoid
+         %30 = OpTypeFunction %void
+%load_u_inner = OpFunction %mat4v3half None %6
+          %9 = OpLabel
+         %15 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0
+         %16 = OpLoad %v3half %15
+         %19 = OpAccessChain %_ptr_Uniform_v3half %u %uint_1
+         %20 = OpLoad %v3half %19
+         %23 = OpAccessChain %_ptr_Uniform_v3half %u %uint_2
+         %24 = OpLoad %v3half %23
+         %27 = OpAccessChain %_ptr_Uniform_v3half %u %uint_3
+         %28 = OpLoad %v3half %27
+         %29 = OpCompositeConstruct %mat4v3half %16 %20 %24 %28
+               OpReturnValue %29
+               OpFunctionEnd
+       %main = OpFunction %void None %30
+         %33 = OpLabel
+         %34 = OpFunctionCall %mat4v3half %load_u_inner
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/types/mat4x3_f16.wgsl.expected.wgsl b/test/tint/buffer/uniform/types/mat4x3_f16.wgsl.expected.wgsl
new file mode 100644
index 0000000..43c4a43
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat4x3_f16.wgsl.expected.wgsl

@@ -0,0 +1,8 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat4x3<f16>;
+
+@compute @workgroup_size(1)
+fn main() {
+  let x = u;
+}

diff --git a/test/tint/buffer/uniform/types/mat4x3_f32.wgsl b/test/tint/buffer/uniform/types/mat4x3_f32.wgsl
new file mode 100644
index 0000000..d8cd5b4
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat4x3_f32.wgsl

@@ -0,0 +1,7 @@
+@group(0) @binding(0)
+var<uniform> u : mat4x3<f32>;
+
+@compute @workgroup_size(1)
+fn main() {
+  let x = u;
+}

diff --git a/test/tint/buffer/uniform/types/mat4x3_f32.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/types/mat4x3_f32.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..044877c
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat4x3_f32.wgsl.expected.dxc.hlsl

@@ -0,0 +1,17 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[4];
+};
+
+float4x3 tint_symbol(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  const float4x3 x = tint_symbol(u, 0u);
+  return;
+}

diff --git a/test/tint/buffer/uniform/types/mat4x3_f32.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/types/mat4x3_f32.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..044877c
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat4x3_f32.wgsl.expected.fxc.hlsl

@@ -0,0 +1,17 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[4];
+};
+
+float4x3 tint_symbol(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  const float4x3 x = tint_symbol(u, 0u);
+  return;
+}

diff --git a/test/tint/buffer/uniform/types/mat4x3_f32.wgsl.expected.glsl b/test/tint/buffer/uniform/types/mat4x3_f32.wgsl.expected.glsl
new file mode 100644
index 0000000..0cffe2d
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat4x3_f32.wgsl.expected.glsl

@@ -0,0 +1,15 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat4x3 inner;
+} u;
+
+void tint_symbol() {
+  mat4x3 x = u.inner;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  tint_symbol();
+  return;
+}

diff --git a/test/tint/buffer/uniform/types/mat4x3_f32.wgsl.expected.msl b/test/tint/buffer/uniform/types/mat4x3_f32.wgsl.expected.msl
new file mode 100644
index 0000000..482db1c
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat4x3_f32.wgsl.expected.msl

@@ -0,0 +1,8 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void tint_symbol(const constant float4x3* tint_symbol_1 [[buffer(0)]]) {
+  float4x3 const x = *(tint_symbol_1);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/types/mat4x3_f32.wgsl.expected.spvasm b/test/tint/buffer/uniform/types/mat4x3_f32.wgsl.expected.spvasm
new file mode 100644
index 0000000..d96eb83
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat4x3_f32.wgsl.expected.spvasm

@@ -0,0 +1,37 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 16
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %main "main"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat4v3float = OpTypeMatrix %v3float 4
+    %u_block = OpTypeStruct %mat4v3float
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+       %void = OpTypeVoid
+          %7 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_mat4v3float = OpTypePointer Uniform %mat4v3float
+       %main = OpFunction %void None %7
+         %10 = OpLabel
+         %14 = OpAccessChain %_ptr_Uniform_mat4v3float %u %uint_0
+         %15 = OpLoad %mat4v3float %14
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/types/mat4x3_f32.wgsl.expected.wgsl b/test/tint/buffer/uniform/types/mat4x3_f32.wgsl.expected.wgsl
new file mode 100644
index 0000000..d177f74
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat4x3_f32.wgsl.expected.wgsl

@@ -0,0 +1,6 @@
+@group(0) @binding(0) var<uniform> u : mat4x3<f32>;
+
+@compute @workgroup_size(1)
+fn main() {
+  let x = u;
+}

diff --git a/test/tint/buffer/uniform/types/mat4x4_f16.wgsl b/test/tint/buffer/uniform/types/mat4x4_f16.wgsl
new file mode 100644
index 0000000..f30a48e
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat4x4_f16.wgsl

@@ -0,0 +1,9 @@
+enable f16;
+
+@group(0) @binding(0)
+var<uniform> u : mat4x4<f16>;
+
+@compute @workgroup_size(1)
+fn main() {
+  let x = u;
+}

diff --git a/test/tint/buffer/uniform/types/mat4x4_f16.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/types/mat4x4_f16.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..a0e9f1b5
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat4x4_f16.wgsl.expected.dxc.hlsl

@@ -0,0 +1,33 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+
+matrix<float16_t, 4, 4> tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  return matrix<float16_t, 4, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]), vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]));
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  const matrix<float16_t, 4, 4> x = tint_symbol(u, 0u);
+  return;
+}

diff --git a/test/tint/buffer/uniform/types/mat4x4_f16.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/types/mat4x4_f16.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..8b29f19
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat4x4_f16.wgsl.expected.fxc.hlsl

@@ -0,0 +1,38 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+
+matrix<float16_t, 4, 4> tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  return matrix<float16_t, 4, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]), vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]));
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  const matrix<float16_t, 4, 4> x = tint_symbol(u, 0u);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000014CB93FB460(5,8-16): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/types/mat4x4_f16.wgsl.expected.glsl b/test/tint/buffer/uniform/types/mat4x4_f16.wgsl.expected.glsl
new file mode 100644
index 0000000..e84101c
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat4x4_f16.wgsl.expected.glsl

@@ -0,0 +1,23 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  f16vec4 inner_0;
+  f16vec4 inner_1;
+  f16vec4 inner_2;
+  f16vec4 inner_3;
+} u;
+
+f16mat4 load_u_inner() {
+  return f16mat4(u.inner_0, u.inner_1, u.inner_2, u.inner_3);
+}
+
+void tint_symbol() {
+  f16mat4 x = load_u_inner();
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  tint_symbol();
+  return;
+}

diff --git a/test/tint/buffer/uniform/types/mat4x4_f16.wgsl.expected.msl b/test/tint/buffer/uniform/types/mat4x4_f16.wgsl.expected.msl
new file mode 100644
index 0000000..aad6510
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat4x4_f16.wgsl.expected.msl

@@ -0,0 +1,8 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void tint_symbol(const constant half4x4* tint_symbol_1 [[buffer(0)]]) {
+  half4x4 const x = *(tint_symbol_1);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/types/mat4x4_f16.wgsl.expected.spvasm b/test/tint/buffer/uniform/types/mat4x4_f16.wgsl.expected.spvasm
new file mode 100644
index 0000000..0dc3bce
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat4x4_f16.wgsl.expected.spvasm

@@ -0,0 +1,62 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 35
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner_0"
+               OpMemberName %u_block_std140 1 "inner_1"
+               OpMemberName %u_block_std140 2 "inner_2"
+               OpMemberName %u_block_std140 3 "inner_3"
+               OpName %u "u"
+               OpName %load_u_inner "load_u_inner"
+               OpName %main "main"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %u_block_std140 1 Offset 8
+               OpMemberDecorate %u_block_std140 2 Offset 16
+               OpMemberDecorate %u_block_std140 3 Offset 24
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %half = OpTypeFloat 16
+     %v4half = OpTypeVector %half 4
+%u_block_std140 = OpTypeStruct %v4half %v4half %v4half %v4half
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat4v4half = OpTypeMatrix %v4half 4
+          %6 = OpTypeFunction %mat4v4half
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+     %uint_3 = OpConstant %uint 3
+       %void = OpTypeVoid
+         %30 = OpTypeFunction %void
+%load_u_inner = OpFunction %mat4v4half None %6
+          %9 = OpLabel
+         %15 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0
+         %16 = OpLoad %v4half %15
+         %19 = OpAccessChain %_ptr_Uniform_v4half %u %uint_1
+         %20 = OpLoad %v4half %19
+         %23 = OpAccessChain %_ptr_Uniform_v4half %u %uint_2
+         %24 = OpLoad %v4half %23
+         %27 = OpAccessChain %_ptr_Uniform_v4half %u %uint_3
+         %28 = OpLoad %v4half %27
+         %29 = OpCompositeConstruct %mat4v4half %16 %20 %24 %28
+               OpReturnValue %29
+               OpFunctionEnd
+       %main = OpFunction %void None %30
+         %33 = OpLabel
+         %34 = OpFunctionCall %mat4v4half %load_u_inner
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/types/mat4x4_f16.wgsl.expected.wgsl b/test/tint/buffer/uniform/types/mat4x4_f16.wgsl.expected.wgsl
new file mode 100644
index 0000000..0186b40
--- /dev/null
+++ b/test/tint/buffer/uniform/types/mat4x4_f16.wgsl.expected.wgsl

@@ -0,0 +1,8 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : mat4x4<f16>;
+
+@compute @workgroup_size(1)
+fn main() {
+  let x = u;
+}

diff --git a/test/tint/buffer/uniform/types/mat4x4.wgsl b/test/tint/buffer/uniform/types/mat4x4_f32.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/types/mat4x4.wgsl
rename to test/tint/buffer/uniform/types/mat4x4_f32.wgsl


diff --git a/test/tint/buffer/uniform/types/mat4x4.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/types/mat4x4_f32.wgsl.expected.dxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/types/mat4x4.wgsl.expected.dxc.hlsl
rename to test/tint/buffer/uniform/types/mat4x4_f32.wgsl.expected.dxc.hlsl


diff --git a/test/tint/buffer/uniform/types/mat4x4.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/types/mat4x4_f32.wgsl.expected.fxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/types/mat4x4.wgsl.expected.fxc.hlsl
rename to test/tint/buffer/uniform/types/mat4x4_f32.wgsl.expected.fxc.hlsl


diff --git a/test/tint/buffer/uniform/types/mat4x4.wgsl.expected.glsl b/test/tint/buffer/uniform/types/mat4x4_f32.wgsl.expected.glsl
similarity index 100%
rename from test/tint/buffer/uniform/types/mat4x4.wgsl.expected.glsl
rename to test/tint/buffer/uniform/types/mat4x4_f32.wgsl.expected.glsl


diff --git a/test/tint/buffer/uniform/types/mat4x4.wgsl.expected.msl b/test/tint/buffer/uniform/types/mat4x4_f32.wgsl.expected.msl
similarity index 100%
rename from test/tint/buffer/uniform/types/mat4x4.wgsl.expected.msl
rename to test/tint/buffer/uniform/types/mat4x4_f32.wgsl.expected.msl


diff --git a/test/tint/buffer/uniform/types/mat4x4.wgsl.expected.spvasm b/test/tint/buffer/uniform/types/mat4x4_f32.wgsl.expected.spvasm
similarity index 100%
rename from test/tint/buffer/uniform/types/mat4x4.wgsl.expected.spvasm
rename to test/tint/buffer/uniform/types/mat4x4_f32.wgsl.expected.spvasm


diff --git a/test/tint/buffer/uniform/types/mat4x4.wgsl.expected.wgsl b/test/tint/buffer/uniform/types/mat4x4_f32.wgsl.expected.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/types/mat4x4.wgsl.expected.wgsl
rename to test/tint/buffer/uniform/types/mat4x4_f32.wgsl.expected.wgsl


diff --git a/test/tint/buffer/uniform/types/struct.wgsl b/test/tint/buffer/uniform/types/struct.wgsl
deleted file mode 100644
index c50edb3..0000000
--- a/test/tint/buffer/uniform/types/struct.wgsl
+++ /dev/null

@@ -1,14 +0,0 @@
-struct Inner {
-  f : f32,
-};
-struct S {
-  inner : Inner,
-};
-
-@group(0) @binding(0)
-var<uniform> u : S;
-
-@compute @workgroup_size(1)
-fn main() {
-  let x = u;
-}

diff --git a/test/tint/buffer/uniform/types/struct.wgsl.expected.glsl b/test/tint/buffer/uniform/types/struct.wgsl.expected.glsl
deleted file mode 100644
index b717d8c..0000000
--- a/test/tint/buffer/uniform/types/struct.wgsl.expected.glsl
+++ /dev/null

@@ -1,26 +0,0 @@
-#version 310 es
-
-struct Inner {
-  float f;
-  uint pad;
-  uint pad_1;
-  uint pad_2;
-};
-
-struct S {
-  Inner inner;
-};
-
-layout(binding = 0, std140) uniform u_block_ubo {
-  S inner;
-} u;
-
-void tint_symbol() {
-  S x = u.inner;
-}
-
-layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
-void main() {
-  tint_symbol();
-  return;
-}

diff --git a/test/tint/buffer/uniform/types/struct.wgsl.expected.msl b/test/tint/buffer/uniform/types/struct.wgsl.expected.msl
deleted file mode 100644
index 41e3ae3..0000000
--- a/test/tint/buffer/uniform/types/struct.wgsl.expected.msl
+++ /dev/null

@@ -1,16 +0,0 @@
-#include <metal_stdlib>
-
-using namespace metal;
-struct Inner {
-  /* 0x0000 */ float f;
-};
-
-struct S {
-  /* 0x0000 */ Inner inner;
-};
-
-kernel void tint_symbol(const constant S* tint_symbol_1 [[buffer(0)]]) {
-  S const x = *(tint_symbol_1);
-  return;
-}
-

diff --git a/test/tint/buffer/uniform/types/struct.wgsl.expected.spvasm b/test/tint/buffer/uniform/types/struct.wgsl.expected.spvasm
deleted file mode 100644
index 9c63d49..0000000
--- a/test/tint/buffer/uniform/types/struct.wgsl.expected.spvasm
+++ /dev/null

@@ -1,41 +0,0 @@
-; SPIR-V
-; Version: 1.3
-; Generator: Google Tint Compiler; 0
-; Bound: 16
-; Schema: 0
-               OpCapability Shader
-               OpMemoryModel Logical GLSL450
-               OpEntryPoint GLCompute %main "main"
-               OpExecutionMode %main LocalSize 1 1 1
-               OpName %u_block "u_block"
-               OpMemberName %u_block 0 "inner"
-               OpName %S "S"
-               OpMemberName %S 0 "inner"
-               OpName %Inner "Inner"
-               OpMemberName %Inner 0 "f"
-               OpName %u "u"
-               OpName %main "main"
-               OpDecorate %u_block Block
-               OpMemberDecorate %u_block 0 Offset 0
-               OpMemberDecorate %S 0 Offset 0
-               OpMemberDecorate %Inner 0 Offset 0
-               OpDecorate %u NonWritable
-               OpDecorate %u DescriptorSet 0
-               OpDecorate %u Binding 0
-      %float = OpTypeFloat 32
-      %Inner = OpTypeStruct %float
-          %S = OpTypeStruct %Inner
-    %u_block = OpTypeStruct %S
-%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
-          %u = OpVariable %_ptr_Uniform_u_block Uniform
-       %void = OpTypeVoid
-          %7 = OpTypeFunction %void
-       %uint = OpTypeInt 32 0
-     %uint_0 = OpConstant %uint 0
-%_ptr_Uniform_S = OpTypePointer Uniform %S
-       %main = OpFunction %void None %7
-         %10 = OpLabel
-         %14 = OpAccessChain %_ptr_Uniform_S %u %uint_0
-         %15 = OpLoad %S %14
-               OpReturn
-               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/types/struct.wgsl.expected.wgsl b/test/tint/buffer/uniform/types/struct.wgsl.expected.wgsl
deleted file mode 100644
index 552be2c..0000000
--- a/test/tint/buffer/uniform/types/struct.wgsl.expected.wgsl
+++ /dev/null

@@ -1,14 +0,0 @@
-struct Inner {
-  f : f32,
-}
-
-struct S {
-  inner : Inner,
-}
-
-@group(0) @binding(0) var<uniform> u : S;
-
-@compute @workgroup_size(1)
-fn main() {
-  let x = u;
-}

diff --git a/test/tint/buffer/uniform/types/struct_f16.wgsl b/test/tint/buffer/uniform/types/struct_f16.wgsl
new file mode 100644
index 0000000..a2610fd
--- /dev/null
+++ b/test/tint/buffer/uniform/types/struct_f16.wgsl

@@ -0,0 +1,18 @@
+enable f16;
+
+struct Inner {
+  scalar_f16 : f16,
+  vec3_f16 : vec3<f16>,
+  mat2x4_f16 : mat2x4<f16>,
+};
+struct S {
+  inner : Inner,
+};
+
+@group(0) @binding(0)
+var<uniform> u : S;
+
+@compute @workgroup_size(1)
+fn main() {
+  let x = u;
+}

diff --git a/test/tint/buffer/uniform/types/struct_f16.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/types/struct_f16.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..0ee746e
--- /dev/null
+++ b/test/tint/buffer/uniform/types/struct_f16.wgsl.expected.dxc.hlsl

@@ -0,0 +1,49 @@
+struct Inner {
+  float16_t scalar_f16;
+  vector<float16_t, 3> vec3_f16;
+  matrix<float16_t, 2, 4> mat2x4_f16;
+};
+struct S {
+  Inner inner;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+
+matrix<float16_t, 2, 4> tint_symbol_4(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  return matrix<float16_t, 2, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]));
+}
+
+Inner tint_symbol_1(uint4 buffer[2], uint offset) {
+  const uint scalar_offset_bytes = ((offset + 0u));
+  const uint scalar_offset_index = scalar_offset_bytes / 4;
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const Inner tint_symbol_6 = {float16_t(f16tof32(((buffer[scalar_offset_index / 4][scalar_offset_index % 4] >> (scalar_offset_bytes % 4 == 0 ? 0 : 16)) & 0xFFFF))), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]), tint_symbol_4(buffer, (offset + 16u))};
+  return tint_symbol_6;
+}
+
+S tint_symbol(uint4 buffer[2], uint offset) {
+  const S tint_symbol_7 = {tint_symbol_1(buffer, (offset + 0u))};
+  return tint_symbol_7;
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  const S x = tint_symbol(u, 0u);
+  return;
+}

diff --git a/test/tint/buffer/uniform/types/struct_f16.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/types/struct_f16.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..1c11433
--- /dev/null
+++ b/test/tint/buffer/uniform/types/struct_f16.wgsl.expected.fxc.hlsl

@@ -0,0 +1,54 @@
+SKIP: FAILED
+
+struct Inner {
+  float16_t scalar_f16;
+  vector<float16_t, 3> vec3_f16;
+  matrix<float16_t, 2, 4> mat2x4_f16;
+};
+struct S {
+  Inner inner;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[2];
+};
+
+matrix<float16_t, 2, 4> tint_symbol_4(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  return matrix<float16_t, 2, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]));
+}
+
+Inner tint_symbol_1(uint4 buffer[2], uint offset) {
+  const uint scalar_offset_bytes = ((offset + 0u));
+  const uint scalar_offset_index = scalar_offset_bytes / 4;
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const Inner tint_symbol_6 = {float16_t(f16tof32(((buffer[scalar_offset_index / 4][scalar_offset_index % 4] >> (scalar_offset_bytes % 4 == 0 ? 0 : 16)) & 0xFFFF))), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]), tint_symbol_4(buffer, (offset + 16u))};
+  return tint_symbol_6;
+}
+
+S tint_symbol(uint4 buffer[2], uint offset) {
+  const S tint_symbol_7 = {tint_symbol_1(buffer, (offset + 0u))};
+  return tint_symbol_7;
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  const S x = tint_symbol(u, 0u);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000019B8F8CCCD0(2,3-11): error X3000: unrecognized identifier 'float16_t'
+

diff --git a/test/tint/buffer/uniform/types/struct_f16.wgsl.expected.glsl b/test/tint/buffer/uniform/types/struct_f16.wgsl.expected.glsl
new file mode 100644
index 0000000..0f63a58
--- /dev/null
+++ b/test/tint/buffer/uniform/types/struct_f16.wgsl.expected.glsl

@@ -0,0 +1,47 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct Inner {
+  float16_t scalar_f16;
+  uint pad;
+  f16vec3 vec3_f16;
+  f16mat2x4 mat2x4_f16;
+};
+
+struct Inner_std140 {
+  float16_t scalar_f16;
+  uint pad;
+  f16vec3 vec3_f16;
+  f16vec4 mat2x4_f16_0;
+  f16vec4 mat2x4_f16_1;
+};
+
+struct S {
+  Inner inner;
+};
+
+struct S_std140 {
+  Inner_std140 inner;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  S_std140 inner;
+} u;
+
+Inner conv_Inner(Inner_std140 val) {
+  return Inner(val.scalar_f16, val.pad, val.vec3_f16, f16mat2x4(val.mat2x4_f16_0, val.mat2x4_f16_1));
+}
+
+S conv_S(S_std140 val) {
+  return S(conv_Inner(val.inner));
+}
+
+void tint_symbol() {
+  S x = conv_S(u.inner);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  tint_symbol();
+  return;
+}

diff --git a/test/tint/buffer/uniform/types/struct_f16.wgsl.expected.msl b/test/tint/buffer/uniform/types/struct_f16.wgsl.expected.msl
new file mode 100644
index 0000000..fd9b0f2
--- /dev/null
+++ b/test/tint/buffer/uniform/types/struct_f16.wgsl.expected.msl

@@ -0,0 +1,33 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct Inner {
+  /* 0x0000 */ half scalar_f16;
+  /* 0x0002 */ tint_array<int8_t, 6> tint_pad;
+  /* 0x0008 */ packed_half3 vec3_f16;
+  /* 0x000e */ tint_array<int8_t, 2> tint_pad_1;
+  /* 0x0010 */ half2x4 mat2x4_f16;
+};
+
+struct S {
+  /* 0x0000 */ Inner inner;
+};
+
+kernel void tint_symbol(const constant S* tint_symbol_1 [[buffer(0)]]) {
+  S const x = *(tint_symbol_1);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/types/struct_f16.wgsl.expected.spvasm b/test/tint/buffer/uniform/types/struct_f16.wgsl.expected.spvasm
new file mode 100644
index 0000000..00afa20
--- /dev/null
+++ b/test/tint/buffer/uniform/types/struct_f16.wgsl.expected.spvasm

@@ -0,0 +1,94 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 39
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "inner"
+               OpName %Inner_std140 "Inner_std140"
+               OpMemberName %Inner_std140 0 "scalar_f16"
+               OpMemberName %Inner_std140 1 "vec3_f16"
+               OpMemberName %Inner_std140 2 "mat2x4_f16_0"
+               OpMemberName %Inner_std140 3 "mat2x4_f16_1"
+               OpName %u "u"
+               OpName %Inner "Inner"
+               OpMemberName %Inner 0 "scalar_f16"
+               OpMemberName %Inner 1 "vec3_f16"
+               OpMemberName %Inner 2 "mat2x4_f16"
+               OpName %conv_Inner "conv_Inner"
+               OpName %val "val"
+               OpName %S "S"
+               OpMemberName %S 0 "inner"
+               OpName %conv_S "conv_S"
+               OpName %val_0 "val"
+               OpName %main "main"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %Inner_std140 0 Offset 0
+               OpMemberDecorate %Inner_std140 1 Offset 8
+               OpMemberDecorate %Inner_std140 2 Offset 16
+               OpMemberDecorate %Inner_std140 3 Offset 24
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpMemberDecorate %Inner 0 Offset 0
+               OpMemberDecorate %Inner 1 Offset 8
+               OpMemberDecorate %Inner 2 Offset 16
+               OpMemberDecorate %Inner 2 ColMajor
+               OpMemberDecorate %Inner 2 MatrixStride 8
+               OpMemberDecorate %S 0 Offset 0
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+     %v4half = OpTypeVector %half 4
+%Inner_std140 = OpTypeStruct %half %v3half %v4half %v4half
+   %S_std140 = OpTypeStruct %Inner_std140
+%u_block_std140 = OpTypeStruct %S_std140
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat2v4half = OpTypeMatrix %v4half 2
+      %Inner = OpTypeStruct %half %v3half %mat2v4half
+          %9 = OpTypeFunction %Inner %Inner_std140
+          %S = OpTypeStruct %Inner
+         %21 = OpTypeFunction %S %S_std140
+       %void = OpTypeVoid
+         %29 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+ %conv_Inner = OpFunction %Inner None %9
+        %val = OpFunctionParameter %Inner_std140
+         %14 = OpLabel
+         %15 = OpCompositeExtract %half %val 0
+         %16 = OpCompositeExtract %v3half %val 1
+         %17 = OpCompositeExtract %v4half %val 2
+         %18 = OpCompositeExtract %v4half %val 3
+         %19 = OpCompositeConstruct %mat2v4half %17 %18
+         %20 = OpCompositeConstruct %Inner %15 %16 %19
+               OpReturnValue %20
+               OpFunctionEnd
+     %conv_S = OpFunction %S None %21
+      %val_0 = OpFunctionParameter %S_std140
+         %25 = OpLabel
+         %27 = OpCompositeExtract %Inner_std140 %val_0 0
+         %26 = OpFunctionCall %Inner %conv_Inner %27
+         %28 = OpCompositeConstruct %S %26
+               OpReturnValue %28
+               OpFunctionEnd
+       %main = OpFunction %void None %29
+         %32 = OpLabel
+         %37 = OpAccessChain %_ptr_Uniform_S_std140 %u %uint_0
+         %38 = OpLoad %S_std140 %37
+         %33 = OpFunctionCall %S %conv_S %38
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/types/struct_f16.wgsl.expected.wgsl b/test/tint/buffer/uniform/types/struct_f16.wgsl.expected.wgsl
new file mode 100644
index 0000000..1e2dd4e
--- /dev/null
+++ b/test/tint/buffer/uniform/types/struct_f16.wgsl.expected.wgsl

@@ -0,0 +1,18 @@
+enable f16;
+
+struct Inner {
+  scalar_f16 : f16,
+  vec3_f16 : vec3<f16>,
+  mat2x4_f16 : mat2x4<f16>,
+}
+
+struct S {
+  inner : Inner,
+}
+
+@group(0) @binding(0) var<uniform> u : S;
+
+@compute @workgroup_size(1)
+fn main() {
+  let x = u;
+}

diff --git a/test/tint/buffer/uniform/types/struct_f32.wgsl b/test/tint/buffer/uniform/types/struct_f32.wgsl
new file mode 100644
index 0000000..3620fec
--- /dev/null
+++ b/test/tint/buffer/uniform/types/struct_f32.wgsl

@@ -0,0 +1,16 @@
+struct Inner {
+  scalar_f32 : f32,
+  vec3_f32 : vec3<f32>,
+  mat2x4_f32 : mat2x4<f32>,
+};
+struct S {
+  inner : Inner,
+};
+
+@group(0) @binding(0)
+var<uniform> u : S;
+
+@compute @workgroup_size(1)
+fn main() {
+  let x = u;
+}

diff --git a/test/tint/buffer/uniform/types/struct_f32.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/types/struct_f32.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..c6a6891
--- /dev/null
+++ b/test/tint/buffer/uniform/types/struct_f32.wgsl.expected.dxc.hlsl

@@ -0,0 +1,36 @@
+struct Inner {
+  float scalar_f32;
+  float3 vec3_f32;
+  float2x4 mat2x4_f32;
+};
+struct S {
+  Inner inner;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[4];
+};
+
+float2x4 tint_symbol_4(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]));
+}
+
+Inner tint_symbol_1(uint4 buffer[4], uint offset) {
+  const uint scalar_offset_2 = ((offset + 0u)) / 4;
+  const uint scalar_offset_3 = ((offset + 16u)) / 4;
+  const Inner tint_symbol_6 = {asfloat(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4]), asfloat(buffer[scalar_offset_3 / 4].xyz), tint_symbol_4(buffer, (offset + 32u))};
+  return tint_symbol_6;
+}
+
+S tint_symbol(uint4 buffer[4], uint offset) {
+  const S tint_symbol_7 = {tint_symbol_1(buffer, (offset + 0u))};
+  return tint_symbol_7;
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  const S x = tint_symbol(u, 0u);
+  return;
+}

diff --git a/test/tint/buffer/uniform/types/struct_f32.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/types/struct_f32.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..c6a6891
--- /dev/null
+++ b/test/tint/buffer/uniform/types/struct_f32.wgsl.expected.fxc.hlsl

@@ -0,0 +1,36 @@
+struct Inner {
+  float scalar_f32;
+  float3 vec3_f32;
+  float2x4 mat2x4_f32;
+};
+struct S {
+  Inner inner;
+};
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[4];
+};
+
+float2x4 tint_symbol_4(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]));
+}
+
+Inner tint_symbol_1(uint4 buffer[4], uint offset) {
+  const uint scalar_offset_2 = ((offset + 0u)) / 4;
+  const uint scalar_offset_3 = ((offset + 16u)) / 4;
+  const Inner tint_symbol_6 = {asfloat(buffer[scalar_offset_2 / 4][scalar_offset_2 % 4]), asfloat(buffer[scalar_offset_3 / 4].xyz), tint_symbol_4(buffer, (offset + 32u))};
+  return tint_symbol_6;
+}
+
+S tint_symbol(uint4 buffer[4], uint offset) {
+  const S tint_symbol_7 = {tint_symbol_1(buffer, (offset + 0u))};
+  return tint_symbol_7;
+}
+
+[numthreads(1, 1, 1)]
+void main() {
+  const S x = tint_symbol(u, 0u);
+  return;
+}

diff --git a/test/tint/buffer/uniform/types/struct_f32.wgsl.expected.glsl b/test/tint/buffer/uniform/types/struct_f32.wgsl.expected.glsl
new file mode 100644
index 0000000..b5a022d
--- /dev/null
+++ b/test/tint/buffer/uniform/types/struct_f32.wgsl.expected.glsl

@@ -0,0 +1,29 @@
+#version 310 es
+
+struct Inner {
+  float scalar_f32;
+  uint pad;
+  uint pad_1;
+  uint pad_2;
+  vec3 vec3_f32;
+  uint pad_3;
+  mat2x4 mat2x4_f32;
+};
+
+struct S {
+  Inner inner;
+};
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  S inner;
+} u;
+
+void tint_symbol() {
+  S x = u.inner;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  tint_symbol();
+  return;
+}

diff --git a/test/tint/buffer/uniform/types/struct_f32.wgsl.expected.msl b/test/tint/buffer/uniform/types/struct_f32.wgsl.expected.msl
new file mode 100644
index 0000000..ae03b60
--- /dev/null
+++ b/test/tint/buffer/uniform/types/struct_f32.wgsl.expected.msl

@@ -0,0 +1,33 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct Inner {
+  /* 0x0000 */ float scalar_f32;
+  /* 0x0004 */ tint_array<int8_t, 12> tint_pad;
+  /* 0x0010 */ packed_float3 vec3_f32;
+  /* 0x001c */ tint_array<int8_t, 4> tint_pad_1;
+  /* 0x0020 */ float2x4 mat2x4_f32;
+};
+
+struct S {
+  /* 0x0000 */ Inner inner;
+};
+
+kernel void tint_symbol(const constant S* tint_symbol_1 [[buffer(0)]]) {
+  S const x = *(tint_symbol_1);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/types/struct_f32.wgsl.expected.spvasm b/test/tint/buffer/uniform/types/struct_f32.wgsl.expected.spvasm
new file mode 100644
index 0000000..b7e94dd
--- /dev/null
+++ b/test/tint/buffer/uniform/types/struct_f32.wgsl.expected.spvasm

@@ -0,0 +1,50 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 19
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %S "S"
+               OpMemberName %S 0 "inner"
+               OpName %Inner "Inner"
+               OpMemberName %Inner 0 "scalar_f32"
+               OpMemberName %Inner 1 "vec3_f32"
+               OpMemberName %Inner 2 "mat2x4_f32"
+               OpName %u "u"
+               OpName %main "main"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %S 0 Offset 0
+               OpMemberDecorate %Inner 0 Offset 0
+               OpMemberDecorate %Inner 1 Offset 16
+               OpMemberDecorate %Inner 2 Offset 32
+               OpMemberDecorate %Inner 2 ColMajor
+               OpMemberDecorate %Inner 2 MatrixStride 16
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+    %v4float = OpTypeVector %float 4
+%mat2v4float = OpTypeMatrix %v4float 2
+      %Inner = OpTypeStruct %float %v3float %mat2v4float
+          %S = OpTypeStruct %Inner
+    %u_block = OpTypeStruct %S
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+       %void = OpTypeVoid
+         %10 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_S = OpTypePointer Uniform %S
+       %main = OpFunction %void None %10
+         %13 = OpLabel
+         %17 = OpAccessChain %_ptr_Uniform_S %u %uint_0
+         %18 = OpLoad %S %17
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/types/struct_f32.wgsl.expected.wgsl b/test/tint/buffer/uniform/types/struct_f32.wgsl.expected.wgsl
new file mode 100644
index 0000000..9658b07
--- /dev/null
+++ b/test/tint/buffer/uniform/types/struct_f32.wgsl.expected.wgsl

@@ -0,0 +1,16 @@
+struct Inner {
+  scalar_f32 : f32,
+  vec3_f32 : vec3<f32>,
+  mat2x4_f32 : mat2x4<f32>,
+}
+
+struct S {
+  inner : Inner,
+}
+
+@group(0) @binding(0) var<uniform> u : S;
+
+@compute @workgroup_size(1)
+fn main() {
+  let x = u;
+}

diff --git a/test/tint/buffer/uniform/types/vec2_f16.wgsl b/test/tint/buffer/uniform/types/vec2_f16.wgsl
new file mode 100644
index 0000000..17cb8b7
--- /dev/null
+++ b/test/tint/buffer/uniform/types/vec2_f16.wgsl

@@ -0,0 +1,9 @@
+enable f16;
+
+@group(0) @binding(0)
+var<uniform> u : vec2<f16>;
+
+@compute @workgroup_size(1)
+fn main() {
+  let x = u;
+}

diff --git a/test/tint/buffer/uniform/types/vec2_f16.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/types/vec2_f16.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..6449cb7
--- /dev/null
+++ b/test/tint/buffer/uniform/types/vec2_f16.wgsl.expected.dxc.hlsl

@@ -0,0 +1,10 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+
+[numthreads(1, 1, 1)]
+void main() {
+  uint ubo_load = u[0].x;
+  const vector<float16_t, 2> x = vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16)));
+  return;
+}

diff --git a/test/tint/buffer/uniform/types/vec2_f16.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/types/vec2_f16.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..f8f7aad
--- /dev/null
+++ b/test/tint/buffer/uniform/types/vec2_f16.wgsl.expected.fxc.hlsl

@@ -0,0 +1,15 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+
+[numthreads(1, 1, 1)]
+void main() {
+  uint ubo_load = u[0].x;
+  const vector<float16_t, 2> x = vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16)));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x00000244450436F0(8,16-24): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/types/vec2_f16.wgsl.expected.glsl b/test/tint/buffer/uniform/types/vec2_f16.wgsl.expected.glsl
new file mode 100644
index 0000000..99cbb53
--- /dev/null
+++ b/test/tint/buffer/uniform/types/vec2_f16.wgsl.expected.glsl

@@ -0,0 +1,16 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  f16vec2 inner;
+} u;
+
+void tint_symbol() {
+  f16vec2 x = u.inner;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  tint_symbol();
+  return;
+}

diff --git a/test/tint/buffer/uniform/types/vec2_f16.wgsl.expected.msl b/test/tint/buffer/uniform/types/vec2_f16.wgsl.expected.msl
new file mode 100644
index 0000000..80341d9
--- /dev/null
+++ b/test/tint/buffer/uniform/types/vec2_f16.wgsl.expected.msl

@@ -0,0 +1,8 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void tint_symbol(const constant half2* tint_symbol_1 [[buffer(0)]]) {
+  half2 const x = *(tint_symbol_1);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/types/vec2_f16.wgsl.expected.spvasm b/test/tint/buffer/uniform/types/vec2_f16.wgsl.expected.spvasm
new file mode 100644
index 0000000..73bbc7e
--- /dev/null
+++ b/test/tint/buffer/uniform/types/vec2_f16.wgsl.expected.spvasm

@@ -0,0 +1,38 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 15
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %main "main"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %half = OpTypeFloat 16
+     %v2half = OpTypeVector %half 2
+    %u_block = OpTypeStruct %v2half
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+       %void = OpTypeVoid
+          %6 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half
+       %main = OpFunction %void None %6
+          %9 = OpLabel
+         %13 = OpAccessChain %_ptr_Uniform_v2half %u %uint_0
+         %14 = OpLoad %v2half %13
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/types/vec2_f16.wgsl.expected.wgsl b/test/tint/buffer/uniform/types/vec2_f16.wgsl.expected.wgsl
new file mode 100644
index 0000000..9f30580
--- /dev/null
+++ b/test/tint/buffer/uniform/types/vec2_f16.wgsl.expected.wgsl

@@ -0,0 +1,8 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : vec2<f16>;
+
+@compute @workgroup_size(1)
+fn main() {
+  let x = u;
+}

diff --git a/test/tint/buffer/uniform/types/vec2_f32.wgsl b/test/tint/buffer/uniform/types/vec2_f32.wgsl
new file mode 100644
index 0000000..4648a8d
--- /dev/null
+++ b/test/tint/buffer/uniform/types/vec2_f32.wgsl

@@ -0,0 +1,7 @@
+@group(0) @binding(0)
+var<uniform> u : vec2<f32>;
+
+@compute @workgroup_size(1)
+fn main() {
+  let x = u;
+}

diff --git a/test/tint/buffer/uniform/types/vec2_f32.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/types/vec2_f32.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..06f83a5
--- /dev/null
+++ b/test/tint/buffer/uniform/types/vec2_f32.wgsl.expected.dxc.hlsl

@@ -0,0 +1,9 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+
+[numthreads(1, 1, 1)]
+void main() {
+  const float2 x = asfloat(u[0].xy);
+  return;
+}

diff --git a/test/tint/buffer/uniform/types/vec2_f32.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/types/vec2_f32.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..06f83a5
--- /dev/null
+++ b/test/tint/buffer/uniform/types/vec2_f32.wgsl.expected.fxc.hlsl

@@ -0,0 +1,9 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+
+[numthreads(1, 1, 1)]
+void main() {
+  const float2 x = asfloat(u[0].xy);
+  return;
+}

diff --git a/test/tint/buffer/uniform/types/vec2_f32.wgsl.expected.glsl b/test/tint/buffer/uniform/types/vec2_f32.wgsl.expected.glsl
new file mode 100644
index 0000000..39bd9ac
--- /dev/null
+++ b/test/tint/buffer/uniform/types/vec2_f32.wgsl.expected.glsl

@@ -0,0 +1,15 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  vec2 inner;
+} u;
+
+void tint_symbol() {
+  vec2 x = u.inner;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  tint_symbol();
+  return;
+}

diff --git a/test/tint/buffer/uniform/types/vec2_f32.wgsl.expected.msl b/test/tint/buffer/uniform/types/vec2_f32.wgsl.expected.msl
new file mode 100644
index 0000000..f2e48b1
--- /dev/null
+++ b/test/tint/buffer/uniform/types/vec2_f32.wgsl.expected.msl

@@ -0,0 +1,8 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void tint_symbol(const constant float2* tint_symbol_1 [[buffer(0)]]) {
+  float2 const x = *(tint_symbol_1);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/types/vec2_f32.wgsl.expected.spvasm b/test/tint/buffer/uniform/types/vec2_f32.wgsl.expected.spvasm
new file mode 100644
index 0000000..ba6ed42
--- /dev/null
+++ b/test/tint/buffer/uniform/types/vec2_f32.wgsl.expected.spvasm

@@ -0,0 +1,34 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 15
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %main "main"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+      %float = OpTypeFloat 32
+    %v2float = OpTypeVector %float 2
+    %u_block = OpTypeStruct %v2float
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+       %void = OpTypeVoid
+          %6 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
+       %main = OpFunction %void None %6
+          %9 = OpLabel
+         %13 = OpAccessChain %_ptr_Uniform_v2float %u %uint_0
+         %14 = OpLoad %v2float %13
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/types/vec2_f32.wgsl.expected.wgsl b/test/tint/buffer/uniform/types/vec2_f32.wgsl.expected.wgsl
new file mode 100644
index 0000000..d9315ab
--- /dev/null
+++ b/test/tint/buffer/uniform/types/vec2_f32.wgsl.expected.wgsl

@@ -0,0 +1,6 @@
+@group(0) @binding(0) var<uniform> u : vec2<f32>;
+
+@compute @workgroup_size(1)
+fn main() {
+  let x = u;
+}

diff --git a/test/tint/buffer/uniform/types/vec2.wgsl b/test/tint/buffer/uniform/types/vec2_i32.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/types/vec2.wgsl
rename to test/tint/buffer/uniform/types/vec2_i32.wgsl


diff --git a/test/tint/buffer/uniform/types/vec2.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/types/vec2_i32.wgsl.expected.dxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/types/vec2.wgsl.expected.dxc.hlsl
rename to test/tint/buffer/uniform/types/vec2_i32.wgsl.expected.dxc.hlsl


diff --git a/test/tint/buffer/uniform/types/vec2.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/types/vec2_i32.wgsl.expected.fxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/types/vec2.wgsl.expected.fxc.hlsl
rename to test/tint/buffer/uniform/types/vec2_i32.wgsl.expected.fxc.hlsl


diff --git a/test/tint/buffer/uniform/types/vec2.wgsl.expected.glsl b/test/tint/buffer/uniform/types/vec2_i32.wgsl.expected.glsl
similarity index 100%
rename from test/tint/buffer/uniform/types/vec2.wgsl.expected.glsl
rename to test/tint/buffer/uniform/types/vec2_i32.wgsl.expected.glsl


diff --git a/test/tint/buffer/uniform/types/vec2.wgsl.expected.msl b/test/tint/buffer/uniform/types/vec2_i32.wgsl.expected.msl
similarity index 100%
rename from test/tint/buffer/uniform/types/vec2.wgsl.expected.msl
rename to test/tint/buffer/uniform/types/vec2_i32.wgsl.expected.msl


diff --git a/test/tint/buffer/uniform/types/vec2.wgsl.expected.spvasm b/test/tint/buffer/uniform/types/vec2_i32.wgsl.expected.spvasm
similarity index 100%
rename from test/tint/buffer/uniform/types/vec2.wgsl.expected.spvasm
rename to test/tint/buffer/uniform/types/vec2_i32.wgsl.expected.spvasm


diff --git a/test/tint/buffer/uniform/types/vec2.wgsl.expected.wgsl b/test/tint/buffer/uniform/types/vec2_i32.wgsl.expected.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/types/vec2.wgsl.expected.wgsl
rename to test/tint/buffer/uniform/types/vec2_i32.wgsl.expected.wgsl


diff --git a/test/tint/buffer/uniform/types/vec2_u32.wgsl b/test/tint/buffer/uniform/types/vec2_u32.wgsl
new file mode 100644
index 0000000..ccccb3a
--- /dev/null
+++ b/test/tint/buffer/uniform/types/vec2_u32.wgsl

@@ -0,0 +1,7 @@
+@group(0) @binding(0)
+var<uniform> u : vec2<u32>;
+
+@compute @workgroup_size(1)
+fn main() {
+  let x = u;
+}

diff --git a/test/tint/buffer/uniform/types/vec2_u32.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/types/vec2_u32.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..c96708b
--- /dev/null
+++ b/test/tint/buffer/uniform/types/vec2_u32.wgsl.expected.dxc.hlsl

@@ -0,0 +1,9 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+
+[numthreads(1, 1, 1)]
+void main() {
+  const uint2 x = u[0].xy;
+  return;
+}

diff --git a/test/tint/buffer/uniform/types/vec2_u32.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/types/vec2_u32.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..c96708b
--- /dev/null
+++ b/test/tint/buffer/uniform/types/vec2_u32.wgsl.expected.fxc.hlsl

@@ -0,0 +1,9 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+
+[numthreads(1, 1, 1)]
+void main() {
+  const uint2 x = u[0].xy;
+  return;
+}

diff --git a/test/tint/buffer/uniform/types/vec2_u32.wgsl.expected.glsl b/test/tint/buffer/uniform/types/vec2_u32.wgsl.expected.glsl
new file mode 100644
index 0000000..32eedc5
--- /dev/null
+++ b/test/tint/buffer/uniform/types/vec2_u32.wgsl.expected.glsl

@@ -0,0 +1,15 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  uvec2 inner;
+} u;
+
+void tint_symbol() {
+  uvec2 x = u.inner;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  tint_symbol();
+  return;
+}

diff --git a/test/tint/buffer/uniform/types/vec2_u32.wgsl.expected.msl b/test/tint/buffer/uniform/types/vec2_u32.wgsl.expected.msl
new file mode 100644
index 0000000..44fcfb2
--- /dev/null
+++ b/test/tint/buffer/uniform/types/vec2_u32.wgsl.expected.msl

@@ -0,0 +1,8 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void tint_symbol(const constant uint2* tint_symbol_1 [[buffer(0)]]) {
+  uint2 const x = *(tint_symbol_1);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/types/vec2_u32.wgsl.expected.spvasm b/test/tint/buffer/uniform/types/vec2_u32.wgsl.expected.spvasm
new file mode 100644
index 0000000..e8cfed9
--- /dev/null
+++ b/test/tint/buffer/uniform/types/vec2_u32.wgsl.expected.spvasm

@@ -0,0 +1,33 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 14
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %main "main"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %uint = OpTypeInt 32 0
+     %v2uint = OpTypeVector %uint 2
+    %u_block = OpTypeStruct %v2uint
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+       %void = OpTypeVoid
+          %6 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v2uint = OpTypePointer Uniform %v2uint
+       %main = OpFunction %void None %6
+          %9 = OpLabel
+         %12 = OpAccessChain %_ptr_Uniform_v2uint %u %uint_0
+         %13 = OpLoad %v2uint %12
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/types/vec2_u32.wgsl.expected.wgsl b/test/tint/buffer/uniform/types/vec2_u32.wgsl.expected.wgsl
new file mode 100644
index 0000000..d2a992c
--- /dev/null
+++ b/test/tint/buffer/uniform/types/vec2_u32.wgsl.expected.wgsl

@@ -0,0 +1,6 @@
+@group(0) @binding(0) var<uniform> u : vec2<u32>;
+
+@compute @workgroup_size(1)
+fn main() {
+  let x = u;
+}

diff --git a/test/tint/buffer/uniform/types/vec3_f16.wgsl b/test/tint/buffer/uniform/types/vec3_f16.wgsl
new file mode 100644
index 0000000..a1eee50
--- /dev/null
+++ b/test/tint/buffer/uniform/types/vec3_f16.wgsl

@@ -0,0 +1,9 @@
+enable f16;
+
+@group(0) @binding(0)
+var<uniform> u : vec3<f16>;
+
+@compute @workgroup_size(1)
+fn main() {
+  let x = u;
+}

diff --git a/test/tint/buffer/uniform/types/vec3_f16.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/types/vec3_f16.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..be2f0a95
--- /dev/null
+++ b/test/tint/buffer/uniform/types/vec3_f16.wgsl.expected.dxc.hlsl

@@ -0,0 +1,12 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+
+[numthreads(1, 1, 1)]
+void main() {
+  uint2 ubo_load = u[0].xy;
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const vector<float16_t, 3> x = vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]);
+  return;
+}

diff --git a/test/tint/buffer/uniform/types/vec3_f16.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/types/vec3_f16.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..52d3b87
--- /dev/null
+++ b/test/tint/buffer/uniform/types/vec3_f16.wgsl.expected.fxc.hlsl

@@ -0,0 +1,19 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+
+[numthreads(1, 1, 1)]
+void main() {
+  uint2 ubo_load = u[0].xy;
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const vector<float16_t, 3> x = vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001734B456BC0(8,10-18): error X3000: syntax error: unexpected token 'float16_t'
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001734B456BC0(9,3-11): error X3000: unrecognized identifier 'float16_t'
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001734B456BC0(9,13-22): error X3000: unrecognized identifier 'ubo_load_y'
+

diff --git a/test/tint/buffer/uniform/types/vec3_f16.wgsl.expected.glsl b/test/tint/buffer/uniform/types/vec3_f16.wgsl.expected.glsl
new file mode 100644
index 0000000..c1eb385
--- /dev/null
+++ b/test/tint/buffer/uniform/types/vec3_f16.wgsl.expected.glsl

@@ -0,0 +1,16 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  f16vec3 inner;
+} u;
+
+void tint_symbol() {
+  f16vec3 x = u.inner;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  tint_symbol();
+  return;
+}

diff --git a/test/tint/buffer/uniform/types/vec3_f16.wgsl.expected.msl b/test/tint/buffer/uniform/types/vec3_f16.wgsl.expected.msl
new file mode 100644
index 0000000..ce6a7ba
--- /dev/null
+++ b/test/tint/buffer/uniform/types/vec3_f16.wgsl.expected.msl

@@ -0,0 +1,8 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void tint_symbol(const constant half3* tint_symbol_1 [[buffer(0)]]) {
+  half3 const x = *(tint_symbol_1);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/types/vec3_f16.wgsl.expected.spvasm b/test/tint/buffer/uniform/types/vec3_f16.wgsl.expected.spvasm
new file mode 100644
index 0000000..c0115b9
--- /dev/null
+++ b/test/tint/buffer/uniform/types/vec3_f16.wgsl.expected.spvasm

@@ -0,0 +1,38 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 15
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %main "main"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+    %u_block = OpTypeStruct %v3half
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+       %void = OpTypeVoid
+          %6 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+       %main = OpFunction %void None %6
+          %9 = OpLabel
+         %13 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0
+         %14 = OpLoad %v3half %13
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/types/vec3_f16.wgsl.expected.wgsl b/test/tint/buffer/uniform/types/vec3_f16.wgsl.expected.wgsl
new file mode 100644
index 0000000..aecf4fc
--- /dev/null
+++ b/test/tint/buffer/uniform/types/vec3_f16.wgsl.expected.wgsl

@@ -0,0 +1,8 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : vec3<f16>;
+
+@compute @workgroup_size(1)
+fn main() {
+  let x = u;
+}

diff --git a/test/tint/buffer/uniform/types/vec3_f32.wgsl b/test/tint/buffer/uniform/types/vec3_f32.wgsl
new file mode 100644
index 0000000..eea6ffb
--- /dev/null
+++ b/test/tint/buffer/uniform/types/vec3_f32.wgsl

@@ -0,0 +1,7 @@
+@group(0) @binding(0)
+var<uniform> u : vec3<f32>;
+
+@compute @workgroup_size(1)
+fn main() {
+  let x = u;
+}

diff --git a/test/tint/buffer/uniform/types/vec3_f32.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/types/vec3_f32.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..bd50e30
--- /dev/null
+++ b/test/tint/buffer/uniform/types/vec3_f32.wgsl.expected.dxc.hlsl

@@ -0,0 +1,9 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+
+[numthreads(1, 1, 1)]
+void main() {
+  const float3 x = asfloat(u[0].xyz);
+  return;
+}

diff --git a/test/tint/buffer/uniform/types/vec3_f32.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/types/vec3_f32.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..bd50e30
--- /dev/null
+++ b/test/tint/buffer/uniform/types/vec3_f32.wgsl.expected.fxc.hlsl

@@ -0,0 +1,9 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+
+[numthreads(1, 1, 1)]
+void main() {
+  const float3 x = asfloat(u[0].xyz);
+  return;
+}

diff --git a/test/tint/buffer/uniform/types/vec3_f32.wgsl.expected.glsl b/test/tint/buffer/uniform/types/vec3_f32.wgsl.expected.glsl
new file mode 100644
index 0000000..8d178e7
--- /dev/null
+++ b/test/tint/buffer/uniform/types/vec3_f32.wgsl.expected.glsl

@@ -0,0 +1,15 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  vec3 inner;
+} u;
+
+void tint_symbol() {
+  vec3 x = u.inner;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  tint_symbol();
+  return;
+}

diff --git a/test/tint/buffer/uniform/types/vec3_f32.wgsl.expected.msl b/test/tint/buffer/uniform/types/vec3_f32.wgsl.expected.msl
new file mode 100644
index 0000000..b354a26
--- /dev/null
+++ b/test/tint/buffer/uniform/types/vec3_f32.wgsl.expected.msl

@@ -0,0 +1,8 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void tint_symbol(const constant float3* tint_symbol_1 [[buffer(0)]]) {
+  float3 const x = *(tint_symbol_1);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/types/vec3_f32.wgsl.expected.spvasm b/test/tint/buffer/uniform/types/vec3_f32.wgsl.expected.spvasm
new file mode 100644
index 0000000..1399f22
--- /dev/null
+++ b/test/tint/buffer/uniform/types/vec3_f32.wgsl.expected.spvasm

@@ -0,0 +1,34 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 15
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %main "main"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+    %u_block = OpTypeStruct %v3float
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+       %void = OpTypeVoid
+          %6 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+       %main = OpFunction %void None %6
+          %9 = OpLabel
+         %13 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0
+         %14 = OpLoad %v3float %13
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/types/vec3_f32.wgsl.expected.wgsl b/test/tint/buffer/uniform/types/vec3_f32.wgsl.expected.wgsl
new file mode 100644
index 0000000..fd8b627
--- /dev/null
+++ b/test/tint/buffer/uniform/types/vec3_f32.wgsl.expected.wgsl

@@ -0,0 +1,6 @@
+@group(0) @binding(0) var<uniform> u : vec3<f32>;
+
+@compute @workgroup_size(1)
+fn main() {
+  let x = u;
+}

diff --git a/test/tint/buffer/uniform/types/vec3_i32.wgsl b/test/tint/buffer/uniform/types/vec3_i32.wgsl
new file mode 100644
index 0000000..5221b76
--- /dev/null
+++ b/test/tint/buffer/uniform/types/vec3_i32.wgsl

@@ -0,0 +1,7 @@
+@group(0) @binding(0)
+var<uniform> u : vec3<i32>;
+
+@compute @workgroup_size(1)
+fn main() {
+  let x = u;
+}

diff --git a/test/tint/buffer/uniform/types/vec3_i32.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/types/vec3_i32.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..f49da65
--- /dev/null
+++ b/test/tint/buffer/uniform/types/vec3_i32.wgsl.expected.dxc.hlsl

@@ -0,0 +1,9 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+
+[numthreads(1, 1, 1)]
+void main() {
+  const int3 x = asint(u[0].xyz);
+  return;
+}

diff --git a/test/tint/buffer/uniform/types/vec3_i32.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/types/vec3_i32.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..f49da65
--- /dev/null
+++ b/test/tint/buffer/uniform/types/vec3_i32.wgsl.expected.fxc.hlsl

@@ -0,0 +1,9 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+
+[numthreads(1, 1, 1)]
+void main() {
+  const int3 x = asint(u[0].xyz);
+  return;
+}

diff --git a/test/tint/buffer/uniform/types/vec3_i32.wgsl.expected.glsl b/test/tint/buffer/uniform/types/vec3_i32.wgsl.expected.glsl
new file mode 100644
index 0000000..e0eb3c4
--- /dev/null
+++ b/test/tint/buffer/uniform/types/vec3_i32.wgsl.expected.glsl

@@ -0,0 +1,15 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  ivec3 inner;
+} u;
+
+void tint_symbol() {
+  ivec3 x = u.inner;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  tint_symbol();
+  return;
+}

diff --git a/test/tint/buffer/uniform/types/vec3_i32.wgsl.expected.msl b/test/tint/buffer/uniform/types/vec3_i32.wgsl.expected.msl
new file mode 100644
index 0000000..e014902
--- /dev/null
+++ b/test/tint/buffer/uniform/types/vec3_i32.wgsl.expected.msl

@@ -0,0 +1,8 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void tint_symbol(const constant int3* tint_symbol_1 [[buffer(0)]]) {
+  int3 const x = *(tint_symbol_1);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/types/vec3_i32.wgsl.expected.spvasm b/test/tint/buffer/uniform/types/vec3_i32.wgsl.expected.spvasm
new file mode 100644
index 0000000..5da4eb3
--- /dev/null
+++ b/test/tint/buffer/uniform/types/vec3_i32.wgsl.expected.spvasm

@@ -0,0 +1,34 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 15
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %main "main"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+        %int = OpTypeInt 32 1
+      %v3int = OpTypeVector %int 3
+    %u_block = OpTypeStruct %v3int
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+       %void = OpTypeVoid
+          %6 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v3int = OpTypePointer Uniform %v3int
+       %main = OpFunction %void None %6
+          %9 = OpLabel
+         %13 = OpAccessChain %_ptr_Uniform_v3int %u %uint_0
+         %14 = OpLoad %v3int %13
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/types/vec3_i32.wgsl.expected.wgsl b/test/tint/buffer/uniform/types/vec3_i32.wgsl.expected.wgsl
new file mode 100644
index 0000000..db38609
--- /dev/null
+++ b/test/tint/buffer/uniform/types/vec3_i32.wgsl.expected.wgsl

@@ -0,0 +1,6 @@
+@group(0) @binding(0) var<uniform> u : vec3<i32>;
+
+@compute @workgroup_size(1)
+fn main() {
+  let x = u;
+}

diff --git a/test/tint/buffer/uniform/types/vec3.wgsl b/test/tint/buffer/uniform/types/vec3_u32.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/types/vec3.wgsl
rename to test/tint/buffer/uniform/types/vec3_u32.wgsl


diff --git a/test/tint/buffer/uniform/types/vec3.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/types/vec3_u32.wgsl.expected.dxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/types/vec3.wgsl.expected.dxc.hlsl
rename to test/tint/buffer/uniform/types/vec3_u32.wgsl.expected.dxc.hlsl


diff --git a/test/tint/buffer/uniform/types/vec3.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/types/vec3_u32.wgsl.expected.fxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/types/vec3.wgsl.expected.fxc.hlsl
rename to test/tint/buffer/uniform/types/vec3_u32.wgsl.expected.fxc.hlsl


diff --git a/test/tint/buffer/uniform/types/vec3.wgsl.expected.glsl b/test/tint/buffer/uniform/types/vec3_u32.wgsl.expected.glsl
similarity index 100%
rename from test/tint/buffer/uniform/types/vec3.wgsl.expected.glsl
rename to test/tint/buffer/uniform/types/vec3_u32.wgsl.expected.glsl


diff --git a/test/tint/buffer/uniform/types/vec3.wgsl.expected.msl b/test/tint/buffer/uniform/types/vec3_u32.wgsl.expected.msl
similarity index 100%
rename from test/tint/buffer/uniform/types/vec3.wgsl.expected.msl
rename to test/tint/buffer/uniform/types/vec3_u32.wgsl.expected.msl


diff --git a/test/tint/buffer/uniform/types/vec3.wgsl.expected.spvasm b/test/tint/buffer/uniform/types/vec3_u32.wgsl.expected.spvasm
similarity index 100%
rename from test/tint/buffer/uniform/types/vec3.wgsl.expected.spvasm
rename to test/tint/buffer/uniform/types/vec3_u32.wgsl.expected.spvasm


diff --git a/test/tint/buffer/uniform/types/vec3.wgsl.expected.wgsl b/test/tint/buffer/uniform/types/vec3_u32.wgsl.expected.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/types/vec3.wgsl.expected.wgsl
rename to test/tint/buffer/uniform/types/vec3_u32.wgsl.expected.wgsl


diff --git a/test/tint/buffer/uniform/types/vec4_f16.wgsl b/test/tint/buffer/uniform/types/vec4_f16.wgsl
new file mode 100644
index 0000000..bf93dc0
--- /dev/null
+++ b/test/tint/buffer/uniform/types/vec4_f16.wgsl

@@ -0,0 +1,9 @@
+enable f16;
+
+@group(0) @binding(0)
+var<uniform> u : vec4<f16>;
+
+@compute @workgroup_size(1)
+fn main() {
+  let x = u;
+}

diff --git a/test/tint/buffer/uniform/types/vec4_f16.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/types/vec4_f16.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..b4d4152
--- /dev/null
+++ b/test/tint/buffer/uniform/types/vec4_f16.wgsl.expected.dxc.hlsl

@@ -0,0 +1,12 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+
+[numthreads(1, 1, 1)]
+void main() {
+  uint2 ubo_load = u[0].xy;
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const vector<float16_t, 4> x = vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]);
+  return;
+}

diff --git a/test/tint/buffer/uniform/types/vec4_f16.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/types/vec4_f16.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..525facb
--- /dev/null
+++ b/test/tint/buffer/uniform/types/vec4_f16.wgsl.expected.fxc.hlsl

@@ -0,0 +1,19 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+
+[numthreads(1, 1, 1)]
+void main() {
+  uint2 ubo_load = u[0].xy;
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const vector<float16_t, 4> x = vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001D83CC52880(8,10-18): error X3000: syntax error: unexpected token 'float16_t'
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001D83CC52880(9,10-18): error X3000: syntax error: unexpected token 'float16_t'
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001D83CC52880(10,16-24): error X3000: syntax error: unexpected token 'float16_t'
+

diff --git a/test/tint/buffer/uniform/types/vec4_f16.wgsl.expected.glsl b/test/tint/buffer/uniform/types/vec4_f16.wgsl.expected.glsl
new file mode 100644
index 0000000..4b67acc
--- /dev/null
+++ b/test/tint/buffer/uniform/types/vec4_f16.wgsl.expected.glsl

@@ -0,0 +1,16 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  f16vec4 inner;
+} u;
+
+void tint_symbol() {
+  f16vec4 x = u.inner;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  tint_symbol();
+  return;
+}

diff --git a/test/tint/buffer/uniform/types/vec4_f16.wgsl.expected.msl b/test/tint/buffer/uniform/types/vec4_f16.wgsl.expected.msl
new file mode 100644
index 0000000..d8a8bfc
--- /dev/null
+++ b/test/tint/buffer/uniform/types/vec4_f16.wgsl.expected.msl

@@ -0,0 +1,8 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void tint_symbol(const constant half4* tint_symbol_1 [[buffer(0)]]) {
+  half4 const x = *(tint_symbol_1);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/types/vec4_f16.wgsl.expected.spvasm b/test/tint/buffer/uniform/types/vec4_f16.wgsl.expected.spvasm
new file mode 100644
index 0000000..454bc06
--- /dev/null
+++ b/test/tint/buffer/uniform/types/vec4_f16.wgsl.expected.spvasm

@@ -0,0 +1,38 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 15
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %main "main"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %half = OpTypeFloat 16
+     %v4half = OpTypeVector %half 4
+    %u_block = OpTypeStruct %v4half
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+       %void = OpTypeVoid
+          %6 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v4half = OpTypePointer Uniform %v4half
+       %main = OpFunction %void None %6
+          %9 = OpLabel
+         %13 = OpAccessChain %_ptr_Uniform_v4half %u %uint_0
+         %14 = OpLoad %v4half %13
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/types/vec4_f16.wgsl.expected.wgsl b/test/tint/buffer/uniform/types/vec4_f16.wgsl.expected.wgsl
new file mode 100644
index 0000000..5fb6d20
--- /dev/null
+++ b/test/tint/buffer/uniform/types/vec4_f16.wgsl.expected.wgsl

@@ -0,0 +1,8 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : vec4<f16>;
+
+@compute @workgroup_size(1)
+fn main() {
+  let x = u;
+}

diff --git a/test/tint/buffer/uniform/types/vec4.wgsl b/test/tint/buffer/uniform/types/vec4_f32.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/types/vec4.wgsl
rename to test/tint/buffer/uniform/types/vec4_f32.wgsl


diff --git a/test/tint/buffer/uniform/types/vec4.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/types/vec4_f32.wgsl.expected.dxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/types/vec4.wgsl.expected.dxc.hlsl
rename to test/tint/buffer/uniform/types/vec4_f32.wgsl.expected.dxc.hlsl


diff --git a/test/tint/buffer/uniform/types/vec4.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/types/vec4_f32.wgsl.expected.fxc.hlsl
similarity index 100%
rename from test/tint/buffer/uniform/types/vec4.wgsl.expected.fxc.hlsl
rename to test/tint/buffer/uniform/types/vec4_f32.wgsl.expected.fxc.hlsl


diff --git a/test/tint/buffer/uniform/types/vec4.wgsl.expected.glsl b/test/tint/buffer/uniform/types/vec4_f32.wgsl.expected.glsl
similarity index 100%
rename from test/tint/buffer/uniform/types/vec4.wgsl.expected.glsl
rename to test/tint/buffer/uniform/types/vec4_f32.wgsl.expected.glsl


diff --git a/test/tint/buffer/uniform/types/vec4.wgsl.expected.msl b/test/tint/buffer/uniform/types/vec4_f32.wgsl.expected.msl
similarity index 100%
rename from test/tint/buffer/uniform/types/vec4.wgsl.expected.msl
rename to test/tint/buffer/uniform/types/vec4_f32.wgsl.expected.msl


diff --git a/test/tint/buffer/uniform/types/vec4.wgsl.expected.spvasm b/test/tint/buffer/uniform/types/vec4_f32.wgsl.expected.spvasm
similarity index 100%
rename from test/tint/buffer/uniform/types/vec4.wgsl.expected.spvasm
rename to test/tint/buffer/uniform/types/vec4_f32.wgsl.expected.spvasm


diff --git a/test/tint/buffer/uniform/types/vec4.wgsl.expected.wgsl b/test/tint/buffer/uniform/types/vec4_f32.wgsl.expected.wgsl
similarity index 100%
rename from test/tint/buffer/uniform/types/vec4.wgsl.expected.wgsl
rename to test/tint/buffer/uniform/types/vec4_f32.wgsl.expected.wgsl


diff --git a/test/tint/buffer/uniform/types/vec4_i32.wgsl b/test/tint/buffer/uniform/types/vec4_i32.wgsl
new file mode 100644
index 0000000..aa8c5b1
--- /dev/null
+++ b/test/tint/buffer/uniform/types/vec4_i32.wgsl

@@ -0,0 +1,7 @@
+@group(0) @binding(0)
+var<uniform> u : vec4<i32>;
+
+@compute @workgroup_size(1)
+fn main() {
+  let x = u;
+}

diff --git a/test/tint/buffer/uniform/types/vec4_i32.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/types/vec4_i32.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..a3efa9b
--- /dev/null
+++ b/test/tint/buffer/uniform/types/vec4_i32.wgsl.expected.dxc.hlsl

@@ -0,0 +1,9 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+
+[numthreads(1, 1, 1)]
+void main() {
+  const int4 x = asint(u[0]);
+  return;
+}

diff --git a/test/tint/buffer/uniform/types/vec4_i32.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/types/vec4_i32.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..a3efa9b
--- /dev/null
+++ b/test/tint/buffer/uniform/types/vec4_i32.wgsl.expected.fxc.hlsl

@@ -0,0 +1,9 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+
+[numthreads(1, 1, 1)]
+void main() {
+  const int4 x = asint(u[0]);
+  return;
+}

diff --git a/test/tint/buffer/uniform/types/vec4_i32.wgsl.expected.glsl b/test/tint/buffer/uniform/types/vec4_i32.wgsl.expected.glsl
new file mode 100644
index 0000000..2b41fc8
--- /dev/null
+++ b/test/tint/buffer/uniform/types/vec4_i32.wgsl.expected.glsl

@@ -0,0 +1,15 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  ivec4 inner;
+} u;
+
+void tint_symbol() {
+  ivec4 x = u.inner;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  tint_symbol();
+  return;
+}

diff --git a/test/tint/buffer/uniform/types/vec4_i32.wgsl.expected.msl b/test/tint/buffer/uniform/types/vec4_i32.wgsl.expected.msl
new file mode 100644
index 0000000..7bc39f7
--- /dev/null
+++ b/test/tint/buffer/uniform/types/vec4_i32.wgsl.expected.msl

@@ -0,0 +1,8 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void tint_symbol(const constant int4* tint_symbol_1 [[buffer(0)]]) {
+  int4 const x = *(tint_symbol_1);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/types/vec4_i32.wgsl.expected.spvasm b/test/tint/buffer/uniform/types/vec4_i32.wgsl.expected.spvasm
new file mode 100644
index 0000000..95a491e
--- /dev/null
+++ b/test/tint/buffer/uniform/types/vec4_i32.wgsl.expected.spvasm

@@ -0,0 +1,34 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 15
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %main "main"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+        %int = OpTypeInt 32 1
+      %v4int = OpTypeVector %int 4
+    %u_block = OpTypeStruct %v4int
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+       %void = OpTypeVoid
+          %6 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v4int = OpTypePointer Uniform %v4int
+       %main = OpFunction %void None %6
+          %9 = OpLabel
+         %13 = OpAccessChain %_ptr_Uniform_v4int %u %uint_0
+         %14 = OpLoad %v4int %13
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/types/vec4_i32.wgsl.expected.wgsl b/test/tint/buffer/uniform/types/vec4_i32.wgsl.expected.wgsl
new file mode 100644
index 0000000..ec8838c
--- /dev/null
+++ b/test/tint/buffer/uniform/types/vec4_i32.wgsl.expected.wgsl

@@ -0,0 +1,6 @@
+@group(0) @binding(0) var<uniform> u : vec4<i32>;
+
+@compute @workgroup_size(1)
+fn main() {
+  let x = u;
+}

diff --git a/test/tint/buffer/uniform/types/vec4_u32.wgsl b/test/tint/buffer/uniform/types/vec4_u32.wgsl
new file mode 100644
index 0000000..a01c90e
--- /dev/null
+++ b/test/tint/buffer/uniform/types/vec4_u32.wgsl

@@ -0,0 +1,7 @@
+@group(0) @binding(0)
+var<uniform> u : vec4<u32>;
+
+@compute @workgroup_size(1)
+fn main() {
+  let x = u;
+}

diff --git a/test/tint/buffer/uniform/types/vec4_u32.wgsl.expected.dxc.hlsl b/test/tint/buffer/uniform/types/vec4_u32.wgsl.expected.dxc.hlsl
new file mode 100644
index 0000000..686add5
--- /dev/null
+++ b/test/tint/buffer/uniform/types/vec4_u32.wgsl.expected.dxc.hlsl

@@ -0,0 +1,9 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+
+[numthreads(1, 1, 1)]
+void main() {
+  const uint4 x = u[0];
+  return;
+}

diff --git a/test/tint/buffer/uniform/types/vec4_u32.wgsl.expected.fxc.hlsl b/test/tint/buffer/uniform/types/vec4_u32.wgsl.expected.fxc.hlsl
new file mode 100644
index 0000000..686add5
--- /dev/null
+++ b/test/tint/buffer/uniform/types/vec4_u32.wgsl.expected.fxc.hlsl

@@ -0,0 +1,9 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[1];
+};
+
+[numthreads(1, 1, 1)]
+void main() {
+  const uint4 x = u[0];
+  return;
+}

diff --git a/test/tint/buffer/uniform/types/vec4_u32.wgsl.expected.glsl b/test/tint/buffer/uniform/types/vec4_u32.wgsl.expected.glsl
new file mode 100644
index 0000000..84be7e2
--- /dev/null
+++ b/test/tint/buffer/uniform/types/vec4_u32.wgsl.expected.glsl

@@ -0,0 +1,15 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  uvec4 inner;
+} u;
+
+void tint_symbol() {
+  uvec4 x = u.inner;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  tint_symbol();
+  return;
+}

diff --git a/test/tint/buffer/uniform/types/vec4_u32.wgsl.expected.msl b/test/tint/buffer/uniform/types/vec4_u32.wgsl.expected.msl
new file mode 100644
index 0000000..e7c1bfe
--- /dev/null
+++ b/test/tint/buffer/uniform/types/vec4_u32.wgsl.expected.msl

@@ -0,0 +1,8 @@
+#include <metal_stdlib>
+
+using namespace metal;
+kernel void tint_symbol(const constant uint4* tint_symbol_1 [[buffer(0)]]) {
+  uint4 const x = *(tint_symbol_1);
+  return;
+}
+

diff --git a/test/tint/buffer/uniform/types/vec4_u32.wgsl.expected.spvasm b/test/tint/buffer/uniform/types/vec4_u32.wgsl.expected.spvasm
new file mode 100644
index 0000000..a48eca4
--- /dev/null
+++ b/test/tint/buffer/uniform/types/vec4_u32.wgsl.expected.spvasm

@@ -0,0 +1,33 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 14
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %main "main"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %uint = OpTypeInt 32 0
+     %v4uint = OpTypeVector %uint 4
+    %u_block = OpTypeStruct %v4uint
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+       %void = OpTypeVoid
+          %6 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v4uint = OpTypePointer Uniform %v4uint
+       %main = OpFunction %void None %6
+          %9 = OpLabel
+         %12 = OpAccessChain %_ptr_Uniform_v4uint %u %uint_0
+         %13 = OpLoad %v4uint %12
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/buffer/uniform/types/vec4_u32.wgsl.expected.wgsl b/test/tint/buffer/uniform/types/vec4_u32.wgsl.expected.wgsl
new file mode 100644
index 0000000..92c2b03
--- /dev/null
+++ b/test/tint/buffer/uniform/types/vec4_u32.wgsl.expected.wgsl

@@ -0,0 +1,6 @@
+@group(0) @binding(0) var<uniform> u : vec4<u32>;
+
+@compute @workgroup_size(1)
+fn main() {
+  let x = u;
+}
commit	776b221ae2e337b2b3410456c3e4fa95a5c93dae	[log] [tgz]
author	Zhaoming Jiang <zhaoming.jiang@intel.com>	Wed Nov 30 02:47:27 2022 +0000
committer	Dawn LUCI CQ <dawn-scoped@luci-project-accounts.iam.gserviceaccount.com>	Wed Nov 30 02:47:27 2022 +0000
tree	4f3937c884d3c0c0e0b85c8ad349c04a25185543
parent	205e16de63a2a642394202ab57a06ff07273064d [diff]