Tint: Implement f16 in uniform and storage address space

This CL implements f16 in the uniform and storage address spaces, allowing
f16 types to be used in uniform and storage buffers on all backends. Tint
unit tests and Dawn E2E tests are added to validate that the f16 types work
as expected.

Bug: tint:1473, tint:1502
Change-Id: I15e3de1033d3727f2ea33f4657f682c5f13c2153
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/106320
Kokoro: Kokoro <noreply+kokoro@google.com>
Commit-Queue: Zhaoming Jiang <zhaoming.jiang@intel.com>
Reviewed-by: Ben Clayton <bclayton@google.com>
diff --git a/src/dawn/tests/end2end/ComputeLayoutMemoryBufferTests.cpp b/src/dawn/tests/end2end/ComputeLayoutMemoryBufferTests.cpp
index 0ceca8d..79cacd9 100644
--- a/src/dawn/tests/end2end/ComputeLayoutMemoryBufferTests.cpp
+++ b/src/dawn/tests/end2end/ComputeLayoutMemoryBufferTests.cpp
@@ -96,7 +96,17 @@
 //   3. "Padding": Add `size` bytes of padding bytes into buffer;
 //   4. "FillingFixed": Fill all `size` given (fixed) bytes into the memory buffer.
 // Note that data bytes and padding bytes are generated seperatedly and designed to
-// be distinguishable, i.e. data bytes have MSB set to 0 while padding bytes 1.
+// be distinguishable, i.e. data bytes have the second most significant bit set to 0 while padding
+// bytes have it set to 1.
+// The test data must not include NaN or Inf, because according to the WGSL spec an implementation
+// may give an indeterminate value if an expression evaluates to NaN or Inf, and in Tint-generated
+// HLSL reading an f16 NaN from a buffer does not preserve its bit pattern (i.e. a NaN input may
+// be changed to another NaN with a different bit pattern). In the bit representation of both f32
+// and f16, the first (most significant) bit is the sign bit, and the biased exponent bits follow
+// it (starting from the second most significant bit). A float value is NaN or Inf if and only if
+// all its exponent bits are 1. By setting the second most significant bit of every data byte to
+// 0, we ensure that the second most significant bit of any float in the buffer is 0, and
+// therefore avoid generating NaN or Inf float data.
 class MemoryDataBuilder {
   public:
     // Record a "Align" operation
@@ -150,15 +160,20 @@
                                  uint8_t paddingXorKey) {
         uint8_t dataByte = 0x0u;
         uint8_t paddingByte = 0x2u;
-        // Get a data byte with MSB set to 0.
+        // Padding mask, setting the second most significant bit to 1
+        constexpr uint8_t paddingMask = 0x40u;
+        // Data mask, clearing the second most significant bit so that data bytes are
+        // distinguishable from padding bytes and never form a NaN or Inf.
+        constexpr uint8_t dataMask = ~paddingMask;
+        // Get a data byte
         auto NextDataByte = [&]() {
             dataByte += 0x11u;
-            return static_cast<uint8_t>((dataByte ^ dataXorKey) & 0x7fu);
+            return static_cast<uint8_t>((dataByte ^ dataXorKey) & dataMask);
         };
-        // Get a padding byte with MSB set to 1, distinguished from data bytes.
+        // Get a padding byte
         auto NextPaddingByte = [&]() {
             paddingByte += 0x13u;
-            return static_cast<uint8_t>((paddingByte ^ paddingXorKey) | 0x80u);
+            return static_cast<uint8_t>((paddingByte ^ paddingXorKey) | paddingMask);
         };
         for (auto& operation : mOperations) {
             switch (operation.mType) {
@@ -234,10 +249,11 @@
   public:
     // Constructor with WGSL type name, natural alignment and natural size. Set mStrideDataBytes to
     // natural size and mStridePaddingBytes to 0 by default to indicate continious data part.
-    Field(std::string wgslType, size_t align, size_t size)
+    Field(std::string wgslType, size_t align, size_t size, bool requireF16Feature)
         : mWGSLType(wgslType),
           mAlign(align),
           mSize(size),
+          mRequireF16Feature(requireF16Feature),
           mStrideDataBytes(size),
           mStridePaddingBytes(0) {}
 
@@ -247,6 +263,7 @@
     size_t GetUnpaddedSize() const { return mSize; }
     // The padded size determined by @size attribute if existed, otherwise the natural size
     size_t GetPaddedSize() const { return mHasSizeAttribute ? mPaddedSize : mSize; }
+    bool IsRequireF16Feature() const { return mRequireF16Feature; }
 
     // Applies a @size attribute, sets the mPaddedSize to value.
     // Returns this Field so calls can be chained.
@@ -337,7 +354,8 @@
 
     // Helper function to build a Field describing a scalar type.
     static Field Scalar(ScalarType type) {
-        return Field(ScalarTypeName(type), ScalarTypeSize(type), ScalarTypeSize(type));
+        return Field(ScalarTypeName(type), ScalarTypeSize(type), ScalarTypeSize(type),
+                     type == ScalarType::f16);
     }
 
     // Helper function to build a Field describing a vector type.
@@ -347,7 +365,7 @@
         size_t vectorSize = n * elementSize;
         size_t vectorAlignment = (n == 3 ? 4 : n) * elementSize;
         return Field{"vec" + std::to_string(n) + "<" + ScalarTypeName(type) + ">", vectorAlignment,
-                     vectorSize};
+                     vectorSize, type == ScalarType::f16};
     }
 
     // Helper function to build a Field describing a matrix type.
@@ -360,7 +378,7 @@
         size_t colVectorAlignment = (row == 3 ? 4 : row) * elementSize;
         Field field = Field{"mat" + std::to_string(col) + "x" + std::to_string(row) + "<" +
                                 ScalarTypeName(type) + ">",
-                            colVectorAlignment, col * colVectorAlignment};
+                            colVectorAlignment, col * colVectorAlignment, type == ScalarType::f16};
         if (colVectorSize != colVectorAlignment) {
             field.Strided(colVectorSize, colVectorAlignment - colVectorSize);
         }
@@ -371,6 +389,7 @@
     const std::string mWGSLType;  // Friendly WGSL name of the type of the field
     size_t mAlign;       // Alignment of the type in bytes, can be change by @align attribute
     const size_t mSize;  // Natural size of the type in bytes
+    const bool mRequireF16Feature;
 
     bool mHasAlignAttribute = false;
     bool mHasSizeAttribute = false;
@@ -392,6 +411,25 @@
     return o;
 }
 
+std::ostream& operator<<(std::ostream& o, const std::vector<uint8_t>& byteBuffer) {
+    // Dump as two-digit hex bytes, 32 per row. std::hex and std::setfill are sticky, so the
+    // caller's formatting state is saved here and restored before returning.
+    const std::ios_base::fmtflags savedFlags = o.flags();
+    const char savedFill = o.fill();
+    o << "\n" << std::hex << std::setfill('0');
+    uint32_t i = 0;
+    for (auto byte : byteBuffer) {
+        // setw applies to the next insertion only, so it must be repeated for every byte.
+        o << std::setw(2) << uint32_t(byte) << ((++i % 32 == 0) ? "\n" : " ");
+    }
+    if (i % 32 != 0) {
+        o << "\n";
+    }
+    o.flags(savedFlags);
+    o.fill(savedFill);
+    return o;
+}
+
 // Create a compute pipeline with all buffer in bufferList binded in order starting from slot 0, and
 // run the given shader.
 void RunComputeShaderWithBuffers(const wgpu::Device& device,
@@ -445,7 +483,40 @@
 
 class ComputeLayoutMemoryBufferTests
     : public DawnTestWithParams<ComputeLayoutMemoryBufferTestParams> {
-    void SetUp() override { DawnTestBase::SetUp(); }
+    // void SetUp() override { DawnTestBase::SetUp(); }
+
+  protected:
+    // Request ShaderF16 if the adapter supports it and, on D3D12, "use_dxc" is enabled.
+    std::vector<wgpu::FeatureName> GetRequiredFeatures() override {
+        mIsShaderF16SupportedOnAdapter = SupportsFeatures({wgpu::FeatureName::ShaderF16});
+        if (!mIsShaderF16SupportedOnAdapter) {
+            return {};
+        }
+
+        if (!IsD3D12()) {
+            mUseDxcEnabledOrNonD3D12 = true;
+        } else {
+            for (auto* enabledToggle : GetParam().forceEnabledWorkarounds) {
+                if (strncmp(enabledToggle, "use_dxc", 7) == 0) {
+                    mUseDxcEnabledOrNonD3D12 = true;
+                    break;
+                }
+            }
+        }
+
+        if (mUseDxcEnabledOrNonD3D12) {
+            return {wgpu::FeatureName::ShaderF16};
+        }
+
+        return {};
+    }
+
+    bool IsShaderF16SupportedOnAdapter() const { return mIsShaderF16SupportedOnAdapter; }
+    bool UseDxcEnabledOrNonD3D12() const { return mUseDxcEnabledOrNonD3D12; }
+
+  private:
+    bool mIsShaderF16SupportedOnAdapter = false;
+    bool mUseDxcEnabledOrNonD3D12 = false;
 };
 
 // Align returns the WGSL decoration for an explicit structure field alignment
@@ -472,9 +543,14 @@
 
     const Field& field = GetParam().mField;
 
+    if (field.IsRequireF16Feature() && !device.HasFeature(wgpu::FeatureName::ShaderF16)) {
+        return;
+    }
+
     const bool isUniform = GetParam().mAddressSpace == AddressSpace::Uniform;
 
-    std::string shader = R"(
+    std::string shader = std::string(field.IsRequireF16Feature() ? "enable f16;" : "") +
+                         R"(
 struct Data {
     header : u32,
     @align({field_align}) @size({field_size}) field : {field_type},
@@ -553,6 +629,7 @@
         {
             inputDataBuilder.AddFixedU32(kDataHeaderCode);           // Input.data.header
             inputDataBuilder.AddSubBuilder(field.GetDataBuilder());  // Input.data.field
+            inputDataBuilder.AlignTo(4);                             // Input.data.footer alignment
             inputDataBuilder.AddFixedU32(kDataFooterCode);           // Input.data.footer
             inputDataBuilder.AlignTo(field.GetAlign());              // Input.data padding
         }
@@ -563,6 +640,7 @@
 
     MemoryDataBuilder expectedDataBuilder;  // The expected data to be copied by the shader
     expectedDataBuilder.AddSubBuilder(field.GetDataBuilder());
+    expectedDataBuilder.AlignTo(4);  // Storage buffer size must be a multiple of 4
 
     // Expectation and input buffer have identical data bytes but different padding bytes.
     // Initializes the dst buffer with data bytes different from input and expectation, and padding
@@ -603,25 +681,36 @@
     EXPECT_BUFFER_U32_EQ(kStatusOk, statusBuf, 0) << "status code error" << std::endl
                                                   << "Shader: " << shader;
 
-    // Check the data
+    // Check the data. Note that MemoryDataBuilder avoids generating NaN and Inf floating point
+    // data, whose bit patterns may not be preserved when read from a buffer (arbitrary NaNs may
+    // be silently transformed into a quiet NaN). Having NaN or Inf floating point data in the
+    // input may result in a bitwise mismatch.
     field.CheckData([&](uint32_t offset, uint32_t size) {
         EXPECT_BUFFER_U8_RANGE_EQ(expectedData.data() + offset, outputBuf, offset, size)
-            << "offset: " << offset;
+            << "offset: " << offset << "\n Input buffer:" << inputData << "Shader:\n"
+            << shader << "\n";
     });
 }
 
 // Test different types that used directly as buffer type
 TEST_P(ComputeLayoutMemoryBufferTests, NonStructMember) {
     auto params = GetParam();
+
     Field& field = params.mField;
+
     // @size and @align attribute only apply to struct members, skip them
     if (field.HasSizeAttribute() || field.HasAlignAttribute()) {
         return;
     }
 
+    if (field.IsRequireF16Feature() && !device.HasFeature(wgpu::FeatureName::ShaderF16)) {
+        return;
+    }
+
     const bool isUniform = GetParam().mAddressSpace == AddressSpace::Uniform;
 
-    std::string shader = R"(
+    std::string shader = std::string(field.IsRequireF16Feature() ? "enable f16;" : "") +
+                         R"(
 @group(0) @binding(0) var<{input_qualifiers}> input : {field_type};
 @group(0) @binding(1) var<storage, read_write> output : {field_type};
 
@@ -638,10 +727,11 @@
     // Build the input and expected data.
     MemoryDataBuilder dataBuilder;
     dataBuilder.AddSubBuilder(field.GetDataBuilder());
+    dataBuilder.AlignTo(4);  // Storage buffer size must be a multiple of 4
 
     // Expectation and input buffer have identical data bytes but different padding bytes.
-    // Initializes the dst buffer with data bytes different from input and expectation, and padding
-    // bytes identical to expectation but different from input.
+    // Initializes the dst buffer with data bytes different from input and expectation, and
+    // padding bytes identical to expectation but different from input.
     constexpr uint8_t dataKeyForInputAndExpectation = 0x00u;
     constexpr uint8_t dataKeyForDstInit = 0xffu;
     constexpr uint8_t paddingKeyForInput = 0x3fu;
@@ -669,10 +759,14 @@
 
     RunComputeShaderWithBuffers(device, queue, shader, {inputBuf, outputBuf});
 
-    // Check the data
+    // Check the data. Note that MemoryDataBuilder avoids generating NaN and Inf floating point
+    // data, whose bit patterns may not be preserved when read from a buffer (arbitrary NaNs may
+    // be silently transformed into a quiet NaN). Having NaN or Inf floating point data in the
+    // input may result in a bitwise mismatch.
     field.CheckData([&](uint32_t offset, uint32_t size) {
         EXPECT_BUFFER_U8_RANGE_EQ(expectedData.data() + offset, outputBuf, offset, size)
-            << "offset: " << offset;
+            << "offset: " << offset << "\n Input buffer:" << inputData << "Shader:\n"
+            << shader << "\n";
     });
 }
 
@@ -680,6 +774,7 @@
     auto params = MakeParamGenerator<ComputeLayoutMemoryBufferTestParams>(
         {
             D3D12Backend(),
+            D3D12Backend({"use_dxc"}),
             MetalBackend(),
             VulkanBackend(),
             OpenGLBackend(),
@@ -692,16 +787,19 @@
             Field::Scalar(ScalarType::f32),
             Field::Scalar(ScalarType::i32),
             Field::Scalar(ScalarType::u32),
+            Field::Scalar(ScalarType::f16),
 
             // Scalar types with custom alignment
             Field::Scalar(ScalarType::f32).AlignAttribute(16),
             Field::Scalar(ScalarType::i32).AlignAttribute(16),
             Field::Scalar(ScalarType::u32).AlignAttribute(16),
+            Field::Scalar(ScalarType::f16).AlignAttribute(16),
 
             // Scalar types with custom size
             Field::Scalar(ScalarType::f32).SizeAttribute(24),
             Field::Scalar(ScalarType::i32).SizeAttribute(24),
             Field::Scalar(ScalarType::u32).SizeAttribute(24),
+            Field::Scalar(ScalarType::f16).SizeAttribute(24),
 
             // Vector types with no custom alignment or size
             Field::Vector(2, ScalarType::f32),
@@ -713,6 +811,9 @@
             Field::Vector(2, ScalarType::u32),
             Field::Vector(3, ScalarType::u32),
             Field::Vector(4, ScalarType::u32),
+            Field::Vector(2, ScalarType::f16),
+            Field::Vector(3, ScalarType::f16),
+            Field::Vector(4, ScalarType::f16),
 
             // Vector types with custom alignment
             Field::Vector(2, ScalarType::f32).AlignAttribute(32),
@@ -724,6 +825,9 @@
             Field::Vector(2, ScalarType::u32).AlignAttribute(32),
             Field::Vector(3, ScalarType::u32).AlignAttribute(32),
             Field::Vector(4, ScalarType::u32).AlignAttribute(32),
+            Field::Vector(2, ScalarType::f16).AlignAttribute(32),
+            Field::Vector(3, ScalarType::f16).AlignAttribute(32),
+            Field::Vector(4, ScalarType::f16).AlignAttribute(32),
 
             // Vector types with custom size
             Field::Vector(2, ScalarType::f32).SizeAttribute(24),
@@ -735,6 +839,9 @@
             Field::Vector(2, ScalarType::u32).SizeAttribute(24),
             Field::Vector(3, ScalarType::u32).SizeAttribute(24),
             Field::Vector(4, ScalarType::u32).SizeAttribute(24),
+            Field::Vector(2, ScalarType::f16).SizeAttribute(24),
+            Field::Vector(3, ScalarType::f16).SizeAttribute(24),
+            Field::Vector(4, ScalarType::f16).SizeAttribute(24),
 
             // Matrix types with no custom alignment or size
             Field::Matrix(2, 2, ScalarType::f32),
@@ -746,6 +853,15 @@
             Field::Matrix(2, 4, ScalarType::f32),
             Field::Matrix(3, 4, ScalarType::f32),
             Field::Matrix(4, 4, ScalarType::f32),
+            Field::Matrix(2, 2, ScalarType::f16),
+            Field::Matrix(3, 2, ScalarType::f16),
+            Field::Matrix(4, 2, ScalarType::f16),
+            Field::Matrix(2, 3, ScalarType::f16),
+            Field::Matrix(3, 3, ScalarType::f16),
+            Field::Matrix(4, 3, ScalarType::f16),
+            Field::Matrix(2, 4, ScalarType::f16),
+            Field::Matrix(3, 4, ScalarType::f16),
+            Field::Matrix(4, 4, ScalarType::f16),
 
             // Matrix types with custom alignment
             Field::Matrix(2, 2, ScalarType::f32).AlignAttribute(32),
@@ -757,6 +873,15 @@
             Field::Matrix(2, 4, ScalarType::f32).AlignAttribute(32),
             Field::Matrix(3, 4, ScalarType::f32).AlignAttribute(32),
             Field::Matrix(4, 4, ScalarType::f32).AlignAttribute(32),
+            Field::Matrix(2, 2, ScalarType::f16).AlignAttribute(32),
+            Field::Matrix(3, 2, ScalarType::f16).AlignAttribute(32),
+            Field::Matrix(4, 2, ScalarType::f16).AlignAttribute(32),
+            Field::Matrix(2, 3, ScalarType::f16).AlignAttribute(32),
+            Field::Matrix(3, 3, ScalarType::f16).AlignAttribute(32),
+            Field::Matrix(4, 3, ScalarType::f16).AlignAttribute(32),
+            Field::Matrix(2, 4, ScalarType::f16).AlignAttribute(32),
+            Field::Matrix(3, 4, ScalarType::f16).AlignAttribute(32),
+            Field::Matrix(4, 4, ScalarType::f16).AlignAttribute(32),
 
             // Matrix types with custom size
             Field::Matrix(2, 2, ScalarType::f32).SizeAttribute(128),
@@ -768,85 +893,241 @@
             Field::Matrix(2, 4, ScalarType::f32).SizeAttribute(128),
             Field::Matrix(3, 4, ScalarType::f32).SizeAttribute(128),
             Field::Matrix(4, 4, ScalarType::f32).SizeAttribute(128),
+            Field::Matrix(2, 2, ScalarType::f16).SizeAttribute(128),
+            Field::Matrix(3, 2, ScalarType::f16).SizeAttribute(128),
+            Field::Matrix(4, 2, ScalarType::f16).SizeAttribute(128),
+            Field::Matrix(2, 3, ScalarType::f16).SizeAttribute(128),
+            Field::Matrix(3, 3, ScalarType::f16).SizeAttribute(128),
+            Field::Matrix(4, 3, ScalarType::f16).SizeAttribute(128),
+            Field::Matrix(2, 4, ScalarType::f16).SizeAttribute(128),
+            Field::Matrix(3, 4, ScalarType::f16).SizeAttribute(128),
+            Field::Matrix(4, 4, ScalarType::f16).SizeAttribute(128),
 
             // Array types with no custom alignment or size.
-            // Note: The use of StorageBufferOnly() is due to UBOs requiring 16 byte alignment
-            // of array elements. See https://www.w3.org/TR/WGSL/#storage-class-constraints
-            Field("array<u32, 1>", /* align */ 4, /* size */ 4).StorageBufferOnly(),
-            Field("array<u32, 2>", /* align */ 4, /* size */ 8).StorageBufferOnly(),
-            Field("array<u32, 3>", /* align */ 4, /* size */ 12).StorageBufferOnly(),
-            Field("array<u32, 4>", /* align */ 4, /* size */ 16).StorageBufferOnly(),
-            Field("array<vec2<u32>, 1>", /* align */ 8, /* size */ 8).StorageBufferOnly(),
-            Field("array<vec2<u32>, 2>", /* align */ 8, /* size */ 16).StorageBufferOnly(),
-            Field("array<vec2<u32>, 3>", /* align */ 8, /* size */ 24).StorageBufferOnly(),
-            Field("array<vec2<u32>, 4>", /* align */ 8, /* size */ 32).StorageBufferOnly(),
-            Field("array<vec3<u32>, 1>", /* align */ 16, /* size */ 16).Strided(12, 4),
-            Field("array<vec3<u32>, 2>", /* align */ 16, /* size */ 32).Strided(12, 4),
-            Field("array<vec3<u32>, 3>", /* align */ 16, /* size */ 48).Strided(12, 4),
-            Field("array<vec3<u32>, 4>", /* align */ 16, /* size */ 64).Strided(12, 4),
-            Field("array<vec4<u32>, 1>", /* align */ 16, /* size */ 16),
-            Field("array<vec4<u32>, 2>", /* align */ 16, /* size */ 32),
-            Field("array<vec4<u32>, 3>", /* align */ 16, /* size */ 48),
-            Field("array<vec4<u32>, 4>", /* align */ 16, /* size */ 64),
+            // Note: The use of StorageBufferOnly() is due to UBOs requiring 16 byte
+            // alignment of array elements. See
+            // https://www.w3.org/TR/WGSL/#storage-class-constraints
+            Field("array<u32, 1>", /* align */ 4, /* size */ 4, /* requireF16Feature */ false)
+                .StorageBufferOnly(),
+            Field("array<u32, 2>", /* align */ 4, /* size */ 8, /* requireF16Feature */ false)
+                .StorageBufferOnly(),
+            Field("array<u32, 3>", /* align */ 4, /* size */ 12, /* requireF16Feature */ false)
+                .StorageBufferOnly(),
+            Field("array<u32, 4>", /* align */ 4, /* size */ 16, /* requireF16Feature */ false)
+                .StorageBufferOnly(),
+            Field("array<vec2<u32>, 1>", /* align */ 8, /* size */ 8, /* requireF16Feature */ false)
+                .StorageBufferOnly(),
+            Field("array<vec2<u32>, 2>", /* align */ 8, /* size */ 16,
+                  /* requireF16Feature */ false)
+                .StorageBufferOnly(),
+            Field("array<vec2<u32>, 3>", /* align */ 8, /* size */ 24,
+                  /* requireF16Feature */ false)
+                .StorageBufferOnly(),
+            Field("array<vec2<u32>, 4>", /* align */ 8, /* size */ 32,
+                  /* requireF16Feature */ false)
+                .StorageBufferOnly(),
+            Field("array<vec3<u32>, 1>", /* align */ 16, /* size */ 16,
+                  /* requireF16Feature */ false)
+                .Strided(12, 4),
+            Field("array<vec3<u32>, 2>", /* align */ 16, /* size */ 32,
+                  /* requireF16Feature */ false)
+                .Strided(12, 4),
+            Field("array<vec3<u32>, 3>", /* align */ 16, /* size */ 48,
+                  /* requireF16Feature */ false)
+                .Strided(12, 4),
+            Field("array<vec3<u32>, 4>", /* align */ 16, /* size */ 64,
+                  /* requireF16Feature */ false)
+                .Strided(12, 4),
+            Field("array<vec4<u32>, 1>", /* align */ 16, /* size */ 16,
+                  /* requireF16Feature */ false),
+            Field("array<vec4<u32>, 2>", /* align */ 16, /* size */ 32,
+                  /* requireF16Feature */ false),
+            Field("array<vec4<u32>, 3>", /* align */ 16, /* size */ 48,
+                  /* requireF16Feature */ false),
+            Field("array<vec4<u32>, 4>", /* align */ 16, /* size */ 64,
+                  /* requireF16Feature */ false),
 
             // Array types with custom alignment
-            Field("array<u32, 1>", /* align */ 4, /* size */ 4)
+            Field("array<u32, 1>", /* align */ 4, /* size */ 4, /* requireF16Feature */ false)
                 .AlignAttribute(32)
                 .StorageBufferOnly(),
-            Field("array<u32, 2>", /* align */ 4, /* size */ 8)
+            Field("array<u32, 2>", /* align */ 4, /* size */ 8, /* requireF16Feature */ false)
                 .AlignAttribute(32)
                 .StorageBufferOnly(),
-            Field("array<u32, 3>", /* align */ 4, /* size */ 12)
+            Field("array<u32, 3>", /* align */ 4, /* size */ 12, /* requireF16Feature */ false)
                 .AlignAttribute(32)
                 .StorageBufferOnly(),
-            Field("array<u32, 4>", /* align */ 4, /* size */ 16)
+            Field("array<u32, 4>", /* align */ 4, /* size */ 16, /* requireF16Feature */ false)
                 .AlignAttribute(32)
                 .StorageBufferOnly(),
-            Field("array<vec2<u32>, 1>", /* align */ 8, /* size */ 8)
+            Field("array<vec2<u32>, 1>", /* align */ 8, /* size */ 8, /* requireF16Feature */ false)
                 .AlignAttribute(32)
                 .StorageBufferOnly(),
-            Field("array<vec2<u32>, 2>", /* align */ 8, /* size */ 16)
+            Field("array<vec2<u32>, 2>", /* align */ 8, /* size */ 16,
+                  /* requireF16Feature */ false)
                 .AlignAttribute(32)
                 .StorageBufferOnly(),
-            Field("array<vec2<u32>, 3>", /* align */ 8, /* size */ 24)
+            Field("array<vec2<u32>, 3>", /* align */ 8, /* size */ 24,
+                  /* requireF16Feature */ false)
                 .AlignAttribute(32)
                 .StorageBufferOnly(),
-            Field("array<vec2<u32>, 4>", /* align */ 8, /* size */ 32)
+            Field("array<vec2<u32>, 4>", /* align */ 8, /* size */ 32,
+                  /* requireF16Feature */ false)
                 .AlignAttribute(32)
                 .StorageBufferOnly(),
-            Field("array<vec3<u32>, 1>", /* align */ 16, /* size */ 16)
+            Field("array<vec3<u32>, 1>", /* align */ 16, /* size */ 16,
+                  /* requireF16Feature */ false)
                 .AlignAttribute(32)
                 .Strided(12, 4),
-            Field("array<vec3<u32>, 2>", /* align */ 16, /* size */ 32)
+            Field("array<vec3<u32>, 2>", /* align */ 16, /* size */ 32,
+                  /* requireF16Feature */ false)
                 .AlignAttribute(32)
                 .Strided(12, 4),
-            Field("array<vec3<u32>, 3>", /* align */ 16, /* size */ 48)
+            Field("array<vec3<u32>, 3>", /* align */ 16, /* size */ 48,
+                  /* requireF16Feature */ false)
                 .AlignAttribute(32)
                 .Strided(12, 4),
-            Field("array<vec3<u32>, 4>", /* align */ 16, /* size */ 64)
+            Field("array<vec3<u32>, 4>", /* align */ 16, /* size */ 64,
+                  /* requireF16Feature */ false)
                 .AlignAttribute(32)
                 .Strided(12, 4),
-            Field("array<vec4<u32>, 1>", /* align */ 16, /* size */ 16).AlignAttribute(32),
-            Field("array<vec4<u32>, 2>", /* align */ 16, /* size */ 32).AlignAttribute(32),
-            Field("array<vec4<u32>, 3>", /* align */ 16, /* size */ 48).AlignAttribute(32),
-            Field("array<vec4<u32>, 4>", /* align */ 16, /* size */ 64).AlignAttribute(32),
+            Field("array<vec4<u32>, 1>", /* align */ 16, /* size */ 16,
+                  /* requireF16Feature */ false)
+                .AlignAttribute(32),
+            Field("array<vec4<u32>, 2>", /* align */ 16, /* size */ 32,
+                  /* requireF16Feature */ false)
+                .AlignAttribute(32),
+            Field("array<vec4<u32>, 3>", /* align */ 16, /* size */ 48,
+                  /* requireF16Feature */ false)
+                .AlignAttribute(32),
+            Field("array<vec4<u32>, 4>", /* align */ 16, /* size */ 64,
+                  /* requireF16Feature */ false)
+                .AlignAttribute(32),
 
             // Array types with custom size
-            Field("array<u32, 1>", /* align */ 4, /* size */ 4)
+            Field("array<u32, 1>", /* align */ 4, /* size */ 4, /* requireF16Feature */ false)
                 .SizeAttribute(128)
                 .StorageBufferOnly(),
-            Field("array<u32, 2>", /* align */ 4, /* size */ 8)
+            Field("array<u32, 2>", /* align */ 4, /* size */ 8, /* requireF16Feature */ false)
                 .SizeAttribute(128)
                 .StorageBufferOnly(),
-            Field("array<u32, 3>", /* align */ 4, /* size */ 12)
+            Field("array<u32, 3>", /* align */ 4, /* size */ 12, /* requireF16Feature */ false)
                 .SizeAttribute(128)
                 .StorageBufferOnly(),
-            Field("array<u32, 4>", /* align */ 4, /* size */ 16)
+            Field("array<u32, 4>", /* align */ 4, /* size */ 16, /* requireF16Feature */ false)
                 .SizeAttribute(128)
                 .StorageBufferOnly(),
-            Field("array<vec3<u32>, 4>", /* align */ 16, /* size */ 64)
+            Field("array<vec3<u32>, 4>", /* align */ 16, /* size */ 64,
+                  /* requireF16Feature */ false)
                 .SizeAttribute(128)
                 .Strided(12, 4),
+
+            // Array of f32 matrix
+            Field("array<mat2x2<f32>, 3>", /* align */ 8, /* size */ 48,
+                  /* requireF16Feature */ false)
+                .StorageBufferOnly(),
+            // The uniform address space requires the array alignment to be rounded up to 16.
+            Field("array<mat2x2<f32>, 3>", /* align */ 8, /* size */ 48,
+                  /* requireF16Feature */ false)
+                .AlignAttribute(16),
+            Field("array<mat2x3<f32>, 3>", /* align */ 16, /* size */ 96,
+                  /* requireF16Feature */ false)
+                .Strided(12, 4),
+            Field("array<mat2x4<f32>, 3>", /* align */ 16, /* size */ 96,
+                  /* requireF16Feature */ false),
+            Field("array<mat3x2<f32>, 3>", /* align */ 8, /* size */ 72,
+                  /* requireF16Feature */ false)
+                .StorageBufferOnly(),
+            // `mat3x2<f32>` can not be the element type of a uniform array, because its size 24
+            // is not a multiple of 16.
+            Field("array<mat3x2<f32>, 3>", /* align */ 8, /* size */ 72,
+                  /* requireF16Feature */ false)
+                .AlignAttribute(16)
+                .StorageBufferOnly(),
+            Field("array<mat3x3<f32>, 3>", /* align */ 16, /* size */ 144,
+                  /* requireF16Feature */ false)
+                .Strided(12, 4),
+            Field("array<mat3x4<f32>, 3>", /* align */ 16, /* size */ 144,
+                  /* requireF16Feature */ false),
+            Field("array<mat4x2<f32>, 3>", /* align */ 8, /* size */ 96,
+                  /* requireF16Feature */ false)
+                .StorageBufferOnly(),
+            Field("array<mat4x2<f32>, 3>", /* align */ 8, /* size */ 96,
+                  /* requireF16Feature */ false)
+                .AlignAttribute(16),
+            Field("array<mat4x3<f32>, 3>", /* align */ 16, /* size */ 192,
+                  /* requireF16Feature */ false)
+                .Strided(12, 4),
+            Field("array<mat4x4<f32>, 3>", /* align */ 16, /* size */ 192,
+                  /* requireF16Feature */ false),
+
+            // Array of f16 matrix
+            Field("array<mat2x2<f16>, 3>", /* align */ 4, /* size */ 24,
+                  /* requireF16Feature */ true)
+                .StorageBufferOnly(),
+            Field("array<mat2x3<f16>, 3>", /* align */ 8, /* size */ 48,
+                  /* requireF16Feature */ true)
+                .Strided(6, 2)
+                .StorageBufferOnly(),
+            Field("array<mat2x4<f16>, 3>", /* align */ 8, /* size */ 48,
+                  /* requireF16Feature */ true)
+                .StorageBufferOnly(),
+            Field("array<mat3x2<f16>, 3>", /* align */ 4, /* size */ 36,
+                  /* requireF16Feature */ true)
+                .StorageBufferOnly(),
+            Field("array<mat3x3<f16>, 3>", /* align */ 8, /* size */ 72,
+                  /* requireF16Feature */ true)
+                .Strided(6, 2)
+                .StorageBufferOnly(),
+            Field("array<mat3x4<f16>, 3>", /* align */ 8, /* size */ 72,
+                  /* requireF16Feature */ true)
+                .StorageBufferOnly(),
+            Field("array<mat4x2<f16>, 3>", /* align */ 4, /* size */ 48,
+                  /* requireF16Feature */ true)
+                .StorageBufferOnly(),
+            Field("array<mat4x3<f16>, 3>", /* align */ 8, /* size */ 96,
+                  /* requireF16Feature */ true)
+                .Strided(6, 2)
+                .StorageBufferOnly(),
+            Field("array<mat4x4<f16>, 3>", /* align */ 8, /* size */ 96,
+                  /* requireF16Feature */ true)
+                .StorageBufferOnly(),
+            // The uniform address space requires the array alignment to be rounded up to 16, and
+            // the array element size to be a multiple of 16.
+            Field("array<mat2x2<f16>, 3>", /* align */ 4, /* size */ 24,
+                  /* requireF16Feature */ true)
+                .AlignAttribute(16)
+                .StorageBufferOnly(),
+            Field("array<mat2x3<f16>, 3>", /* align */ 8, /* size */ 48,
+                  /* requireF16Feature */ true)
+                .AlignAttribute(16)
+                .Strided(6, 2),
+            Field("array<mat2x4<f16>, 3>", /* align */ 8, /* size */ 48,
+                  /* requireF16Feature */ true)
+                .AlignAttribute(16),
+            Field("array<mat3x2<f16>, 3>", /* align */ 4, /* size */ 36,
+                  /* requireF16Feature */ true)
+                .AlignAttribute(16)
+                .StorageBufferOnly(),
+            Field("array<mat3x3<f16>, 3>", /* align */ 8, /* size */ 72,
+                  /* requireF16Feature */ true)
+                .AlignAttribute(16)
+                .Strided(6, 2)
+                .StorageBufferOnly(),
+            Field("array<mat3x4<f16>, 3>", /* align */ 8, /* size */ 72,
+                  /* requireF16Feature */ true)
+                .AlignAttribute(16)
+                .StorageBufferOnly(),
+            Field("array<mat4x2<f16>, 3>", /* align */ 4, /* size */ 48,
+                  /* requireF16Feature */ true)
+                .AlignAttribute(16),
+            Field("array<mat4x3<f16>, 3>", /* align */ 8, /* size */ 96,
+                  /* requireF16Feature */ true)
+                .AlignAttribute(16)
+                .Strided(6, 2),
+            Field("array<mat4x4<f16>, 3>", /* align */ 8, /* size */ 96,
+                  /* requireF16Feature */ true)
+                .AlignAttribute(16),
         });
 
     std::vector<ComputeLayoutMemoryBufferTestParams> filtered;
diff --git a/src/tint/BUILD.gn b/src/tint/BUILD.gn
index 491fd7d..3741764 100644
--- a/src/tint/BUILD.gn
+++ b/src/tint/BUILD.gn
@@ -1246,6 +1246,8 @@
       "transform/single_entry_point_test.cc",
       "transform/spirv_atomic_test.cc",
       "transform/std140_exhaustive_test.cc",
+      "transform/std140_f16_test.cc",
+      "transform/std140_f32_test.cc",
       "transform/std140_test.cc",
       "transform/substitute_override_test.cc",
       "transform/test_helper.h",
diff --git a/src/tint/CMakeLists.txt b/src/tint/CMakeLists.txt
index dfe9433..a8644b7 100644
--- a/src/tint/CMakeLists.txt
+++ b/src/tint/CMakeLists.txt
@@ -1212,6 +1212,8 @@
       transform/single_entry_point_test.cc
       transform/spirv_atomic_test.cc
       transform/std140_exhaustive_test.cc
+      transform/std140_f16_test.cc
+      transform/std140_f32_test.cc
       transform/std140_test.cc
       transform/substitute_override_test.cc
       transform/test_helper.h
diff --git a/src/tint/resolver/address_space_layout_validation_test.cc b/src/tint/resolver/address_space_layout_validation_test.cc
index 82da573..b34b889 100644
--- a/src/tint/resolver/address_space_layout_validation_test.cc
+++ b/src/tint/resolver/address_space_layout_validation_test.cc
@@ -363,6 +363,29 @@
     ASSERT_TRUE(r()->Resolve()) << r()->error();
 }
 
+// Make sure that this doesn't fail validation: vec3<f16>'s align is 8 but its size is 6, so 's'
+// should be at offset 6, which is okay here.
+TEST_F(ResolverAddressSpaceLayoutValidationTest, UniformBuffer_Vec3F16MemberOffset_NoFail) {
+    // struct ScalarPackedAtEndOfVec3 {
+    //     v : vec3<f16>;
+    //     s : f16;
+    // };
+    // @group(0) @binding(0)
+    // var<uniform> a : ScalarPackedAtEndOfVec3;
+
+    Enable(ast::Extension::kF16);
+
+    Structure("ScalarPackedAtEndOfVec3", utils::Vector{
+                                             Member("v", ty.vec3(ty.f16())),
+                                             Member("s", ty.f16()),
+                                         });
+
+    GlobalVar(Source{{78, 90}}, "a", ty.type_name("ScalarPackedAtEndOfVec3"),
+              ast::AddressSpace::kUniform, Group(0_a), Binding(0_a));
+
+    ASSERT_TRUE(r()->Resolve()) << r()->error();
+}
+
 // Detect array stride must be a multiple of 16 bytes for uniform buffers
 TEST_F(ResolverAddressSpaceLayoutValidationTest, UniformBuffer_InvalidArrayStride_Scalar) {
     // type Inner = array<f32, 10u>;
diff --git a/src/tint/resolver/address_space_validation_test.cc b/src/tint/resolver/address_space_validation_test.cc
index 3ef19a6..60e54df 100644
--- a/src/tint/resolver/address_space_validation_test.cc
+++ b/src/tint/resolver/address_space_validation_test.cc
@@ -113,98 +113,6 @@
 56:78 note: while instantiating 'var' g)");
 }
 
-// F16 types in storage and uniform buffer is not implemented yet.
-// TODO(tint:1473, tint:1502): make these testcases valid after f16 is supported.
-TEST_F(ResolverAddressSpaceValidationTest, StorageBufferF16_TemporallyBan) {
-    // var<storage> g : f16;
-    Enable(ast::Extension::kF16);
-
-    GlobalVar("g", ty.f16(Source{{56, 78}}), ast::AddressSpace::kStorage, Binding(0_a), Group(0_a));
-
-    ASSERT_FALSE(r()->Resolve());
-
-    EXPECT_EQ(r()->error(),
-              "56:78 error: using f16 types in 'storage' address space is not "
-              "implemented yet");
-}
-
-TEST_F(ResolverAddressSpaceValidationTest, StorageBufferF16Alias_TemporallyBan) {
-    // type a = f16;
-    // var<storage, read> g : a;
-    Enable(ast::Extension::kF16);
-
-    auto* a = Alias("a", ty.f16());
-    GlobalVar("g", ty.type_name(Source{{56, 78}}, a->name), ast::AddressSpace::kStorage,
-              Binding(0_a), Group(0_a));
-
-    ASSERT_FALSE(r()->Resolve());
-
-    EXPECT_EQ(r()->error(),
-              "56:78 error: using f16 types in 'storage' address space is not "
-              "implemented yet");
-}
-
-TEST_F(ResolverAddressSpaceValidationTest, StorageBufferVectorF16_TemporallyBan) {
-    // var<storage> g : vec4<f16>;
-    Enable(ast::Extension::kF16);
-    GlobalVar("g", ty.vec(Source{{56, 78}}, ty.Of<f16>(), 4u), ast::AddressSpace::kStorage,
-              Binding(0_a), Group(0_a));
-
-    ASSERT_FALSE(r()->Resolve());
-
-    EXPECT_EQ(r()->error(),
-              "56:78 error: using f16 types in 'storage' address space is not "
-              "implemented yet");
-}
-
-TEST_F(ResolverAddressSpaceValidationTest, StorageBufferArrayF16_TemporallyBan) {
-    // struct S { a : f16 };
-    // var<storage, read> g : array<S, 3u>;
-    Enable(ast::Extension::kF16);
-
-    auto* s = Structure("S", utils::Vector{Member("a", ty.f16(Source{{56, 78}}))});
-    auto* a = ty.array(ty.Of(s), 3_u);
-    GlobalVar("g", a, ast::AddressSpace::kStorage, ast::Access::kRead, Binding(0_a), Group(0_a));
-
-    ASSERT_FALSE(r()->Resolve());
-
-    EXPECT_THAT(r()->error(), HasSubstr("56:78 error: using f16 types in 'storage' address "
-                                        "space is not implemented yet"));
-}
-
-TEST_F(ResolverAddressSpaceValidationTest, StorageBufferStructF16_TemporallyBan) {
-    // struct S { x : f16 };
-    // var<storage, read> g : S;
-    Enable(ast::Extension::kF16);
-
-    auto* s = Structure("S", utils::Vector{Member("x", ty.f16(Source{{12, 34}}))});
-    GlobalVar("g", ty.Of(s), ast::AddressSpace::kStorage, ast::Access::kRead, Binding(0_a),
-              Group(0_a));
-
-    ASSERT_FALSE(r()->Resolve());
-
-    EXPECT_THAT(r()->error(), HasSubstr("12:34 error: using f16 types in 'storage' address "
-                                        "space is not implemented yet"));
-}
-
-TEST_F(ResolverAddressSpaceValidationTest, StorageBufferNoErrorStructF16Aliases_TemporallyBan) {
-    // struct S { x : f16 };
-    // type a1 = S;
-    // var<storage, read> g : a1;
-    Enable(ast::Extension::kF16);
-
-    auto* s = Structure("S", utils::Vector{Member("x", ty.f16(Source{{12, 34}}))});
-    auto* a1 = Alias("a1", ty.Of(s));
-    auto* a2 = Alias("a2", ty.Of(a1));
-    GlobalVar("g", ty.Of(a2), ast::AddressSpace::kStorage, ast::Access::kRead, Binding(0_a),
-              Group(0_a));
-
-    ASSERT_FALSE(r()->Resolve());
-
-    EXPECT_THAT(r()->error(), HasSubstr("12:34 error: using f16 types in 'storage' address "
-                                        "space is not implemented yet"));
-}
-
 TEST_F(ResolverAddressSpaceValidationTest, StorageBufferPointer) {
     // var<storage> g : ptr<private, f32>;
     GlobalVar(Source{{56, 78}}, "g", ty.pointer(ty.f32(), ast::AddressSpace::kPrivate),
@@ -226,6 +134,27 @@
     ASSERT_TRUE(r()->Resolve()) << r()->error();
 }
 
+TEST_F(ResolverAddressSpaceValidationTest, StorageBufferF16) {
+    // var<storage> g : f16;
+    Enable(ast::Extension::kF16);
+
+    GlobalVar("g", ty.f16(Source{{56, 78}}), ast::AddressSpace::kStorage, Binding(0_a), Group(0_a));
+
+    ASSERT_TRUE(r()->Resolve()) << r()->error();
+}
+
+TEST_F(ResolverAddressSpaceValidationTest, StorageBufferF16Alias) {
+    // type a = f16;
+    // var<storage, read> g : a;
+    Enable(ast::Extension::kF16);
+
+    auto* a = Alias("a", ty.f16());
+    GlobalVar("g", ty.type_name(Source{{56, 78}}, a->name), ast::AddressSpace::kStorage,
+              Binding(0_a), Group(0_a));
+
+    ASSERT_TRUE(r()->Resolve()) << r()->error();
+}
+
 TEST_F(ResolverAddressSpaceValidationTest, StorageBufferVectorF32) {
     // var<storage> g : vec4<f32>;
     GlobalVar(Source{{56, 78}}, "g", ty.vec4<f32>(), ast::AddressSpace::kStorage, Binding(0_a),
@@ -234,6 +163,15 @@
     ASSERT_TRUE(r()->Resolve()) << r()->error();
 }
 
+TEST_F(ResolverAddressSpaceValidationTest, StorageBufferVectorF16) {
+    // var<storage> g : vec4<f16>;
+    Enable(ast::Extension::kF16);
+    GlobalVar("g", ty.vec(Source{{56, 78}}, ty.Of<f16>(), 4u), ast::AddressSpace::kStorage,
+              Binding(0_a), Group(0_a));
+
+    ASSERT_TRUE(r()->Resolve()) << r()->error();
+}
+
 TEST_F(ResolverAddressSpaceValidationTest, StorageBufferArrayF32) {
     // var<storage, read> g : array<S, 3u>;
     auto* s = Structure("S", utils::Vector{Member("a", ty.f32())});
@@ -244,6 +182,68 @@
     ASSERT_TRUE(r()->Resolve()) << r()->error();
 }
 
+TEST_F(ResolverAddressSpaceValidationTest, StorageBufferArrayF16) {
+    // var<storage, read> g : array<S, 3u>;
+    Enable(ast::Extension::kF16);
+
+    auto* s = Structure("S", utils::Vector{Member("a", ty.f16())});
+    auto* a = ty.array(ty.Of(s), 3_u);
+    GlobalVar(Source{{56, 78}}, "g", a, ast::AddressSpace::kStorage, ast::Access::kRead,
+              Binding(0_a), Group(0_a));
+
+    ASSERT_TRUE(r()->Resolve()) << r()->error();
+}
+
+TEST_F(ResolverAddressSpaceValidationTest, StorageBufferStructI32) {
+    // struct S { x : i32 };
+    // var<storage, read> g : S;
+    auto* s = Structure("S", utils::Vector{Member(Source{{12, 34}}, "x", ty.i32())});
+    GlobalVar(Source{{56, 78}}, "g", ty.Of(s), ast::AddressSpace::kStorage, ast::Access::kRead,
+              Binding(0_a), Group(0_a));
+
+    ASSERT_TRUE(r()->Resolve()) << r()->error();
+}
+
+TEST_F(ResolverAddressSpaceValidationTest, StorageBufferStructI32Aliases) {
+    // struct S { x : i32 };
+    // type a1 = S; type a2 = a1;
+    // var<storage, read> g : a2;
+    auto* s = Structure("S", utils::Vector{Member(Source{{12, 34}}, "x", ty.i32())});
+    auto* a1 = Alias("a1", ty.Of(s));
+    auto* a2 = Alias("a2", ty.Of(a1));
+    GlobalVar(Source{{56, 78}}, "g", ty.Of(a2), ast::AddressSpace::kStorage, ast::Access::kRead,
+              Binding(0_a), Group(0_a));
+
+    ASSERT_TRUE(r()->Resolve()) << r()->error();
+}
+
+TEST_F(ResolverAddressSpaceValidationTest, StorageBufferStructF16) {
+    // struct S { x : f16 };
+    // var<storage, read> g : S;
+    Enable(ast::Extension::kF16);
+
+    auto* s = Structure("S", utils::Vector{Member("x", ty.f16(Source{{12, 34}}))});
+    GlobalVar("g", ty.Of(s), ast::AddressSpace::kStorage, ast::Access::kRead, Binding(0_a),
+              Group(0_a));
+
+    ASSERT_TRUE(r()->Resolve()) << r()->error();
+}
+
+TEST_F(ResolverAddressSpaceValidationTest, StorageBufferStructF16Aliases) {
+    // struct S { x : f16 };
+    // type a1 = S; type a2 = a1;
+    // var<storage, read> g : a2;
+    Enable(ast::Extension::kF16);
+
+    auto* s = Structure("S", utils::Vector{Member("x", ty.f16(Source{{12, 34}}))});
+    auto* a1 = Alias("a1", ty.Of(s));
+    auto* a2 = Alias("a2", ty.Of(a1));
+    GlobalVar("g", ty.Of(a2), ast::AddressSpace::kStorage, ast::Access::kRead, Binding(0_a),
+              Group(0_a));
+
+    ASSERT_TRUE(r()->Resolve()) << r()->error();
+}
+
 TEST_F(ResolverAddressSpaceValidationTest, NotStorage_AccessMode) {
     // var<private, read> g : a;
     GlobalVar(Source{{56, 78}}, "g", ty.i32(), ast::AddressSpace::kPrivate, ast::Access::kRead);
@@ -282,29 +282,6 @@
               R"(56:78 error: access mode 'write' is not valid for the 'storage' address space)");
 }
 
-TEST_F(ResolverAddressSpaceValidationTest, StorageBufferStructI32) {
-    // struct S { x : i32 };
-    // var<storage, read> g : S;
-    auto* s = Structure("S", utils::Vector{Member(Source{{12, 34}}, "x", ty.i32())});
-    GlobalVar(Source{{56, 78}}, "g", ty.Of(s), ast::AddressSpace::kStorage, ast::Access::kRead,
-              Binding(0_a), Group(0_a));
-
-    ASSERT_TRUE(r()->Resolve());
-}
-
-TEST_F(ResolverAddressSpaceValidationTest, StorageBufferNoErrorStructI32Aliases) {
-    // struct S { x : i32 };
-    // type a1 = S;
-    // var<storage, read> g : a1;
-    auto* s = Structure("S", utils::Vector{Member(Source{{12, 34}}, "x", ty.i32())});
-    auto* a1 = Alias("a1", ty.Of(s));
-    auto* a2 = Alias("a2", ty.Of(a1));
-    GlobalVar(Source{{56, 78}}, "g", ty.Of(a2), ast::AddressSpace::kStorage, ast::Access::kRead,
-              Binding(0_a), Group(0_a));
-
-    ASSERT_TRUE(r()->Resolve());
-}
-
 TEST_F(ResolverAddressSpaceValidationTest, UniformBuffer_Struct_Runtime) {
     // struct S { m:  array<f32>; };
     // @group(0) @binding(0) var<uniform, > svar : S;
@@ -349,97 +326,6 @@
 56:78 note: while instantiating 'var' g)");
 }
 
-// F16 types in storage and uniform buffer is not implemented yet.
-// TODO(tint:1473, tint:1502): make these testcases valid after f16 is supported.
-TEST_F(ResolverAddressSpaceValidationTest, UniformBufferF16_TemporallyBan) {
-    // var<uniform> g : f16;
-    Enable(ast::Extension::kF16);
-
-    GlobalVar("g", ty.f16(Source{{56, 78}}), ast::AddressSpace::kUniform, Binding(0_a), Group(0_a));
-
-    ASSERT_FALSE(r()->Resolve());
-
-    EXPECT_EQ(r()->error(),
-              "56:78 error: using f16 types in 'uniform' address space is not "
-              "implemented yet");
-}
-
-TEST_F(ResolverAddressSpaceValidationTest, UniformBufferF16Alias_TemporallyBan) {
-    // type a = f16;
-    // var<uniform> g : a;
-    Enable(ast::Extension::kF16);
-
-    auto* a = Alias("a", ty.f16());
-    GlobalVar("g", ty.type_name(Source{{56, 78}}, a->name), ast::AddressSpace::kUniform,
-              Binding(0_a), Group(0_a));
-
-    ASSERT_FALSE(r()->Resolve());
-
-    EXPECT_EQ(r()->error(),
-              "56:78 error: using f16 types in 'uniform' address space is not "
-              "implemented yet");
-}
-
-TEST_F(ResolverAddressSpaceValidationTest, UniformBufferVectorF16_TemporallyBan) {
-    // var<uniform> g : vec4<f16>;
-    Enable(ast::Extension::kF16);
-    GlobalVar("g", ty.vec(Source{{56, 78}}, ty.Of<f16>(), 4u), ast::AddressSpace::kUniform,
-              Binding(0_a), Group(0_a));
-
-    ASSERT_FALSE(r()->Resolve());
-
-    EXPECT_THAT(r()->error(), HasSubstr("56:78 error: using f16 types in 'uniform' address "
-                                        "space is not implemented yet"));
-}
-
-TEST_F(ResolverAddressSpaceValidationTest, UniformBufferArrayF16_TemporallyBan) {
-    // struct S {
-    //   @size(16) f : f16;
-    // }
-    // var<uniform> g : array<S, 3u>;
-    Enable(ast::Extension::kF16);
-
-    auto* s = Structure(
-        "S", utils::Vector{Member("a", ty.f16(Source{{56, 78}}), utils::Vector{MemberSize(16_a)})});
-    auto* a = ty.array(ty.Of(s), 3_u);
-    GlobalVar("g", a, ast::AddressSpace::kUniform, Binding(0_a), Group(0_a));
-
-    ASSERT_FALSE(r()->Resolve());
-
-    EXPECT_THAT(r()->error(), HasSubstr("56:78 error: using f16 types in 'uniform' address "
-                                        "space is not implemented yet"));
-}
-
-TEST_F(ResolverAddressSpaceValidationTest, UniformBufferStructF16_TemporallyBan) {
-    // struct S { x : f16 };
-    // var<uniform> g :  S;
-    Enable(ast::Extension::kF16);
-
-    auto* s = Structure("S", utils::Vector{Member("x", ty.f16(Source{{12, 34}}))});
-    GlobalVar("g", ty.Of(s), ast::AddressSpace::kUniform, Binding(0_a), Group(0_a));
-
-    ASSERT_FALSE(r()->Resolve());
-
-    EXPECT_THAT(r()->error(), HasSubstr("12:34 error: using f16 types in 'uniform' address "
-                                        "space is not implemented yet"));
-}
-
-TEST_F(ResolverAddressSpaceValidationTest, UniformBufferStructF16Aliases_TemporallyBan) {
-    // struct S { x : f16 };
-    // type a1 = S;
-    // var<uniform> g : a1;
-    Enable(ast::Extension::kF16);
-
-    auto* s = Structure("S", utils::Vector{Member("x", ty.f16(Source{{12, 34}}))});
-    auto* a1 = Alias("a1", ty.Of(s));
-    GlobalVar("g", ty.Of(a1), ast::AddressSpace::kUniform, Binding(0_a), Group(0_a));
-
-    ASSERT_FALSE(r()->Resolve());
-
-    EXPECT_THAT(r()->error(), HasSubstr("12:34 error: using f16 types in 'uniform' address "
-                                        "space is not implemented yet"));
-}
-
 TEST_F(ResolverAddressSpaceValidationTest, UniformBufferPointer) {
     // var<uniform> g : ptr<private, f32>;
     GlobalVar(Source{{56, 78}}, "g", ty.pointer(ty.f32(), ast::AddressSpace::kPrivate),
@@ -461,6 +347,16 @@
     ASSERT_TRUE(r()->Resolve()) << r()->error();
 }
 
+TEST_F(ResolverAddressSpaceValidationTest, UniformBufferF16) {
+    // var<uniform> g : f16;
+    Enable(ast::Extension::kF16);
+
+    GlobalVar(Source{{56, 78}}, "g", ty.f16(), ast::AddressSpace::kUniform, Binding(0_a),
+              Group(0_a));
+
+    ASSERT_TRUE(r()->Resolve()) << r()->error();
+}
+
 TEST_F(ResolverAddressSpaceValidationTest, UniformBufferVectorF32) {
     // var<uniform> g : vec4<f32>;
     GlobalVar(Source{{56, 78}}, "g", ty.vec4<f32>(), ast::AddressSpace::kUniform, Binding(0_a),
@@ -469,6 +365,16 @@
     ASSERT_TRUE(r()->Resolve()) << r()->error();
 }
 
+TEST_F(ResolverAddressSpaceValidationTest, UniformBufferVectorF16) {
+    // var<uniform> g : vec4<f16>;
+    Enable(ast::Extension::kF16);
+
+    GlobalVar(Source{{56, 78}}, "g", ty.vec4<f16>(), ast::AddressSpace::kUniform, Binding(0_a),
+              Group(0_a));
+
+    ASSERT_TRUE(r()->Resolve()) << r()->error();
+}
+
 TEST_F(ResolverAddressSpaceValidationTest, UniformBufferArrayF32) {
     // struct S {
     //   @size(16) f : f32;
@@ -481,6 +387,20 @@
     ASSERT_TRUE(r()->Resolve()) << r()->error();
 }
 
+TEST_F(ResolverAddressSpaceValidationTest, UniformBufferArrayF16) {
+    // struct S {
+    //   @size(16) a : f16;
+    // }
+    // var<uniform> g : array<S, 3u>;
+    Enable(ast::Extension::kF16);
+
+    auto* s = Structure("S", utils::Vector{Member("a", ty.f16(), utils::Vector{MemberSize(16_a)})});
+    auto* a = ty.array(ty.Of(s), 3_u);
+    GlobalVar(Source{{56, 78}}, "g", a, ast::AddressSpace::kUniform, Binding(0_a), Group(0_a));
+
+    ASSERT_TRUE(r()->Resolve()) << r()->error();
+}
+
 TEST_F(ResolverAddressSpaceValidationTest, UniformBufferStructI32) {
     // struct S { x : i32 };
     // var<uniform> g :  S;
@@ -503,6 +423,32 @@
     ASSERT_TRUE(r()->Resolve()) << r()->error();
 }
 
+TEST_F(ResolverAddressSpaceValidationTest, UniformBufferStructF16) {
+    // struct S { x : f16 };
+    // var<uniform> g :  S;
+    Enable(ast::Extension::kF16);
+
+    auto* s = Structure("S", utils::Vector{Member(Source{{12, 34}}, "x", ty.f16())});
+    GlobalVar(Source{{56, 78}}, "g", ty.Of(s), ast::AddressSpace::kUniform, Binding(0_a),
+              Group(0_a));
+
+    ASSERT_TRUE(r()->Resolve()) << r()->error();
+}
+
+TEST_F(ResolverAddressSpaceValidationTest, UniformBufferStructF16Aliases) {
+    // struct S { x : f16 };
+    // type a1 = S;
+    // var<uniform> g : a1;
+    Enable(ast::Extension::kF16);
+
+    auto* s = Structure("S", utils::Vector{Member(Source{{12, 34}}, "x", ty.f16())});
+    auto* a1 = Alias("a1", ty.Of(s));
+    GlobalVar(Source{{56, 78}}, "g", ty.Of(a1), ast::AddressSpace::kUniform, Binding(0_a),
+              Group(0_a));
+
+    ASSERT_TRUE(r()->Resolve()) << r()->error();
+}
+
 TEST_F(ResolverAddressSpaceValidationTest, PushConstantBool) {
     // enable chromium_experimental_push_constant;
     // var<push_constant> g : bool;
diff --git a/src/tint/resolver/validator.cc b/src/tint/resolver/validator.cc
index 2fc8f21..70d9f8e 100644
--- a/src/tint/resolver/validator.cc
+++ b/src/tint/resolver/validator.cc
@@ -395,13 +395,11 @@
         return true;
     }
 
-    // Temporally forbid using f16 types in "uniform" and "storage" address space.
-    // TODO(tint:1473, tint:1502): Remove this error after f16 is supported in "uniform" and
-    // "storage" address space but keep for "push_constant" address space.
-    if (Is<sem::F16>(sem::Type::DeepestElementOf(store_ty))) {
-        AddError("using f16 types in '" + utils::ToString(address_space) +
-                     "' address space is not implemented yet",
-                 source);
+    // Among the three host-shareable address spaces, f16 is supported in the "uniform" and
+    // "storage" address spaces, but not yet in the "push_constant" address space.
+    if (Is<sem::F16>(sem::Type::DeepestElementOf(store_ty)) &&
+        address_space == ast::AddressSpace::kPushConstant) {
+        AddError("using f16 types in 'push_constant' address space is not implemented yet", source);
         return false;
     }
 
diff --git a/src/tint/transform/decompose_memory_access.cc b/src/tint/transform/decompose_memory_access.cc
index 046583e..3be550c 100644
--- a/src/tint/transform/decompose_memory_access.cc
+++ b/src/tint/transform/decompose_memory_access.cc
@@ -153,6 +153,10 @@
         out = DecomposeMemoryAccess::Intrinsic::DataType::kF32;
         return true;
     }
+    if (ty->Is<sem::F16>()) {
+        out = DecomposeMemoryAccess::Intrinsic::DataType::kF16;
+        return true;
+    }
     if (auto* vec = ty->As<sem::Vector>()) {
         switch (vec->Width()) {
             case 2:
@@ -168,6 +172,10 @@
                     out = DecomposeMemoryAccess::Intrinsic::DataType::kVec2F32;
                     return true;
                 }
+                if (vec->type()->Is<sem::F16>()) {
+                    out = DecomposeMemoryAccess::Intrinsic::DataType::kVec2F16;
+                    return true;
+                }
                 break;
             case 3:
                 if (vec->type()->Is<sem::I32>()) {
@@ -182,6 +190,10 @@
                     out = DecomposeMemoryAccess::Intrinsic::DataType::kVec3F32;
                     return true;
                 }
+                if (vec->type()->Is<sem::F16>()) {
+                    out = DecomposeMemoryAccess::Intrinsic::DataType::kVec3F16;
+                    return true;
+                }
                 break;
             case 4:
                 if (vec->type()->Is<sem::I32>()) {
@@ -196,6 +208,10 @@
                     out = DecomposeMemoryAccess::Intrinsic::DataType::kVec4F32;
                     return true;
                 }
+                if (vec->type()->Is<sem::F16>()) {
+                    out = DecomposeMemoryAccess::Intrinsic::DataType::kVec4F16;
+                    return true;
+                }
                 break;
         }
         return false;
@@ -776,6 +792,9 @@
         case DataType::kI32:
             ss << "i32";
             break;
+        case DataType::kF16:
+            ss << "f16";
+            break;
         case DataType::kVec2U32:
             ss << "vec2_u32";
             break;
@@ -785,6 +804,9 @@
         case DataType::kVec2I32:
             ss << "vec2_i32";
             break;
+        case DataType::kVec2F16:
+            ss << "vec2_f16";
+            break;
         case DataType::kVec3U32:
             ss << "vec3_u32";
             break;
@@ -794,6 +816,9 @@
         case DataType::kVec3I32:
             ss << "vec3_i32";
             break;
+        case DataType::kVec3F16:
+            ss << "vec3_f16";
+            break;
         case DataType::kVec4U32:
             ss << "vec4_u32";
             break;
@@ -803,6 +828,9 @@
         case DataType::kVec4I32:
             ss << "vec4_i32";
             break;
+        case DataType::kVec4F16:
+            ss << "vec4_f16";
+            break;
     }
     return ss.str();
 }
diff --git a/src/tint/transform/decompose_memory_access.h b/src/tint/transform/decompose_memory_access.h
index 21c196b..3c620e0 100644
--- a/src/tint/transform/decompose_memory_access.h
+++ b/src/tint/transform/decompose_memory_access.h
@@ -60,15 +60,19 @@
             kU32,
             kF32,
             kI32,
+            kF16,
             kVec2U32,
             kVec2F32,
             kVec2I32,
+            kVec2F16,
             kVec3U32,
             kVec3F32,
             kVec3I32,
+            kVec3F16,
             kVec4U32,
             kVec4F32,
             kVec4I32,
+            kVec4F16,
         };
 
         /// Constructor
diff --git a/src/tint/transform/decompose_memory_access_test.cc b/src/tint/transform/decompose_memory_access_test.cc
index 581731e..ac798e0 100644
--- a/src/tint/transform/decompose_memory_access_test.cc
+++ b/src/tint/transform/decompose_memory_access_test.cc
@@ -51,192 +51,308 @@
 
 TEST_F(DecomposeMemoryAccessTest, SB_BasicLoad) {
     auto* src = R"(
+enable f16;
+
 struct SB {
-  a : i32,
-  b : u32,
-  c : f32,
-  d : vec2<i32>,
-  e : vec2<u32>,
-  f : vec2<f32>,
-  g : vec3<i32>,
-  h : vec3<u32>,
-  i : vec3<f32>,
-  j : vec4<i32>,
-  k : vec4<u32>,
-  l : vec4<f32>,
-  m : mat2x2<f32>,
-  n : mat2x3<f32>,
-  o : mat2x4<f32>,
-  p : mat3x2<f32>,
-  q : mat3x3<f32>,
-  r : mat3x4<f32>,
-  s : mat4x2<f32>,
-  t : mat4x3<f32>,
-  u : mat4x4<f32>,
-  v : array<vec3<f32>, 2>,
+  scalar_f32 : f32,
+  scalar_i32 : i32,
+  scalar_u32 : u32,
+  scalar_f16 : f16,
+  vec2_f32 : vec2<f32>,
+  vec2_i32 : vec2<i32>,
+  vec2_u32 : vec2<u32>,
+  vec2_f16 : vec2<f16>,
+  vec3_f32 : vec3<f32>,
+  vec3_i32 : vec3<i32>,
+  vec3_u32 : vec3<u32>,
+  vec3_f16 : vec3<f16>,
+  vec4_f32 : vec4<f32>,
+  vec4_i32 : vec4<i32>,
+  vec4_u32 : vec4<u32>,
+  vec4_f16 : vec4<f16>,
+  mat2x2_f32 : mat2x2<f32>,
+  mat2x3_f32 : mat2x3<f32>,
+  mat2x4_f32 : mat2x4<f32>,
+  mat3x2_f32 : mat3x2<f32>,
+  mat3x3_f32 : mat3x3<f32>,
+  mat3x4_f32 : mat3x4<f32>,
+  mat4x2_f32 : mat4x2<f32>,
+  mat4x3_f32 : mat4x3<f32>,
+  mat4x4_f32 : mat4x4<f32>,
+  mat2x2_f16 : mat2x2<f16>,
+  mat2x3_f16 : mat2x3<f16>,
+  mat2x4_f16 : mat2x4<f16>,
+  mat3x2_f16 : mat3x2<f16>,
+  mat3x3_f16 : mat3x3<f16>,
+  mat3x4_f16 : mat3x4<f16>,
+  mat4x2_f16 : mat4x2<f16>,
+  mat4x3_f16 : mat4x3<f16>,
+  mat4x4_f16 : mat4x4<f16>,
+  arr2_vec3_f32 : array<vec3<f32>, 2>,
+  arr2_vec3_f16 : array<vec3<f16>, 2>,
 };
 
 @group(0) @binding(0) var<storage, read_write> sb : SB;
 
 @compute @workgroup_size(1)
 fn main() {
-  var a : i32 = sb.a;
-  var b : u32 = sb.b;
-  var c : f32 = sb.c;
-  var d : vec2<i32> = sb.d;
-  var e : vec2<u32> = sb.e;
-  var f : vec2<f32> = sb.f;
-  var g : vec3<i32> = sb.g;
-  var h : vec3<u32> = sb.h;
-  var i : vec3<f32> = sb.i;
-  var j : vec4<i32> = sb.j;
-  var k : vec4<u32> = sb.k;
-  var l : vec4<f32> = sb.l;
-  var m : mat2x2<f32> = sb.m;
-  var n : mat2x3<f32> = sb.n;
-  var o : mat2x4<f32> = sb.o;
-  var p : mat3x2<f32> = sb.p;
-  var q : mat3x3<f32> = sb.q;
-  var r : mat3x4<f32> = sb.r;
-  var s : mat4x2<f32> = sb.s;
-  var t : mat4x3<f32> = sb.t;
-  var u : mat4x4<f32> = sb.u;
-  var v : array<vec3<f32>, 2> = sb.v;
+  var scalar_f32 : f32 = sb.scalar_f32;
+  var scalar_i32 : i32 = sb.scalar_i32;
+  var scalar_u32 : u32 = sb.scalar_u32;
+  var scalar_f16 : f16 = sb.scalar_f16;
+  var vec2_f32 : vec2<f32> = sb.vec2_f32;
+  var vec2_i32 : vec2<i32> = sb.vec2_i32;
+  var vec2_u32 : vec2<u32> = sb.vec2_u32;
+  var vec2_f16 : vec2<f16> = sb.vec2_f16;
+  var vec3_f32 : vec3<f32> = sb.vec3_f32;
+  var vec3_i32 : vec3<i32> = sb.vec3_i32;
+  var vec3_u32 : vec3<u32> = sb.vec3_u32;
+  var vec3_f16 : vec3<f16> = sb.vec3_f16;
+  var vec4_f32 : vec4<f32> = sb.vec4_f32;
+  var vec4_i32 : vec4<i32> = sb.vec4_i32;
+  var vec4_u32 : vec4<u32> = sb.vec4_u32;
+  var vec4_f16 : vec4<f16> = sb.vec4_f16;
+  var mat2x2_f32 : mat2x2<f32> = sb.mat2x2_f32;
+  var mat2x3_f32 : mat2x3<f32> = sb.mat2x3_f32;
+  var mat2x4_f32 : mat2x4<f32> = sb.mat2x4_f32;
+  var mat3x2_f32 : mat3x2<f32> = sb.mat3x2_f32;
+  var mat3x3_f32 : mat3x3<f32> = sb.mat3x3_f32;
+  var mat3x4_f32 : mat3x4<f32> = sb.mat3x4_f32;
+  var mat4x2_f32 : mat4x2<f32> = sb.mat4x2_f32;
+  var mat4x3_f32 : mat4x3<f32> = sb.mat4x3_f32;
+  var mat4x4_f32 : mat4x4<f32> = sb.mat4x4_f32;
+  var mat2x2_f16 : mat2x2<f16> = sb.mat2x2_f16;
+  var mat2x3_f16 : mat2x3<f16> = sb.mat2x3_f16;
+  var mat2x4_f16 : mat2x4<f16> = sb.mat2x4_f16;
+  var mat3x2_f16 : mat3x2<f16> = sb.mat3x2_f16;
+  var mat3x3_f16 : mat3x3<f16> = sb.mat3x3_f16;
+  var mat3x4_f16 : mat3x4<f16> = sb.mat3x4_f16;
+  var mat4x2_f16 : mat4x2<f16> = sb.mat4x2_f16;
+  var mat4x3_f16 : mat4x3<f16> = sb.mat4x3_f16;
+  var mat4x4_f16 : mat4x4<f16> = sb.mat4x4_f16;
+  var arr2_vec3_f32 : array<vec3<f32>, 2> = sb.arr2_vec3_f32;
+  var arr2_vec3_f16 : array<vec3<f16>, 2> = sb.arr2_vec3_f16;
 }
 )";
 
     auto* expect = R"(
+enable f16;
+
 struct SB {
-  a : i32,
-  b : u32,
-  c : f32,
-  d : vec2<i32>,
-  e : vec2<u32>,
-  f : vec2<f32>,
-  g : vec3<i32>,
-  h : vec3<u32>,
-  i : vec3<f32>,
-  j : vec4<i32>,
-  k : vec4<u32>,
-  l : vec4<f32>,
-  m : mat2x2<f32>,
-  n : mat2x3<f32>,
-  o : mat2x4<f32>,
-  p : mat3x2<f32>,
-  q : mat3x3<f32>,
-  r : mat3x4<f32>,
-  s : mat4x2<f32>,
-  t : mat4x3<f32>,
-  u : mat4x4<f32>,
-  v : array<vec3<f32>, 2>,
+  scalar_f32 : f32,
+  scalar_i32 : i32,
+  scalar_u32 : u32,
+  scalar_f16 : f16,
+  vec2_f32 : vec2<f32>,
+  vec2_i32 : vec2<i32>,
+  vec2_u32 : vec2<u32>,
+  vec2_f16 : vec2<f16>,
+  vec3_f32 : vec3<f32>,
+  vec3_i32 : vec3<i32>,
+  vec3_u32 : vec3<u32>,
+  vec3_f16 : vec3<f16>,
+  vec4_f32 : vec4<f32>,
+  vec4_i32 : vec4<i32>,
+  vec4_u32 : vec4<u32>,
+  vec4_f16 : vec4<f16>,
+  mat2x2_f32 : mat2x2<f32>,
+  mat2x3_f32 : mat2x3<f32>,
+  mat2x4_f32 : mat2x4<f32>,
+  mat3x2_f32 : mat3x2<f32>,
+  mat3x3_f32 : mat3x3<f32>,
+  mat3x4_f32 : mat3x4<f32>,
+  mat4x2_f32 : mat4x2<f32>,
+  mat4x3_f32 : mat4x3<f32>,
+  mat4x4_f32 : mat4x4<f32>,
+  mat2x2_f16 : mat2x2<f16>,
+  mat2x3_f16 : mat2x3<f16>,
+  mat2x4_f16 : mat2x4<f16>,
+  mat3x2_f16 : mat3x2<f16>,
+  mat3x3_f16 : mat3x3<f16>,
+  mat3x4_f16 : mat3x4<f16>,
+  mat4x2_f16 : mat4x2<f16>,
+  mat4x3_f16 : mat4x3<f16>,
+  mat4x4_f16 : mat4x4<f16>,
+  arr2_vec3_f32 : array<vec3<f32>, 2>,
+  arr2_vec3_f16 : array<vec3<f16>, 2>,
 }
 
 @group(0) @binding(0) var<storage, read_write> sb : SB;
 
+@internal(intrinsic_load_storage_f32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> f32
+
 @internal(intrinsic_load_storage_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> i32
+fn tint_symbol_1(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> i32
 
 @internal(intrinsic_load_storage_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_1(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> u32
+fn tint_symbol_2(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> u32
 
-@internal(intrinsic_load_storage_f32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_2(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> f32
-
-@internal(intrinsic_load_storage_vec2_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_3(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec2<i32>
-
-@internal(intrinsic_load_storage_vec2_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_4(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec2<u32>
+@internal(intrinsic_load_storage_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_3(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> f16
 
 @internal(intrinsic_load_storage_vec2_f32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_5(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec2<f32>
+fn tint_symbol_4(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec2<f32>
 
-@internal(intrinsic_load_storage_vec3_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_6(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec3<i32>
+@internal(intrinsic_load_storage_vec2_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_5(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec2<i32>
 
-@internal(intrinsic_load_storage_vec3_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_7(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec3<u32>
+@internal(intrinsic_load_storage_vec2_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_6(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec2<u32>
+
+@internal(intrinsic_load_storage_vec2_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_7(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec2<f16>
 
 @internal(intrinsic_load_storage_vec3_f32) @internal(disable_validation__function_has_no_body)
 fn tint_symbol_8(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec3<f32>
 
-@internal(intrinsic_load_storage_vec4_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_9(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec4<i32>
+@internal(intrinsic_load_storage_vec3_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_9(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec3<i32>
 
-@internal(intrinsic_load_storage_vec4_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_10(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec4<u32>
+@internal(intrinsic_load_storage_vec3_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_10(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec3<u32>
+
+@internal(intrinsic_load_storage_vec3_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_11(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec3<f16>
 
 @internal(intrinsic_load_storage_vec4_f32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_11(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec4<f32>
+fn tint_symbol_12(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec4<f32>
 
-fn tint_symbol_12(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x2<f32> {
-  return mat2x2<f32>(tint_symbol_5(buffer, (offset + 0u)), tint_symbol_5(buffer, (offset + 8u)));
+@internal(intrinsic_load_storage_vec4_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_13(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec4<i32>
+
+@internal(intrinsic_load_storage_vec4_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_14(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec4<u32>
+
+@internal(intrinsic_load_storage_vec4_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_15(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec4<f16>
+
+fn tint_symbol_16(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x2<f32> {
+  return mat2x2<f32>(tint_symbol_4(buffer, (offset + 0u)), tint_symbol_4(buffer, (offset + 8u)));
 }
 
-fn tint_symbol_13(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x3<f32> {
+fn tint_symbol_17(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x3<f32> {
   return mat2x3<f32>(tint_symbol_8(buffer, (offset + 0u)), tint_symbol_8(buffer, (offset + 16u)));
 }
 
-fn tint_symbol_14(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x4<f32> {
-  return mat2x4<f32>(tint_symbol_11(buffer, (offset + 0u)), tint_symbol_11(buffer, (offset + 16u)));
+fn tint_symbol_18(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x4<f32> {
+  return mat2x4<f32>(tint_symbol_12(buffer, (offset + 0u)), tint_symbol_12(buffer, (offset + 16u)));
 }
 
-fn tint_symbol_15(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x2<f32> {
-  return mat3x2<f32>(tint_symbol_5(buffer, (offset + 0u)), tint_symbol_5(buffer, (offset + 8u)), tint_symbol_5(buffer, (offset + 16u)));
+fn tint_symbol_19(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x2<f32> {
+  return mat3x2<f32>(tint_symbol_4(buffer, (offset + 0u)), tint_symbol_4(buffer, (offset + 8u)), tint_symbol_4(buffer, (offset + 16u)));
 }
 
-fn tint_symbol_16(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x3<f32> {
+fn tint_symbol_20(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x3<f32> {
   return mat3x3<f32>(tint_symbol_8(buffer, (offset + 0u)), tint_symbol_8(buffer, (offset + 16u)), tint_symbol_8(buffer, (offset + 32u)));
 }
 
-fn tint_symbol_17(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x4<f32> {
-  return mat3x4<f32>(tint_symbol_11(buffer, (offset + 0u)), tint_symbol_11(buffer, (offset + 16u)), tint_symbol_11(buffer, (offset + 32u)));
+fn tint_symbol_21(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x4<f32> {
+  return mat3x4<f32>(tint_symbol_12(buffer, (offset + 0u)), tint_symbol_12(buffer, (offset + 16u)), tint_symbol_12(buffer, (offset + 32u)));
 }
 
-fn tint_symbol_18(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x2<f32> {
-  return mat4x2<f32>(tint_symbol_5(buffer, (offset + 0u)), tint_symbol_5(buffer, (offset + 8u)), tint_symbol_5(buffer, (offset + 16u)), tint_symbol_5(buffer, (offset + 24u)));
+fn tint_symbol_22(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x2<f32> {
+  return mat4x2<f32>(tint_symbol_4(buffer, (offset + 0u)), tint_symbol_4(buffer, (offset + 8u)), tint_symbol_4(buffer, (offset + 16u)), tint_symbol_4(buffer, (offset + 24u)));
 }
 
-fn tint_symbol_19(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x3<f32> {
+fn tint_symbol_23(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x3<f32> {
   return mat4x3<f32>(tint_symbol_8(buffer, (offset + 0u)), tint_symbol_8(buffer, (offset + 16u)), tint_symbol_8(buffer, (offset + 32u)), tint_symbol_8(buffer, (offset + 48u)));
 }
 
-fn tint_symbol_20(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x4<f32> {
-  return mat4x4<f32>(tint_symbol_11(buffer, (offset + 0u)), tint_symbol_11(buffer, (offset + 16u)), tint_symbol_11(buffer, (offset + 32u)), tint_symbol_11(buffer, (offset + 48u)));
+fn tint_symbol_24(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x4<f32> {
+  return mat4x4<f32>(tint_symbol_12(buffer, (offset + 0u)), tint_symbol_12(buffer, (offset + 16u)), tint_symbol_12(buffer, (offset + 32u)), tint_symbol_12(buffer, (offset + 48u)));
 }
 
-fn tint_symbol_21(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> array<vec3<f32>, 2u> {
+fn tint_symbol_25(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x2<f16> {
+  return mat2x2<f16>(tint_symbol_7(buffer, (offset + 0u)), tint_symbol_7(buffer, (offset + 4u)));
+}
+
+fn tint_symbol_26(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x3<f16> {
+  return mat2x3<f16>(tint_symbol_11(buffer, (offset + 0u)), tint_symbol_11(buffer, (offset + 8u)));
+}
+
+fn tint_symbol_27(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x4<f16> {
+  return mat2x4<f16>(tint_symbol_15(buffer, (offset + 0u)), tint_symbol_15(buffer, (offset + 8u)));
+}
+
+fn tint_symbol_28(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x2<f16> {
+  return mat3x2<f16>(tint_symbol_7(buffer, (offset + 0u)), tint_symbol_7(buffer, (offset + 4u)), tint_symbol_7(buffer, (offset + 8u)));
+}
+
+fn tint_symbol_29(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x3<f16> {
+  return mat3x3<f16>(tint_symbol_11(buffer, (offset + 0u)), tint_symbol_11(buffer, (offset + 8u)), tint_symbol_11(buffer, (offset + 16u)));
+}
+
+fn tint_symbol_30(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x4<f16> {
+  return mat3x4<f16>(tint_symbol_15(buffer, (offset + 0u)), tint_symbol_15(buffer, (offset + 8u)), tint_symbol_15(buffer, (offset + 16u)));
+}
+
+fn tint_symbol_31(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x2<f16> {
+  return mat4x2<f16>(tint_symbol_7(buffer, (offset + 0u)), tint_symbol_7(buffer, (offset + 4u)), tint_symbol_7(buffer, (offset + 8u)), tint_symbol_7(buffer, (offset + 12u)));
+}
+
+fn tint_symbol_32(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x3<f16> {
+  return mat4x3<f16>(tint_symbol_11(buffer, (offset + 0u)), tint_symbol_11(buffer, (offset + 8u)), tint_symbol_11(buffer, (offset + 16u)), tint_symbol_11(buffer, (offset + 24u)));
+}
+
+fn tint_symbol_33(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x4<f16> {
+  return mat4x4<f16>(tint_symbol_15(buffer, (offset + 0u)), tint_symbol_15(buffer, (offset + 8u)), tint_symbol_15(buffer, (offset + 16u)), tint_symbol_15(buffer, (offset + 24u)));
+}
+
+fn tint_symbol_34(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> array<vec3<f32>, 2u> {
   var arr : array<vec3<f32>, 2u>;
-  for(var i_1 = 0u; (i_1 < 2u); i_1 = (i_1 + 1u)) {
-    arr[i_1] = tint_symbol_8(buffer, (offset + (i_1 * 16u)));
+  for(var i = 0u; (i < 2u); i = (i + 1u)) {
+    arr[i] = tint_symbol_8(buffer, (offset + (i * 16u)));
   }
   return arr;
 }
 
+fn tint_symbol_35(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> array<vec3<f16>, 2u> {
+  var arr_1 : array<vec3<f16>, 2u>;
+  for(var i_1 = 0u; (i_1 < 2u); i_1 = (i_1 + 1u)) {
+    arr_1[i_1] = tint_symbol_11(buffer, (offset + (i_1 * 8u)));
+  }
+  return arr_1;
+}
+
 @compute @workgroup_size(1)
 fn main() {
-  var a : i32 = tint_symbol(&(sb), 0u);
-  var b : u32 = tint_symbol_1(&(sb), 4u);
-  var c : f32 = tint_symbol_2(&(sb), 8u);
-  var d : vec2<i32> = tint_symbol_3(&(sb), 16u);
-  var e : vec2<u32> = tint_symbol_4(&(sb), 24u);
-  var f : vec2<f32> = tint_symbol_5(&(sb), 32u);
-  var g : vec3<i32> = tint_symbol_6(&(sb), 48u);
-  var h : vec3<u32> = tint_symbol_7(&(sb), 64u);
-  var i : vec3<f32> = tint_symbol_8(&(sb), 80u);
-  var j : vec4<i32> = tint_symbol_9(&(sb), 96u);
-  var k : vec4<u32> = tint_symbol_10(&(sb), 112u);
-  var l : vec4<f32> = tint_symbol_11(&(sb), 128u);
-  var m : mat2x2<f32> = tint_symbol_12(&(sb), 144u);
-  var n : mat2x3<f32> = tint_symbol_13(&(sb), 160u);
-  var o : mat2x4<f32> = tint_symbol_14(&(sb), 192u);
-  var p : mat3x2<f32> = tint_symbol_15(&(sb), 224u);
-  var q : mat3x3<f32> = tint_symbol_16(&(sb), 256u);
-  var r : mat3x4<f32> = tint_symbol_17(&(sb), 304u);
-  var s : mat4x2<f32> = tint_symbol_18(&(sb), 352u);
-  var t : mat4x3<f32> = tint_symbol_19(&(sb), 384u);
-  var u : mat4x4<f32> = tint_symbol_20(&(sb), 448u);
-  var v : array<vec3<f32>, 2> = tint_symbol_21(&(sb), 512u);
+  var scalar_f32 : f32 = tint_symbol(&(sb), 0u);
+  var scalar_i32 : i32 = tint_symbol_1(&(sb), 4u);
+  var scalar_u32 : u32 = tint_symbol_2(&(sb), 8u);
+  var scalar_f16 : f16 = tint_symbol_3(&(sb), 12u);
+  var vec2_f32 : vec2<f32> = tint_symbol_4(&(sb), 16u);
+  var vec2_i32 : vec2<i32> = tint_symbol_5(&(sb), 24u);
+  var vec2_u32 : vec2<u32> = tint_symbol_6(&(sb), 32u);
+  var vec2_f16 : vec2<f16> = tint_symbol_7(&(sb), 40u);
+  var vec3_f32 : vec3<f32> = tint_symbol_8(&(sb), 48u);
+  var vec3_i32 : vec3<i32> = tint_symbol_9(&(sb), 64u);
+  var vec3_u32 : vec3<u32> = tint_symbol_10(&(sb), 80u);
+  var vec3_f16 : vec3<f16> = tint_symbol_11(&(sb), 96u);
+  var vec4_f32 : vec4<f32> = tint_symbol_12(&(sb), 112u);
+  var vec4_i32 : vec4<i32> = tint_symbol_13(&(sb), 128u);
+  var vec4_u32 : vec4<u32> = tint_symbol_14(&(sb), 144u);
+  var vec4_f16 : vec4<f16> = tint_symbol_15(&(sb), 160u);
+  var mat2x2_f32 : mat2x2<f32> = tint_symbol_16(&(sb), 168u);
+  var mat2x3_f32 : mat2x3<f32> = tint_symbol_17(&(sb), 192u);
+  var mat2x4_f32 : mat2x4<f32> = tint_symbol_18(&(sb), 224u);
+  var mat3x2_f32 : mat3x2<f32> = tint_symbol_19(&(sb), 256u);
+  var mat3x3_f32 : mat3x3<f32> = tint_symbol_20(&(sb), 288u);
+  var mat3x4_f32 : mat3x4<f32> = tint_symbol_21(&(sb), 336u);
+  var mat4x2_f32 : mat4x2<f32> = tint_symbol_22(&(sb), 384u);
+  var mat4x3_f32 : mat4x3<f32> = tint_symbol_23(&(sb), 416u);
+  var mat4x4_f32 : mat4x4<f32> = tint_symbol_24(&(sb), 480u);
+  var mat2x2_f16 : mat2x2<f16> = tint_symbol_25(&(sb), 544u);
+  var mat2x3_f16 : mat2x3<f16> = tint_symbol_26(&(sb), 552u);
+  var mat2x4_f16 : mat2x4<f16> = tint_symbol_27(&(sb), 568u);
+  var mat3x2_f16 : mat3x2<f16> = tint_symbol_28(&(sb), 584u);
+  var mat3x3_f16 : mat3x3<f16> = tint_symbol_29(&(sb), 600u);
+  var mat3x4_f16 : mat3x4<f16> = tint_symbol_30(&(sb), 624u);
+  var mat4x2_f16 : mat4x2<f16> = tint_symbol_31(&(sb), 648u);
+  var mat4x3_f16 : mat4x3<f16> = tint_symbol_32(&(sb), 664u);
+  var mat4x4_f16 : mat4x4<f16> = tint_symbol_33(&(sb), 696u);
+  var arr2_vec3_f32 : array<vec3<f32>, 2> = tint_symbol_34(&(sb), 736u);
+  var arr2_vec3_f16 : array<vec3<f16>, 2> = tint_symbol_35(&(sb), 768u);
 }
 )";
 
@@ -247,192 +363,308 @@
 
 TEST_F(DecomposeMemoryAccessTest, SB_BasicLoad_OutOfOrder) {
     auto* src = R"(
+enable f16;
+
 @compute @workgroup_size(1)
 fn main() {
-  var a : i32 = sb.a;
-  var b : u32 = sb.b;
-  var c : f32 = sb.c;
-  var d : vec2<i32> = sb.d;
-  var e : vec2<u32> = sb.e;
-  var f : vec2<f32> = sb.f;
-  var g : vec3<i32> = sb.g;
-  var h : vec3<u32> = sb.h;
-  var i : vec3<f32> = sb.i;
-  var j : vec4<i32> = sb.j;
-  var k : vec4<u32> = sb.k;
-  var l : vec4<f32> = sb.l;
-  var m : mat2x2<f32> = sb.m;
-  var n : mat2x3<f32> = sb.n;
-  var o : mat2x4<f32> = sb.o;
-  var p : mat3x2<f32> = sb.p;
-  var q : mat3x3<f32> = sb.q;
-  var r : mat3x4<f32> = sb.r;
-  var s : mat4x2<f32> = sb.s;
-  var t : mat4x3<f32> = sb.t;
-  var u : mat4x4<f32> = sb.u;
-  var v : array<vec3<f32>, 2> = sb.v;
+  var scalar_f32 : f32 = sb.scalar_f32;
+  var scalar_i32 : i32 = sb.scalar_i32;
+  var scalar_u32 : u32 = sb.scalar_u32;
+  var scalar_f16 : f16 = sb.scalar_f16;
+  var vec2_f32 : vec2<f32> = sb.vec2_f32;
+  var vec2_i32 : vec2<i32> = sb.vec2_i32;
+  var vec2_u32 : vec2<u32> = sb.vec2_u32;
+  var vec2_f16 : vec2<f16> = sb.vec2_f16;
+  var vec3_f32 : vec3<f32> = sb.vec3_f32;
+  var vec3_i32 : vec3<i32> = sb.vec3_i32;
+  var vec3_u32 : vec3<u32> = sb.vec3_u32;
+  var vec3_f16 : vec3<f16> = sb.vec3_f16;
+  var vec4_f32 : vec4<f32> = sb.vec4_f32;
+  var vec4_i32 : vec4<i32> = sb.vec4_i32;
+  var vec4_u32 : vec4<u32> = sb.vec4_u32;
+  var vec4_f16 : vec4<f16> = sb.vec4_f16;
+  var mat2x2_f32 : mat2x2<f32> = sb.mat2x2_f32;
+  var mat2x3_f32 : mat2x3<f32> = sb.mat2x3_f32;
+  var mat2x4_f32 : mat2x4<f32> = sb.mat2x4_f32;
+  var mat3x2_f32 : mat3x2<f32> = sb.mat3x2_f32;
+  var mat3x3_f32 : mat3x3<f32> = sb.mat3x3_f32;
+  var mat3x4_f32 : mat3x4<f32> = sb.mat3x4_f32;
+  var mat4x2_f32 : mat4x2<f32> = sb.mat4x2_f32;
+  var mat4x3_f32 : mat4x3<f32> = sb.mat4x3_f32;
+  var mat4x4_f32 : mat4x4<f32> = sb.mat4x4_f32;
+  var mat2x2_f16 : mat2x2<f16> = sb.mat2x2_f16;
+  var mat2x3_f16 : mat2x3<f16> = sb.mat2x3_f16;
+  var mat2x4_f16 : mat2x4<f16> = sb.mat2x4_f16;
+  var mat3x2_f16 : mat3x2<f16> = sb.mat3x2_f16;
+  var mat3x3_f16 : mat3x3<f16> = sb.mat3x3_f16;
+  var mat3x4_f16 : mat3x4<f16> = sb.mat3x4_f16;
+  var mat4x2_f16 : mat4x2<f16> = sb.mat4x2_f16;
+  var mat4x3_f16 : mat4x3<f16> = sb.mat4x3_f16;
+  var mat4x4_f16 : mat4x4<f16> = sb.mat4x4_f16;
+  var arr2_vec3_f32 : array<vec3<f32>, 2> = sb.arr2_vec3_f32;
+  var arr2_vec3_f16 : array<vec3<f16>, 2> = sb.arr2_vec3_f16;
 }
 
 @group(0) @binding(0) var<storage, read_write> sb : SB;
 
 struct SB {
-  a : i32,
-  b : u32,
-  c : f32,
-  d : vec2<i32>,
-  e : vec2<u32>,
-  f : vec2<f32>,
-  g : vec3<i32>,
-  h : vec3<u32>,
-  i : vec3<f32>,
-  j : vec4<i32>,
-  k : vec4<u32>,
-  l : vec4<f32>,
-  m : mat2x2<f32>,
-  n : mat2x3<f32>,
-  o : mat2x4<f32>,
-  p : mat3x2<f32>,
-  q : mat3x3<f32>,
-  r : mat3x4<f32>,
-  s : mat4x2<f32>,
-  t : mat4x3<f32>,
-  u : mat4x4<f32>,
-  v : array<vec3<f32>, 2>,
+  scalar_f32 : f32,
+  scalar_i32 : i32,
+  scalar_u32 : u32,
+  scalar_f16 : f16,
+  vec2_f32 : vec2<f32>,
+  vec2_i32 : vec2<i32>,
+  vec2_u32 : vec2<u32>,
+  vec2_f16 : vec2<f16>,
+  vec3_f32 : vec3<f32>,
+  vec3_i32 : vec3<i32>,
+  vec3_u32 : vec3<u32>,
+  vec3_f16 : vec3<f16>,
+  vec4_f32 : vec4<f32>,
+  vec4_i32 : vec4<i32>,
+  vec4_u32 : vec4<u32>,
+  vec4_f16 : vec4<f16>,
+  mat2x2_f32 : mat2x2<f32>,
+  mat2x3_f32 : mat2x3<f32>,
+  mat2x4_f32 : mat2x4<f32>,
+  mat3x2_f32 : mat3x2<f32>,
+  mat3x3_f32 : mat3x3<f32>,
+  mat3x4_f32 : mat3x4<f32>,
+  mat4x2_f32 : mat4x2<f32>,
+  mat4x3_f32 : mat4x3<f32>,
+  mat4x4_f32 : mat4x4<f32>,
+  mat2x2_f16 : mat2x2<f16>,
+  mat2x3_f16 : mat2x3<f16>,
+  mat2x4_f16 : mat2x4<f16>,
+  mat3x2_f16 : mat3x2<f16>,
+  mat3x3_f16 : mat3x3<f16>,
+  mat3x4_f16 : mat3x4<f16>,
+  mat4x2_f16 : mat4x2<f16>,
+  mat4x3_f16 : mat4x3<f16>,
+  mat4x4_f16 : mat4x4<f16>,
+  arr2_vec3_f32 : array<vec3<f32>, 2>,
+  arr2_vec3_f16 : array<vec3<f16>, 2>,
 };
 )";
 
     auto* expect = R"(
-@internal(intrinsic_load_storage_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> i32
-
-@internal(intrinsic_load_storage_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_1(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> u32
+enable f16;
 
 @internal(intrinsic_load_storage_f32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_2(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> f32
+fn tint_symbol(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> f32
 
-@internal(intrinsic_load_storage_vec2_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_3(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec2<i32>
+@internal(intrinsic_load_storage_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_1(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> i32
 
-@internal(intrinsic_load_storage_vec2_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_4(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec2<u32>
+@internal(intrinsic_load_storage_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_2(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> u32
+
+@internal(intrinsic_load_storage_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_3(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> f16
 
 @internal(intrinsic_load_storage_vec2_f32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_5(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec2<f32>
+fn tint_symbol_4(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec2<f32>
 
-@internal(intrinsic_load_storage_vec3_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_6(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec3<i32>
+@internal(intrinsic_load_storage_vec2_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_5(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec2<i32>
 
-@internal(intrinsic_load_storage_vec3_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_7(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec3<u32>
+@internal(intrinsic_load_storage_vec2_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_6(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec2<u32>
+
+@internal(intrinsic_load_storage_vec2_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_7(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec2<f16>
 
 @internal(intrinsic_load_storage_vec3_f32) @internal(disable_validation__function_has_no_body)
 fn tint_symbol_8(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec3<f32>
 
-@internal(intrinsic_load_storage_vec4_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_9(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec4<i32>
+@internal(intrinsic_load_storage_vec3_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_9(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec3<i32>
 
-@internal(intrinsic_load_storage_vec4_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_10(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec4<u32>
+@internal(intrinsic_load_storage_vec3_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_10(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec3<u32>
+
+@internal(intrinsic_load_storage_vec3_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_11(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec3<f16>
 
 @internal(intrinsic_load_storage_vec4_f32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_11(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec4<f32>
+fn tint_symbol_12(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec4<f32>
 
-fn tint_symbol_12(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x2<f32> {
-  return mat2x2<f32>(tint_symbol_5(buffer, (offset + 0u)), tint_symbol_5(buffer, (offset + 8u)));
+@internal(intrinsic_load_storage_vec4_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_13(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec4<i32>
+
+@internal(intrinsic_load_storage_vec4_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_14(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec4<u32>
+
+@internal(intrinsic_load_storage_vec4_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_15(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec4<f16>
+
+fn tint_symbol_16(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x2<f32> {
+  return mat2x2<f32>(tint_symbol_4(buffer, (offset + 0u)), tint_symbol_4(buffer, (offset + 8u)));
 }
 
-fn tint_symbol_13(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x3<f32> {
+fn tint_symbol_17(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x3<f32> {
   return mat2x3<f32>(tint_symbol_8(buffer, (offset + 0u)), tint_symbol_8(buffer, (offset + 16u)));
 }
 
-fn tint_symbol_14(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x4<f32> {
-  return mat2x4<f32>(tint_symbol_11(buffer, (offset + 0u)), tint_symbol_11(buffer, (offset + 16u)));
+fn tint_symbol_18(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x4<f32> {
+  return mat2x4<f32>(tint_symbol_12(buffer, (offset + 0u)), tint_symbol_12(buffer, (offset + 16u)));
 }
 
-fn tint_symbol_15(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x2<f32> {
-  return mat3x2<f32>(tint_symbol_5(buffer, (offset + 0u)), tint_symbol_5(buffer, (offset + 8u)), tint_symbol_5(buffer, (offset + 16u)));
+fn tint_symbol_19(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x2<f32> {
+  return mat3x2<f32>(tint_symbol_4(buffer, (offset + 0u)), tint_symbol_4(buffer, (offset + 8u)), tint_symbol_4(buffer, (offset + 16u)));
 }
 
-fn tint_symbol_16(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x3<f32> {
+fn tint_symbol_20(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x3<f32> {
   return mat3x3<f32>(tint_symbol_8(buffer, (offset + 0u)), tint_symbol_8(buffer, (offset + 16u)), tint_symbol_8(buffer, (offset + 32u)));
 }
 
-fn tint_symbol_17(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x4<f32> {
-  return mat3x4<f32>(tint_symbol_11(buffer, (offset + 0u)), tint_symbol_11(buffer, (offset + 16u)), tint_symbol_11(buffer, (offset + 32u)));
+fn tint_symbol_21(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x4<f32> {
+  return mat3x4<f32>(tint_symbol_12(buffer, (offset + 0u)), tint_symbol_12(buffer, (offset + 16u)), tint_symbol_12(buffer, (offset + 32u)));
 }
 
-fn tint_symbol_18(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x2<f32> {
-  return mat4x2<f32>(tint_symbol_5(buffer, (offset + 0u)), tint_symbol_5(buffer, (offset + 8u)), tint_symbol_5(buffer, (offset + 16u)), tint_symbol_5(buffer, (offset + 24u)));
+fn tint_symbol_22(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x2<f32> {
+  return mat4x2<f32>(tint_symbol_4(buffer, (offset + 0u)), tint_symbol_4(buffer, (offset + 8u)), tint_symbol_4(buffer, (offset + 16u)), tint_symbol_4(buffer, (offset + 24u)));
 }
 
-fn tint_symbol_19(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x3<f32> {
+fn tint_symbol_23(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x3<f32> {
   return mat4x3<f32>(tint_symbol_8(buffer, (offset + 0u)), tint_symbol_8(buffer, (offset + 16u)), tint_symbol_8(buffer, (offset + 32u)), tint_symbol_8(buffer, (offset + 48u)));
 }
 
-fn tint_symbol_20(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x4<f32> {
-  return mat4x4<f32>(tint_symbol_11(buffer, (offset + 0u)), tint_symbol_11(buffer, (offset + 16u)), tint_symbol_11(buffer, (offset + 32u)), tint_symbol_11(buffer, (offset + 48u)));
+fn tint_symbol_24(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x4<f32> {
+  return mat4x4<f32>(tint_symbol_12(buffer, (offset + 0u)), tint_symbol_12(buffer, (offset + 16u)), tint_symbol_12(buffer, (offset + 32u)), tint_symbol_12(buffer, (offset + 48u)));
 }
 
-fn tint_symbol_21(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> array<vec3<f32>, 2u> {
+fn tint_symbol_25(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x2<f16> {
+  return mat2x2<f16>(tint_symbol_7(buffer, (offset + 0u)), tint_symbol_7(buffer, (offset + 4u)));
+}
+
+fn tint_symbol_26(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x3<f16> {
+  return mat2x3<f16>(tint_symbol_11(buffer, (offset + 0u)), tint_symbol_11(buffer, (offset + 8u)));
+}
+
+fn tint_symbol_27(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x4<f16> {
+  return mat2x4<f16>(tint_symbol_15(buffer, (offset + 0u)), tint_symbol_15(buffer, (offset + 8u)));
+}
+
+fn tint_symbol_28(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x2<f16> {
+  return mat3x2<f16>(tint_symbol_7(buffer, (offset + 0u)), tint_symbol_7(buffer, (offset + 4u)), tint_symbol_7(buffer, (offset + 8u)));
+}
+
+fn tint_symbol_29(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x3<f16> {
+  return mat3x3<f16>(tint_symbol_11(buffer, (offset + 0u)), tint_symbol_11(buffer, (offset + 8u)), tint_symbol_11(buffer, (offset + 16u)));
+}
+
+fn tint_symbol_30(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x4<f16> {
+  return mat3x4<f16>(tint_symbol_15(buffer, (offset + 0u)), tint_symbol_15(buffer, (offset + 8u)), tint_symbol_15(buffer, (offset + 16u)));
+}
+
+fn tint_symbol_31(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x2<f16> {
+  return mat4x2<f16>(tint_symbol_7(buffer, (offset + 0u)), tint_symbol_7(buffer, (offset + 4u)), tint_symbol_7(buffer, (offset + 8u)), tint_symbol_7(buffer, (offset + 12u)));
+}
+
+fn tint_symbol_32(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x3<f16> {
+  return mat4x3<f16>(tint_symbol_11(buffer, (offset + 0u)), tint_symbol_11(buffer, (offset + 8u)), tint_symbol_11(buffer, (offset + 16u)), tint_symbol_11(buffer, (offset + 24u)));
+}
+
+fn tint_symbol_33(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x4<f16> {
+  return mat4x4<f16>(tint_symbol_15(buffer, (offset + 0u)), tint_symbol_15(buffer, (offset + 8u)), tint_symbol_15(buffer, (offset + 16u)), tint_symbol_15(buffer, (offset + 24u)));
+}
+
+fn tint_symbol_34(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> array<vec3<f32>, 2u> {
   var arr : array<vec3<f32>, 2u>;
-  for(var i_1 = 0u; (i_1 < 2u); i_1 = (i_1 + 1u)) {
-    arr[i_1] = tint_symbol_8(buffer, (offset + (i_1 * 16u)));
+  for(var i = 0u; (i < 2u); i = (i + 1u)) {
+    arr[i] = tint_symbol_8(buffer, (offset + (i * 16u)));
   }
   return arr;
 }
 
+fn tint_symbol_35(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> array<vec3<f16>, 2u> {
+  var arr_1 : array<vec3<f16>, 2u>;
+  for(var i_1 = 0u; (i_1 < 2u); i_1 = (i_1 + 1u)) {
+    arr_1[i_1] = tint_symbol_11(buffer, (offset + (i_1 * 8u)));
+  }
+  return arr_1;
+}
+
 @compute @workgroup_size(1)
 fn main() {
-  var a : i32 = tint_symbol(&(sb), 0u);
-  var b : u32 = tint_symbol_1(&(sb), 4u);
-  var c : f32 = tint_symbol_2(&(sb), 8u);
-  var d : vec2<i32> = tint_symbol_3(&(sb), 16u);
-  var e : vec2<u32> = tint_symbol_4(&(sb), 24u);
-  var f : vec2<f32> = tint_symbol_5(&(sb), 32u);
-  var g : vec3<i32> = tint_symbol_6(&(sb), 48u);
-  var h : vec3<u32> = tint_symbol_7(&(sb), 64u);
-  var i : vec3<f32> = tint_symbol_8(&(sb), 80u);
-  var j : vec4<i32> = tint_symbol_9(&(sb), 96u);
-  var k : vec4<u32> = tint_symbol_10(&(sb), 112u);
-  var l : vec4<f32> = tint_symbol_11(&(sb), 128u);
-  var m : mat2x2<f32> = tint_symbol_12(&(sb), 144u);
-  var n : mat2x3<f32> = tint_symbol_13(&(sb), 160u);
-  var o : mat2x4<f32> = tint_symbol_14(&(sb), 192u);
-  var p : mat3x2<f32> = tint_symbol_15(&(sb), 224u);
-  var q : mat3x3<f32> = tint_symbol_16(&(sb), 256u);
-  var r : mat3x4<f32> = tint_symbol_17(&(sb), 304u);
-  var s : mat4x2<f32> = tint_symbol_18(&(sb), 352u);
-  var t : mat4x3<f32> = tint_symbol_19(&(sb), 384u);
-  var u : mat4x4<f32> = tint_symbol_20(&(sb), 448u);
-  var v : array<vec3<f32>, 2> = tint_symbol_21(&(sb), 512u);
+  var scalar_f32 : f32 = tint_symbol(&(sb), 0u);
+  var scalar_i32 : i32 = tint_symbol_1(&(sb), 4u);
+  var scalar_u32 : u32 = tint_symbol_2(&(sb), 8u);
+  var scalar_f16 : f16 = tint_symbol_3(&(sb), 12u);
+  var vec2_f32 : vec2<f32> = tint_symbol_4(&(sb), 16u);
+  var vec2_i32 : vec2<i32> = tint_symbol_5(&(sb), 24u);
+  var vec2_u32 : vec2<u32> = tint_symbol_6(&(sb), 32u);
+  var vec2_f16 : vec2<f16> = tint_symbol_7(&(sb), 40u);
+  var vec3_f32 : vec3<f32> = tint_symbol_8(&(sb), 48u);
+  var vec3_i32 : vec3<i32> = tint_symbol_9(&(sb), 64u);
+  var vec3_u32 : vec3<u32> = tint_symbol_10(&(sb), 80u);
+  var vec3_f16 : vec3<f16> = tint_symbol_11(&(sb), 96u);
+  var vec4_f32 : vec4<f32> = tint_symbol_12(&(sb), 112u);
+  var vec4_i32 : vec4<i32> = tint_symbol_13(&(sb), 128u);
+  var vec4_u32 : vec4<u32> = tint_symbol_14(&(sb), 144u);
+  var vec4_f16 : vec4<f16> = tint_symbol_15(&(sb), 160u);
+  var mat2x2_f32 : mat2x2<f32> = tint_symbol_16(&(sb), 168u);
+  var mat2x3_f32 : mat2x3<f32> = tint_symbol_17(&(sb), 192u);
+  var mat2x4_f32 : mat2x4<f32> = tint_symbol_18(&(sb), 224u);
+  var mat3x2_f32 : mat3x2<f32> = tint_symbol_19(&(sb), 256u);
+  var mat3x3_f32 : mat3x3<f32> = tint_symbol_20(&(sb), 288u);
+  var mat3x4_f32 : mat3x4<f32> = tint_symbol_21(&(sb), 336u);
+  var mat4x2_f32 : mat4x2<f32> = tint_symbol_22(&(sb), 384u);
+  var mat4x3_f32 : mat4x3<f32> = tint_symbol_23(&(sb), 416u);
+  var mat4x4_f32 : mat4x4<f32> = tint_symbol_24(&(sb), 480u);
+  var mat2x2_f16 : mat2x2<f16> = tint_symbol_25(&(sb), 544u);
+  var mat2x3_f16 : mat2x3<f16> = tint_symbol_26(&(sb), 552u);
+  var mat2x4_f16 : mat2x4<f16> = tint_symbol_27(&(sb), 568u);
+  var mat3x2_f16 : mat3x2<f16> = tint_symbol_28(&(sb), 584u);
+  var mat3x3_f16 : mat3x3<f16> = tint_symbol_29(&(sb), 600u);
+  var mat3x4_f16 : mat3x4<f16> = tint_symbol_30(&(sb), 624u);
+  var mat4x2_f16 : mat4x2<f16> = tint_symbol_31(&(sb), 648u);
+  var mat4x3_f16 : mat4x3<f16> = tint_symbol_32(&(sb), 664u);
+  var mat4x4_f16 : mat4x4<f16> = tint_symbol_33(&(sb), 696u);
+  var arr2_vec3_f32 : array<vec3<f32>, 2> = tint_symbol_34(&(sb), 736u);
+  var arr2_vec3_f16 : array<vec3<f16>, 2> = tint_symbol_35(&(sb), 768u);
 }
 
 @group(0) @binding(0) var<storage, read_write> sb : SB;
 
 struct SB {
-  a : i32,
-  b : u32,
-  c : f32,
-  d : vec2<i32>,
-  e : vec2<u32>,
-  f : vec2<f32>,
-  g : vec3<i32>,
-  h : vec3<u32>,
-  i : vec3<f32>,
-  j : vec4<i32>,
-  k : vec4<u32>,
-  l : vec4<f32>,
-  m : mat2x2<f32>,
-  n : mat2x3<f32>,
-  o : mat2x4<f32>,
-  p : mat3x2<f32>,
-  q : mat3x3<f32>,
-  r : mat3x4<f32>,
-  s : mat4x2<f32>,
-  t : mat4x3<f32>,
-  u : mat4x4<f32>,
-  v : array<vec3<f32>, 2>,
+  scalar_f32 : f32,
+  scalar_i32 : i32,
+  scalar_u32 : u32,
+  scalar_f16 : f16,
+  vec2_f32 : vec2<f32>,
+  vec2_i32 : vec2<i32>,
+  vec2_u32 : vec2<u32>,
+  vec2_f16 : vec2<f16>,
+  vec3_f32 : vec3<f32>,
+  vec3_i32 : vec3<i32>,
+  vec3_u32 : vec3<u32>,
+  vec3_f16 : vec3<f16>,
+  vec4_f32 : vec4<f32>,
+  vec4_i32 : vec4<i32>,
+  vec4_u32 : vec4<u32>,
+  vec4_f16 : vec4<f16>,
+  mat2x2_f32 : mat2x2<f32>,
+  mat2x3_f32 : mat2x3<f32>,
+  mat2x4_f32 : mat2x4<f32>,
+  mat3x2_f32 : mat3x2<f32>,
+  mat3x3_f32 : mat3x3<f32>,
+  mat3x4_f32 : mat3x4<f32>,
+  mat4x2_f32 : mat4x2<f32>,
+  mat4x3_f32 : mat4x3<f32>,
+  mat4x4_f32 : mat4x4<f32>,
+  mat2x2_f16 : mat2x2<f16>,
+  mat2x3_f16 : mat2x3<f16>,
+  mat2x4_f16 : mat2x4<f16>,
+  mat3x2_f16 : mat3x2<f16>,
+  mat3x3_f16 : mat3x3<f16>,
+  mat3x4_f16 : mat3x4<f16>,
+  mat4x2_f16 : mat4x2<f16>,
+  mat4x3_f16 : mat4x3<f16>,
+  mat4x4_f16 : mat4x4<f16>,
+  arr2_vec3_f32 : array<vec3<f32>, 2>,
+  arr2_vec3_f16 : array<vec3<f16>, 2>,
 }
 )";
 
@@ -443,192 +675,308 @@
 
 TEST_F(DecomposeMemoryAccessTest, UB_BasicLoad) {
     auto* src = R"(
+enable f16;
+
 struct UB {
-  a : i32,
-  b : u32,
-  c : f32,
-  d : vec2<i32>,
-  e : vec2<u32>,
-  f : vec2<f32>,
-  g : vec3<i32>,
-  h : vec3<u32>,
-  i : vec3<f32>,
-  j : vec4<i32>,
-  k : vec4<u32>,
-  l : vec4<f32>,
-  m : mat2x2<f32>,
-  n : mat2x3<f32>,
-  o : mat2x4<f32>,
-  p : mat3x2<f32>,
-  q : mat3x3<f32>,
-  r : mat3x4<f32>,
-  s : mat4x2<f32>,
-  t : mat4x3<f32>,
-  u : mat4x4<f32>,
-  v : array<vec3<f32>, 2>,
+  scalar_f32 : f32,
+  scalar_i32 : i32,
+  scalar_u32 : u32,
+  scalar_f16 : f16,
+  vec2_f32 : vec2<f32>,
+  vec2_i32 : vec2<i32>,
+  vec2_u32 : vec2<u32>,
+  vec2_f16 : vec2<f16>,
+  vec3_f32 : vec3<f32>,
+  vec3_i32 : vec3<i32>,
+  vec3_u32 : vec3<u32>,
+  vec3_f16 : vec3<f16>,
+  vec4_f32 : vec4<f32>,
+  vec4_i32 : vec4<i32>,
+  vec4_u32 : vec4<u32>,
+  vec4_f16 : vec4<f16>,
+  mat2x2_f32 : mat2x2<f32>,
+  mat2x3_f32 : mat2x3<f32>,
+  mat2x4_f32 : mat2x4<f32>,
+  mat3x2_f32 : mat3x2<f32>,
+  mat3x3_f32 : mat3x3<f32>,
+  mat3x4_f32 : mat3x4<f32>,
+  mat4x2_f32 : mat4x2<f32>,
+  mat4x3_f32 : mat4x3<f32>,
+  mat4x4_f32 : mat4x4<f32>,
+  mat2x2_f16 : mat2x2<f16>,
+  mat2x3_f16 : mat2x3<f16>,
+  mat2x4_f16 : mat2x4<f16>,
+  mat3x2_f16 : mat3x2<f16>,
+  mat3x3_f16 : mat3x3<f16>,
+  mat3x4_f16 : mat3x4<f16>,
+  mat4x2_f16 : mat4x2<f16>,
+  mat4x3_f16 : mat4x3<f16>,
+  mat4x4_f16 : mat4x4<f16>,
+  arr2_vec3_f32 : array<vec3<f32>, 2>,
+  arr2_mat4x2_f16 : array<mat4x2<f16>, 2>,
 };
 
 @group(0) @binding(0) var<uniform> ub : UB;
 
 @compute @workgroup_size(1)
 fn main() {
-  var a : i32 = ub.a;
-  var b : u32 = ub.b;
-  var c : f32 = ub.c;
-  var d : vec2<i32> = ub.d;
-  var e : vec2<u32> = ub.e;
-  var f : vec2<f32> = ub.f;
-  var g : vec3<i32> = ub.g;
-  var h : vec3<u32> = ub.h;
-  var i : vec3<f32> = ub.i;
-  var j : vec4<i32> = ub.j;
-  var k : vec4<u32> = ub.k;
-  var l : vec4<f32> = ub.l;
-  var m : mat2x2<f32> = ub.m;
-  var n : mat2x3<f32> = ub.n;
-  var o : mat2x4<f32> = ub.o;
-  var p : mat3x2<f32> = ub.p;
-  var q : mat3x3<f32> = ub.q;
-  var r : mat3x4<f32> = ub.r;
-  var s : mat4x2<f32> = ub.s;
-  var t : mat4x3<f32> = ub.t;
-  var u : mat4x4<f32> = ub.u;
-  var v : array<vec3<f32>, 2> = ub.v;
+  var scalar_f32 : f32 = ub.scalar_f32;
+  var scalar_i32 : i32 = ub.scalar_i32;
+  var scalar_u32 : u32 = ub.scalar_u32;
+  var scalar_f16 : f16 = ub.scalar_f16;
+  var vec2_f32 : vec2<f32> = ub.vec2_f32;
+  var vec2_i32 : vec2<i32> = ub.vec2_i32;
+  var vec2_u32 : vec2<u32> = ub.vec2_u32;
+  var vec2_f16 : vec2<f16> = ub.vec2_f16;
+  var vec3_f32 : vec3<f32> = ub.vec3_f32;
+  var vec3_i32 : vec3<i32> = ub.vec3_i32;
+  var vec3_u32 : vec3<u32> = ub.vec3_u32;
+  var vec3_f16 : vec3<f16> = ub.vec3_f16;
+  var vec4_f32 : vec4<f32> = ub.vec4_f32;
+  var vec4_i32 : vec4<i32> = ub.vec4_i32;
+  var vec4_u32 : vec4<u32> = ub.vec4_u32;
+  var vec4_f16 : vec4<f16> = ub.vec4_f16;
+  var mat2x2_f32 : mat2x2<f32> = ub.mat2x2_f32;
+  var mat2x3_f32 : mat2x3<f32> = ub.mat2x3_f32;
+  var mat2x4_f32 : mat2x4<f32> = ub.mat2x4_f32;
+  var mat3x2_f32 : mat3x2<f32> = ub.mat3x2_f32;
+  var mat3x3_f32 : mat3x3<f32> = ub.mat3x3_f32;
+  var mat3x4_f32 : mat3x4<f32> = ub.mat3x4_f32;
+  var mat4x2_f32 : mat4x2<f32> = ub.mat4x2_f32;
+  var mat4x3_f32 : mat4x3<f32> = ub.mat4x3_f32;
+  var mat4x4_f32 : mat4x4<f32> = ub.mat4x4_f32;
+  var mat2x2_f16 : mat2x2<f16> = ub.mat2x2_f16;
+  var mat2x3_f16 : mat2x3<f16> = ub.mat2x3_f16;
+  var mat2x4_f16 : mat2x4<f16> = ub.mat2x4_f16;
+  var mat3x2_f16 : mat3x2<f16> = ub.mat3x2_f16;
+  var mat3x3_f16 : mat3x3<f16> = ub.mat3x3_f16;
+  var mat3x4_f16 : mat3x4<f16> = ub.mat3x4_f16;
+  var mat4x2_f16 : mat4x2<f16> = ub.mat4x2_f16;
+  var mat4x3_f16 : mat4x3<f16> = ub.mat4x3_f16;
+  var mat4x4_f16 : mat4x4<f16> = ub.mat4x4_f16;
+  var arr2_vec3_f32 : array<vec3<f32>, 2> = ub.arr2_vec3_f32;
+  var arr2_mat4x2_f16 : array<mat4x2<f16>, 2> = ub.arr2_mat4x2_f16;
 }
 )";
 
     auto* expect = R"(
+enable f16;
+
 struct UB {
-  a : i32,
-  b : u32,
-  c : f32,
-  d : vec2<i32>,
-  e : vec2<u32>,
-  f : vec2<f32>,
-  g : vec3<i32>,
-  h : vec3<u32>,
-  i : vec3<f32>,
-  j : vec4<i32>,
-  k : vec4<u32>,
-  l : vec4<f32>,
-  m : mat2x2<f32>,
-  n : mat2x3<f32>,
-  o : mat2x4<f32>,
-  p : mat3x2<f32>,
-  q : mat3x3<f32>,
-  r : mat3x4<f32>,
-  s : mat4x2<f32>,
-  t : mat4x3<f32>,
-  u : mat4x4<f32>,
-  v : array<vec3<f32>, 2>,
+  scalar_f32 : f32,
+  scalar_i32 : i32,
+  scalar_u32 : u32,
+  scalar_f16 : f16,
+  vec2_f32 : vec2<f32>,
+  vec2_i32 : vec2<i32>,
+  vec2_u32 : vec2<u32>,
+  vec2_f16 : vec2<f16>,
+  vec3_f32 : vec3<f32>,
+  vec3_i32 : vec3<i32>,
+  vec3_u32 : vec3<u32>,
+  vec3_f16 : vec3<f16>,
+  vec4_f32 : vec4<f32>,
+  vec4_i32 : vec4<i32>,
+  vec4_u32 : vec4<u32>,
+  vec4_f16 : vec4<f16>,
+  mat2x2_f32 : mat2x2<f32>,
+  mat2x3_f32 : mat2x3<f32>,
+  mat2x4_f32 : mat2x4<f32>,
+  mat3x2_f32 : mat3x2<f32>,
+  mat3x3_f32 : mat3x3<f32>,
+  mat3x4_f32 : mat3x4<f32>,
+  mat4x2_f32 : mat4x2<f32>,
+  mat4x3_f32 : mat4x3<f32>,
+  mat4x4_f32 : mat4x4<f32>,
+  mat2x2_f16 : mat2x2<f16>,
+  mat2x3_f16 : mat2x3<f16>,
+  mat2x4_f16 : mat2x4<f16>,
+  mat3x2_f16 : mat3x2<f16>,
+  mat3x3_f16 : mat3x3<f16>,
+  mat3x4_f16 : mat3x4<f16>,
+  mat4x2_f16 : mat4x2<f16>,
+  mat4x3_f16 : mat4x3<f16>,
+  mat4x4_f16 : mat4x4<f16>,
+  arr2_vec3_f32 : array<vec3<f32>, 2>,
+  arr2_mat4x2_f16 : array<mat4x2<f16>, 2>,
 }
 
 @group(0) @binding(0) var<uniform> ub : UB;
 
+@internal(intrinsic_load_uniform_f32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> f32
+
 @internal(intrinsic_load_uniform_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> i32
+fn tint_symbol_1(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> i32
 
 @internal(intrinsic_load_uniform_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_1(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> u32
+fn tint_symbol_2(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> u32
 
-@internal(intrinsic_load_uniform_f32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_2(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> f32
-
-@internal(intrinsic_load_uniform_vec2_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_3(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec2<i32>
-
-@internal(intrinsic_load_uniform_vec2_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_4(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec2<u32>
+@internal(intrinsic_load_uniform_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_3(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> f16
 
 @internal(intrinsic_load_uniform_vec2_f32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_5(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec2<f32>
+fn tint_symbol_4(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec2<f32>
 
-@internal(intrinsic_load_uniform_vec3_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_6(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec3<i32>
+@internal(intrinsic_load_uniform_vec2_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_5(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec2<i32>
 
-@internal(intrinsic_load_uniform_vec3_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_7(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec3<u32>
+@internal(intrinsic_load_uniform_vec2_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_6(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec2<u32>
+
+@internal(intrinsic_load_uniform_vec2_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_7(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec2<f16>
 
 @internal(intrinsic_load_uniform_vec3_f32) @internal(disable_validation__function_has_no_body)
 fn tint_symbol_8(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec3<f32>
 
-@internal(intrinsic_load_uniform_vec4_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_9(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec4<i32>
+@internal(intrinsic_load_uniform_vec3_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_9(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec3<i32>
 
-@internal(intrinsic_load_uniform_vec4_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_10(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec4<u32>
+@internal(intrinsic_load_uniform_vec3_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_10(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec3<u32>
+
+@internal(intrinsic_load_uniform_vec3_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_11(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec3<f16>
 
 @internal(intrinsic_load_uniform_vec4_f32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_11(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec4<f32>
+fn tint_symbol_12(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec4<f32>
 
-fn tint_symbol_12(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat2x2<f32> {
-  return mat2x2<f32>(tint_symbol_5(buffer, (offset + 0u)), tint_symbol_5(buffer, (offset + 8u)));
+@internal(intrinsic_load_uniform_vec4_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_13(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec4<i32>
+
+@internal(intrinsic_load_uniform_vec4_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_14(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec4<u32>
+
+@internal(intrinsic_load_uniform_vec4_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_15(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec4<f16>
+
+fn tint_symbol_16(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat2x2<f32> {
+  return mat2x2<f32>(tint_symbol_4(buffer, (offset + 0u)), tint_symbol_4(buffer, (offset + 8u)));
 }
 
-fn tint_symbol_13(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat2x3<f32> {
+fn tint_symbol_17(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat2x3<f32> {
   return mat2x3<f32>(tint_symbol_8(buffer, (offset + 0u)), tint_symbol_8(buffer, (offset + 16u)));
 }
 
-fn tint_symbol_14(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat2x4<f32> {
-  return mat2x4<f32>(tint_symbol_11(buffer, (offset + 0u)), tint_symbol_11(buffer, (offset + 16u)));
+fn tint_symbol_18(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat2x4<f32> {
+  return mat2x4<f32>(tint_symbol_12(buffer, (offset + 0u)), tint_symbol_12(buffer, (offset + 16u)));
 }
 
-fn tint_symbol_15(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat3x2<f32> {
-  return mat3x2<f32>(tint_symbol_5(buffer, (offset + 0u)), tint_symbol_5(buffer, (offset + 8u)), tint_symbol_5(buffer, (offset + 16u)));
+fn tint_symbol_19(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat3x2<f32> {
+  return mat3x2<f32>(tint_symbol_4(buffer, (offset + 0u)), tint_symbol_4(buffer, (offset + 8u)), tint_symbol_4(buffer, (offset + 16u)));
 }
 
-fn tint_symbol_16(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat3x3<f32> {
+fn tint_symbol_20(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat3x3<f32> {
   return mat3x3<f32>(tint_symbol_8(buffer, (offset + 0u)), tint_symbol_8(buffer, (offset + 16u)), tint_symbol_8(buffer, (offset + 32u)));
 }
 
-fn tint_symbol_17(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat3x4<f32> {
-  return mat3x4<f32>(tint_symbol_11(buffer, (offset + 0u)), tint_symbol_11(buffer, (offset + 16u)), tint_symbol_11(buffer, (offset + 32u)));
+fn tint_symbol_21(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat3x4<f32> {
+  return mat3x4<f32>(tint_symbol_12(buffer, (offset + 0u)), tint_symbol_12(buffer, (offset + 16u)), tint_symbol_12(buffer, (offset + 32u)));
 }
 
-fn tint_symbol_18(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat4x2<f32> {
-  return mat4x2<f32>(tint_symbol_5(buffer, (offset + 0u)), tint_symbol_5(buffer, (offset + 8u)), tint_symbol_5(buffer, (offset + 16u)), tint_symbol_5(buffer, (offset + 24u)));
+fn tint_symbol_22(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat4x2<f32> {
+  return mat4x2<f32>(tint_symbol_4(buffer, (offset + 0u)), tint_symbol_4(buffer, (offset + 8u)), tint_symbol_4(buffer, (offset + 16u)), tint_symbol_4(buffer, (offset + 24u)));
 }
 
-fn tint_symbol_19(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat4x3<f32> {
+fn tint_symbol_23(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat4x3<f32> {
   return mat4x3<f32>(tint_symbol_8(buffer, (offset + 0u)), tint_symbol_8(buffer, (offset + 16u)), tint_symbol_8(buffer, (offset + 32u)), tint_symbol_8(buffer, (offset + 48u)));
 }
 
-fn tint_symbol_20(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat4x4<f32> {
-  return mat4x4<f32>(tint_symbol_11(buffer, (offset + 0u)), tint_symbol_11(buffer, (offset + 16u)), tint_symbol_11(buffer, (offset + 32u)), tint_symbol_11(buffer, (offset + 48u)));
+fn tint_symbol_24(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat4x4<f32> {
+  return mat4x4<f32>(tint_symbol_12(buffer, (offset + 0u)), tint_symbol_12(buffer, (offset + 16u)), tint_symbol_12(buffer, (offset + 32u)), tint_symbol_12(buffer, (offset + 48u)));
 }
 
-fn tint_symbol_21(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> array<vec3<f32>, 2u> {
+fn tint_symbol_25(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat2x2<f16> {
+  return mat2x2<f16>(tint_symbol_7(buffer, (offset + 0u)), tint_symbol_7(buffer, (offset + 4u)));
+}
+
+fn tint_symbol_26(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat2x3<f16> {
+  return mat2x3<f16>(tint_symbol_11(buffer, (offset + 0u)), tint_symbol_11(buffer, (offset + 8u)));
+}
+
+fn tint_symbol_27(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat2x4<f16> {
+  return mat2x4<f16>(tint_symbol_15(buffer, (offset + 0u)), tint_symbol_15(buffer, (offset + 8u)));
+}
+
+fn tint_symbol_28(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat3x2<f16> {
+  return mat3x2<f16>(tint_symbol_7(buffer, (offset + 0u)), tint_symbol_7(buffer, (offset + 4u)), tint_symbol_7(buffer, (offset + 8u)));
+}
+
+fn tint_symbol_29(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat3x3<f16> {
+  return mat3x3<f16>(tint_symbol_11(buffer, (offset + 0u)), tint_symbol_11(buffer, (offset + 8u)), tint_symbol_11(buffer, (offset + 16u)));
+}
+
+fn tint_symbol_30(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat3x4<f16> {
+  return mat3x4<f16>(tint_symbol_15(buffer, (offset + 0u)), tint_symbol_15(buffer, (offset + 8u)), tint_symbol_15(buffer, (offset + 16u)));
+}
+
+fn tint_symbol_31(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat4x2<f16> {
+  return mat4x2<f16>(tint_symbol_7(buffer, (offset + 0u)), tint_symbol_7(buffer, (offset + 4u)), tint_symbol_7(buffer, (offset + 8u)), tint_symbol_7(buffer, (offset + 12u)));
+}
+
+fn tint_symbol_32(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat4x3<f16> {
+  return mat4x3<f16>(tint_symbol_11(buffer, (offset + 0u)), tint_symbol_11(buffer, (offset + 8u)), tint_symbol_11(buffer, (offset + 16u)), tint_symbol_11(buffer, (offset + 24u)));
+}
+
+fn tint_symbol_33(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat4x4<f16> {
+  return mat4x4<f16>(tint_symbol_15(buffer, (offset + 0u)), tint_symbol_15(buffer, (offset + 8u)), tint_symbol_15(buffer, (offset + 16u)), tint_symbol_15(buffer, (offset + 24u)));
+}
+
+fn tint_symbol_34(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> array<vec3<f32>, 2u> {
   var arr : array<vec3<f32>, 2u>;
-  for(var i_1 = 0u; (i_1 < 2u); i_1 = (i_1 + 1u)) {
-    arr[i_1] = tint_symbol_8(buffer, (offset + (i_1 * 16u)));
+  for(var i = 0u; (i < 2u); i = (i + 1u)) {
+    arr[i] = tint_symbol_8(buffer, (offset + (i * 16u)));
   }
   return arr;
 }
 
+fn tint_symbol_35(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> array<mat4x2<f16>, 2u> {
+  var arr_1 : array<mat4x2<f16>, 2u>;
+  for(var i_1 = 0u; (i_1 < 2u); i_1 = (i_1 + 1u)) {
+    arr_1[i_1] = tint_symbol_31(buffer, (offset + (i_1 * 16u)));
+  }
+  return arr_1;
+}
+
 @compute @workgroup_size(1)
 fn main() {
-  var a : i32 = tint_symbol(&(ub), 0u);
-  var b : u32 = tint_symbol_1(&(ub), 4u);
-  var c : f32 = tint_symbol_2(&(ub), 8u);
-  var d : vec2<i32> = tint_symbol_3(&(ub), 16u);
-  var e : vec2<u32> = tint_symbol_4(&(ub), 24u);
-  var f : vec2<f32> = tint_symbol_5(&(ub), 32u);
-  var g : vec3<i32> = tint_symbol_6(&(ub), 48u);
-  var h : vec3<u32> = tint_symbol_7(&(ub), 64u);
-  var i : vec3<f32> = tint_symbol_8(&(ub), 80u);
-  var j : vec4<i32> = tint_symbol_9(&(ub), 96u);
-  var k : vec4<u32> = tint_symbol_10(&(ub), 112u);
-  var l : vec4<f32> = tint_symbol_11(&(ub), 128u);
-  var m : mat2x2<f32> = tint_symbol_12(&(ub), 144u);
-  var n : mat2x3<f32> = tint_symbol_13(&(ub), 160u);
-  var o : mat2x4<f32> = tint_symbol_14(&(ub), 192u);
-  var p : mat3x2<f32> = tint_symbol_15(&(ub), 224u);
-  var q : mat3x3<f32> = tint_symbol_16(&(ub), 256u);
-  var r : mat3x4<f32> = tint_symbol_17(&(ub), 304u);
-  var s : mat4x2<f32> = tint_symbol_18(&(ub), 352u);
-  var t : mat4x3<f32> = tint_symbol_19(&(ub), 384u);
-  var u : mat4x4<f32> = tint_symbol_20(&(ub), 448u);
-  var v : array<vec3<f32>, 2> = tint_symbol_21(&(ub), 512u);
+  var scalar_f32 : f32 = tint_symbol(&(ub), 0u);
+  var scalar_i32 : i32 = tint_symbol_1(&(ub), 4u);
+  var scalar_u32 : u32 = tint_symbol_2(&(ub), 8u);
+  var scalar_f16 : f16 = tint_symbol_3(&(ub), 12u);
+  var vec2_f32 : vec2<f32> = tint_symbol_4(&(ub), 16u);
+  var vec2_i32 : vec2<i32> = tint_symbol_5(&(ub), 24u);
+  var vec2_u32 : vec2<u32> = tint_symbol_6(&(ub), 32u);
+  var vec2_f16 : vec2<f16> = tint_symbol_7(&(ub), 40u);
+  var vec3_f32 : vec3<f32> = tint_symbol_8(&(ub), 48u);
+  var vec3_i32 : vec3<i32> = tint_symbol_9(&(ub), 64u);
+  var vec3_u32 : vec3<u32> = tint_symbol_10(&(ub), 80u);
+  var vec3_f16 : vec3<f16> = tint_symbol_11(&(ub), 96u);
+  var vec4_f32 : vec4<f32> = tint_symbol_12(&(ub), 112u);
+  var vec4_i32 : vec4<i32> = tint_symbol_13(&(ub), 128u);
+  var vec4_u32 : vec4<u32> = tint_symbol_14(&(ub), 144u);
+  var vec4_f16 : vec4<f16> = tint_symbol_15(&(ub), 160u);
+  var mat2x2_f32 : mat2x2<f32> = tint_symbol_16(&(ub), 168u);
+  var mat2x3_f32 : mat2x3<f32> = tint_symbol_17(&(ub), 192u);
+  var mat2x4_f32 : mat2x4<f32> = tint_symbol_18(&(ub), 224u);
+  var mat3x2_f32 : mat3x2<f32> = tint_symbol_19(&(ub), 256u);
+  var mat3x3_f32 : mat3x3<f32> = tint_symbol_20(&(ub), 288u);
+  var mat3x4_f32 : mat3x4<f32> = tint_symbol_21(&(ub), 336u);
+  var mat4x2_f32 : mat4x2<f32> = tint_symbol_22(&(ub), 384u);
+  var mat4x3_f32 : mat4x3<f32> = tint_symbol_23(&(ub), 416u);
+  var mat4x4_f32 : mat4x4<f32> = tint_symbol_24(&(ub), 480u);
+  var mat2x2_f16 : mat2x2<f16> = tint_symbol_25(&(ub), 544u);
+  var mat2x3_f16 : mat2x3<f16> = tint_symbol_26(&(ub), 552u);
+  var mat2x4_f16 : mat2x4<f16> = tint_symbol_27(&(ub), 568u);
+  var mat3x2_f16 : mat3x2<f16> = tint_symbol_28(&(ub), 584u);
+  var mat3x3_f16 : mat3x3<f16> = tint_symbol_29(&(ub), 600u);
+  var mat3x4_f16 : mat3x4<f16> = tint_symbol_30(&(ub), 624u);
+  var mat4x2_f16 : mat4x2<f16> = tint_symbol_31(&(ub), 648u);
+  var mat4x3_f16 : mat4x3<f16> = tint_symbol_32(&(ub), 664u);
+  var mat4x4_f16 : mat4x4<f16> = tint_symbol_33(&(ub), 696u);
+  var arr2_vec3_f32 : array<vec3<f32>, 2> = tint_symbol_34(&(ub), 736u);
+  var arr2_mat4x2_f16 : array<mat4x2<f16>, 2> = tint_symbol_35(&(ub), 768u);
 }
 )";
 
@@ -639,192 +987,308 @@
 
 TEST_F(DecomposeMemoryAccessTest, UB_BasicLoad_OutOfOrder) {
     auto* src = R"(
+enable f16;
+
 @compute @workgroup_size(1)
 fn main() {
-  var a : i32 = ub.a;
-  var b : u32 = ub.b;
-  var c : f32 = ub.c;
-  var d : vec2<i32> = ub.d;
-  var e : vec2<u32> = ub.e;
-  var f : vec2<f32> = ub.f;
-  var g : vec3<i32> = ub.g;
-  var h : vec3<u32> = ub.h;
-  var i : vec3<f32> = ub.i;
-  var j : vec4<i32> = ub.j;
-  var k : vec4<u32> = ub.k;
-  var l : vec4<f32> = ub.l;
-  var m : mat2x2<f32> = ub.m;
-  var n : mat2x3<f32> = ub.n;
-  var o : mat2x4<f32> = ub.o;
-  var p : mat3x2<f32> = ub.p;
-  var q : mat3x3<f32> = ub.q;
-  var r : mat3x4<f32> = ub.r;
-  var s : mat4x2<f32> = ub.s;
-  var t : mat4x3<f32> = ub.t;
-  var u : mat4x4<f32> = ub.u;
-  var v : array<vec3<f32>, 2> = ub.v;
+  var scalar_f32 : f32 = ub.scalar_f32;
+  var scalar_i32 : i32 = ub.scalar_i32;
+  var scalar_u32 : u32 = ub.scalar_u32;
+  var scalar_f16 : f16 = ub.scalar_f16;
+  var vec2_f32 : vec2<f32> = ub.vec2_f32;
+  var vec2_i32 : vec2<i32> = ub.vec2_i32;
+  var vec2_u32 : vec2<u32> = ub.vec2_u32;
+  var vec2_f16 : vec2<f16> = ub.vec2_f16;
+  var vec3_f32 : vec3<f32> = ub.vec3_f32;
+  var vec3_i32 : vec3<i32> = ub.vec3_i32;
+  var vec3_u32 : vec3<u32> = ub.vec3_u32;
+  var vec3_f16 : vec3<f16> = ub.vec3_f16;
+  var vec4_f32 : vec4<f32> = ub.vec4_f32;
+  var vec4_i32 : vec4<i32> = ub.vec4_i32;
+  var vec4_u32 : vec4<u32> = ub.vec4_u32;
+  var vec4_f16 : vec4<f16> = ub.vec4_f16;
+  var mat2x2_f32 : mat2x2<f32> = ub.mat2x2_f32;
+  var mat2x3_f32 : mat2x3<f32> = ub.mat2x3_f32;
+  var mat2x4_f32 : mat2x4<f32> = ub.mat2x4_f32;
+  var mat3x2_f32 : mat3x2<f32> = ub.mat3x2_f32;
+  var mat3x3_f32 : mat3x3<f32> = ub.mat3x3_f32;
+  var mat3x4_f32 : mat3x4<f32> = ub.mat3x4_f32;
+  var mat4x2_f32 : mat4x2<f32> = ub.mat4x2_f32;
+  var mat4x3_f32 : mat4x3<f32> = ub.mat4x3_f32;
+  var mat4x4_f32 : mat4x4<f32> = ub.mat4x4_f32;
+  var mat2x2_f16 : mat2x2<f16> = ub.mat2x2_f16;
+  var mat2x3_f16 : mat2x3<f16> = ub.mat2x3_f16;
+  var mat2x4_f16 : mat2x4<f16> = ub.mat2x4_f16;
+  var mat3x2_f16 : mat3x2<f16> = ub.mat3x2_f16;
+  var mat3x3_f16 : mat3x3<f16> = ub.mat3x3_f16;
+  var mat3x4_f16 : mat3x4<f16> = ub.mat3x4_f16;
+  var mat4x2_f16 : mat4x2<f16> = ub.mat4x2_f16;
+  var mat4x3_f16 : mat4x3<f16> = ub.mat4x3_f16;
+  var mat4x4_f16 : mat4x4<f16> = ub.mat4x4_f16;
+  var arr2_vec3_f32 : array<vec3<f32>, 2> = ub.arr2_vec3_f32;
+  var arr2_mat4x2_f16 : array<mat4x2<f16>, 2> = ub.arr2_mat4x2_f16;
 }
 
 @group(0) @binding(0) var<uniform> ub : UB;
 
 struct UB {
-  a : i32,
-  b : u32,
-  c : f32,
-  d : vec2<i32>,
-  e : vec2<u32>,
-  f : vec2<f32>,
-  g : vec3<i32>,
-  h : vec3<u32>,
-  i : vec3<f32>,
-  j : vec4<i32>,
-  k : vec4<u32>,
-  l : vec4<f32>,
-  m : mat2x2<f32>,
-  n : mat2x3<f32>,
-  o : mat2x4<f32>,
-  p : mat3x2<f32>,
-  q : mat3x3<f32>,
-  r : mat3x4<f32>,
-  s : mat4x2<f32>,
-  t : mat4x3<f32>,
-  u : mat4x4<f32>,
-  v : array<vec3<f32>, 2>,
+  scalar_f32 : f32,
+  scalar_i32 : i32,
+  scalar_u32 : u32,
+  scalar_f16 : f16,
+  vec2_f32 : vec2<f32>,
+  vec2_i32 : vec2<i32>,
+  vec2_u32 : vec2<u32>,
+  vec2_f16 : vec2<f16>,
+  vec3_f32 : vec3<f32>,
+  vec3_i32 : vec3<i32>,
+  vec3_u32 : vec3<u32>,
+  vec3_f16 : vec3<f16>,
+  vec4_f32 : vec4<f32>,
+  vec4_i32 : vec4<i32>,
+  vec4_u32 : vec4<u32>,
+  vec4_f16 : vec4<f16>,
+  mat2x2_f32 : mat2x2<f32>,
+  mat2x3_f32 : mat2x3<f32>,
+  mat2x4_f32 : mat2x4<f32>,
+  mat3x2_f32 : mat3x2<f32>,
+  mat3x3_f32 : mat3x3<f32>,
+  mat3x4_f32 : mat3x4<f32>,
+  mat4x2_f32 : mat4x2<f32>,
+  mat4x3_f32 : mat4x3<f32>,
+  mat4x4_f32 : mat4x4<f32>,
+  mat2x2_f16 : mat2x2<f16>,
+  mat2x3_f16 : mat2x3<f16>,
+  mat2x4_f16 : mat2x4<f16>,
+  mat3x2_f16 : mat3x2<f16>,
+  mat3x3_f16 : mat3x3<f16>,
+  mat3x4_f16 : mat3x4<f16>,
+  mat4x2_f16 : mat4x2<f16>,
+  mat4x3_f16 : mat4x3<f16>,
+  mat4x4_f16 : mat4x4<f16>,
+  arr2_vec3_f32 : array<vec3<f32>, 2>,
+  arr2_mat4x2_f16 : array<mat4x2<f16>, 2>,
 };
 )";
 
     auto* expect = R"(
-@internal(intrinsic_load_uniform_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> i32
-
-@internal(intrinsic_load_uniform_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_1(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> u32
+enable f16;
 
 @internal(intrinsic_load_uniform_f32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_2(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> f32
+fn tint_symbol(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> f32
 
-@internal(intrinsic_load_uniform_vec2_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_3(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec2<i32>
+@internal(intrinsic_load_uniform_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_1(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> i32
 
-@internal(intrinsic_load_uniform_vec2_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_4(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec2<u32>
+@internal(intrinsic_load_uniform_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_2(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> u32
+
+@internal(intrinsic_load_uniform_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_3(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> f16
 
 @internal(intrinsic_load_uniform_vec2_f32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_5(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec2<f32>
+fn tint_symbol_4(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec2<f32>
 
-@internal(intrinsic_load_uniform_vec3_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_6(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec3<i32>
+@internal(intrinsic_load_uniform_vec2_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_5(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec2<i32>
 
-@internal(intrinsic_load_uniform_vec3_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_7(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec3<u32>
+@internal(intrinsic_load_uniform_vec2_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_6(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec2<u32>
+
+@internal(intrinsic_load_uniform_vec2_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_7(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec2<f16>
 
 @internal(intrinsic_load_uniform_vec3_f32) @internal(disable_validation__function_has_no_body)
 fn tint_symbol_8(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec3<f32>
 
-@internal(intrinsic_load_uniform_vec4_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_9(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec4<i32>
+@internal(intrinsic_load_uniform_vec3_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_9(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec3<i32>
 
-@internal(intrinsic_load_uniform_vec4_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_10(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec4<u32>
+@internal(intrinsic_load_uniform_vec3_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_10(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec3<u32>
+
+@internal(intrinsic_load_uniform_vec3_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_11(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec3<f16>
 
 @internal(intrinsic_load_uniform_vec4_f32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_11(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec4<f32>
+fn tint_symbol_12(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec4<f32>
 
-fn tint_symbol_12(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat2x2<f32> {
-  return mat2x2<f32>(tint_symbol_5(buffer, (offset + 0u)), tint_symbol_5(buffer, (offset + 8u)));
+@internal(intrinsic_load_uniform_vec4_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_13(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec4<i32>
+
+@internal(intrinsic_load_uniform_vec4_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_14(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec4<u32>
+
+@internal(intrinsic_load_uniform_vec4_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_15(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec4<f16>
+
+fn tint_symbol_16(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat2x2<f32> {
+  return mat2x2<f32>(tint_symbol_4(buffer, (offset + 0u)), tint_symbol_4(buffer, (offset + 8u)));
 }
 
-fn tint_symbol_13(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat2x3<f32> {
+fn tint_symbol_17(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat2x3<f32> {
   return mat2x3<f32>(tint_symbol_8(buffer, (offset + 0u)), tint_symbol_8(buffer, (offset + 16u)));
 }
 
-fn tint_symbol_14(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat2x4<f32> {
-  return mat2x4<f32>(tint_symbol_11(buffer, (offset + 0u)), tint_symbol_11(buffer, (offset + 16u)));
+fn tint_symbol_18(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat2x4<f32> {
+  return mat2x4<f32>(tint_symbol_12(buffer, (offset + 0u)), tint_symbol_12(buffer, (offset + 16u)));
 }
 
-fn tint_symbol_15(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat3x2<f32> {
-  return mat3x2<f32>(tint_symbol_5(buffer, (offset + 0u)), tint_symbol_5(buffer, (offset + 8u)), tint_symbol_5(buffer, (offset + 16u)));
+fn tint_symbol_19(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat3x2<f32> {
+  return mat3x2<f32>(tint_symbol_4(buffer, (offset + 0u)), tint_symbol_4(buffer, (offset + 8u)), tint_symbol_4(buffer, (offset + 16u)));
 }
 
-fn tint_symbol_16(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat3x3<f32> {
+fn tint_symbol_20(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat3x3<f32> {
   return mat3x3<f32>(tint_symbol_8(buffer, (offset + 0u)), tint_symbol_8(buffer, (offset + 16u)), tint_symbol_8(buffer, (offset + 32u)));
 }
 
-fn tint_symbol_17(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat3x4<f32> {
-  return mat3x4<f32>(tint_symbol_11(buffer, (offset + 0u)), tint_symbol_11(buffer, (offset + 16u)), tint_symbol_11(buffer, (offset + 32u)));
+fn tint_symbol_21(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat3x4<f32> {
+  return mat3x4<f32>(tint_symbol_12(buffer, (offset + 0u)), tint_symbol_12(buffer, (offset + 16u)), tint_symbol_12(buffer, (offset + 32u)));
 }
 
-fn tint_symbol_18(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat4x2<f32> {
-  return mat4x2<f32>(tint_symbol_5(buffer, (offset + 0u)), tint_symbol_5(buffer, (offset + 8u)), tint_symbol_5(buffer, (offset + 16u)), tint_symbol_5(buffer, (offset + 24u)));
+fn tint_symbol_22(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat4x2<f32> {
+  return mat4x2<f32>(tint_symbol_4(buffer, (offset + 0u)), tint_symbol_4(buffer, (offset + 8u)), tint_symbol_4(buffer, (offset + 16u)), tint_symbol_4(buffer, (offset + 24u)));
 }
 
-fn tint_symbol_19(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat4x3<f32> {
+fn tint_symbol_23(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat4x3<f32> {
   return mat4x3<f32>(tint_symbol_8(buffer, (offset + 0u)), tint_symbol_8(buffer, (offset + 16u)), tint_symbol_8(buffer, (offset + 32u)), tint_symbol_8(buffer, (offset + 48u)));
 }
 
-fn tint_symbol_20(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat4x4<f32> {
-  return mat4x4<f32>(tint_symbol_11(buffer, (offset + 0u)), tint_symbol_11(buffer, (offset + 16u)), tint_symbol_11(buffer, (offset + 32u)), tint_symbol_11(buffer, (offset + 48u)));
+fn tint_symbol_24(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat4x4<f32> {
+  return mat4x4<f32>(tint_symbol_12(buffer, (offset + 0u)), tint_symbol_12(buffer, (offset + 16u)), tint_symbol_12(buffer, (offset + 32u)), tint_symbol_12(buffer, (offset + 48u)));
 }
 
-fn tint_symbol_21(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> array<vec3<f32>, 2u> {
+fn tint_symbol_25(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat2x2<f16> {
+  return mat2x2<f16>(tint_symbol_7(buffer, (offset + 0u)), tint_symbol_7(buffer, (offset + 4u)));
+}
+
+fn tint_symbol_26(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat2x3<f16> {
+  return mat2x3<f16>(tint_symbol_11(buffer, (offset + 0u)), tint_symbol_11(buffer, (offset + 8u)));
+}
+
+fn tint_symbol_27(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat2x4<f16> {
+  return mat2x4<f16>(tint_symbol_15(buffer, (offset + 0u)), tint_symbol_15(buffer, (offset + 8u)));
+}
+
+fn tint_symbol_28(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat3x2<f16> {
+  return mat3x2<f16>(tint_symbol_7(buffer, (offset + 0u)), tint_symbol_7(buffer, (offset + 4u)), tint_symbol_7(buffer, (offset + 8u)));
+}
+
+fn tint_symbol_29(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat3x3<f16> {
+  return mat3x3<f16>(tint_symbol_11(buffer, (offset + 0u)), tint_symbol_11(buffer, (offset + 8u)), tint_symbol_11(buffer, (offset + 16u)));
+}
+
+fn tint_symbol_30(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat3x4<f16> {
+  return mat3x4<f16>(tint_symbol_15(buffer, (offset + 0u)), tint_symbol_15(buffer, (offset + 8u)), tint_symbol_15(buffer, (offset + 16u)));
+}
+
+fn tint_symbol_31(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat4x2<f16> {
+  return mat4x2<f16>(tint_symbol_7(buffer, (offset + 0u)), tint_symbol_7(buffer, (offset + 4u)), tint_symbol_7(buffer, (offset + 8u)), tint_symbol_7(buffer, (offset + 12u)));
+}
+
+fn tint_symbol_32(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat4x3<f16> {
+  return mat4x3<f16>(tint_symbol_11(buffer, (offset + 0u)), tint_symbol_11(buffer, (offset + 8u)), tint_symbol_11(buffer, (offset + 16u)), tint_symbol_11(buffer, (offset + 24u)));
+}
+
+fn tint_symbol_33(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat4x4<f16> {
+  return mat4x4<f16>(tint_symbol_15(buffer, (offset + 0u)), tint_symbol_15(buffer, (offset + 8u)), tint_symbol_15(buffer, (offset + 16u)), tint_symbol_15(buffer, (offset + 24u)));
+}
+
+fn tint_symbol_34(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> array<vec3<f32>, 2u> {
   var arr : array<vec3<f32>, 2u>;
-  for(var i_1 = 0u; (i_1 < 2u); i_1 = (i_1 + 1u)) {
-    arr[i_1] = tint_symbol_8(buffer, (offset + (i_1 * 16u)));
+  for(var i = 0u; (i < 2u); i = (i + 1u)) {
+    arr[i] = tint_symbol_8(buffer, (offset + (i * 16u)));
   }
   return arr;
 }
 
+fn tint_symbol_35(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> array<mat4x2<f16>, 2u> {
+  var arr_1 : array<mat4x2<f16>, 2u>;
+  for(var i_1 = 0u; (i_1 < 2u); i_1 = (i_1 + 1u)) {
+    arr_1[i_1] = tint_symbol_31(buffer, (offset + (i_1 * 16u)));
+  }
+  return arr_1;
+}
+
 @compute @workgroup_size(1)
 fn main() {
-  var a : i32 = tint_symbol(&(ub), 0u);
-  var b : u32 = tint_symbol_1(&(ub), 4u);
-  var c : f32 = tint_symbol_2(&(ub), 8u);
-  var d : vec2<i32> = tint_symbol_3(&(ub), 16u);
-  var e : vec2<u32> = tint_symbol_4(&(ub), 24u);
-  var f : vec2<f32> = tint_symbol_5(&(ub), 32u);
-  var g : vec3<i32> = tint_symbol_6(&(ub), 48u);
-  var h : vec3<u32> = tint_symbol_7(&(ub), 64u);
-  var i : vec3<f32> = tint_symbol_8(&(ub), 80u);
-  var j : vec4<i32> = tint_symbol_9(&(ub), 96u);
-  var k : vec4<u32> = tint_symbol_10(&(ub), 112u);
-  var l : vec4<f32> = tint_symbol_11(&(ub), 128u);
-  var m : mat2x2<f32> = tint_symbol_12(&(ub), 144u);
-  var n : mat2x3<f32> = tint_symbol_13(&(ub), 160u);
-  var o : mat2x4<f32> = tint_symbol_14(&(ub), 192u);
-  var p : mat3x2<f32> = tint_symbol_15(&(ub), 224u);
-  var q : mat3x3<f32> = tint_symbol_16(&(ub), 256u);
-  var r : mat3x4<f32> = tint_symbol_17(&(ub), 304u);
-  var s : mat4x2<f32> = tint_symbol_18(&(ub), 352u);
-  var t : mat4x3<f32> = tint_symbol_19(&(ub), 384u);
-  var u : mat4x4<f32> = tint_symbol_20(&(ub), 448u);
-  var v : array<vec3<f32>, 2> = tint_symbol_21(&(ub), 512u);
+  var scalar_f32 : f32 = tint_symbol(&(ub), 0u);
+  var scalar_i32 : i32 = tint_symbol_1(&(ub), 4u);
+  var scalar_u32 : u32 = tint_symbol_2(&(ub), 8u);
+  var scalar_f16 : f16 = tint_symbol_3(&(ub), 12u);
+  var vec2_f32 : vec2<f32> = tint_symbol_4(&(ub), 16u);
+  var vec2_i32 : vec2<i32> = tint_symbol_5(&(ub), 24u);
+  var vec2_u32 : vec2<u32> = tint_symbol_6(&(ub), 32u);
+  var vec2_f16 : vec2<f16> = tint_symbol_7(&(ub), 40u);
+  var vec3_f32 : vec3<f32> = tint_symbol_8(&(ub), 48u);
+  var vec3_i32 : vec3<i32> = tint_symbol_9(&(ub), 64u);
+  var vec3_u32 : vec3<u32> = tint_symbol_10(&(ub), 80u);
+  var vec3_f16 : vec3<f16> = tint_symbol_11(&(ub), 96u);
+  var vec4_f32 : vec4<f32> = tint_symbol_12(&(ub), 112u);
+  var vec4_i32 : vec4<i32> = tint_symbol_13(&(ub), 128u);
+  var vec4_u32 : vec4<u32> = tint_symbol_14(&(ub), 144u);
+  var vec4_f16 : vec4<f16> = tint_symbol_15(&(ub), 160u);
+  var mat2x2_f32 : mat2x2<f32> = tint_symbol_16(&(ub), 168u);
+  var mat2x3_f32 : mat2x3<f32> = tint_symbol_17(&(ub), 192u);
+  var mat2x4_f32 : mat2x4<f32> = tint_symbol_18(&(ub), 224u);
+  var mat3x2_f32 : mat3x2<f32> = tint_symbol_19(&(ub), 256u);
+  var mat3x3_f32 : mat3x3<f32> = tint_symbol_20(&(ub), 288u);
+  var mat3x4_f32 : mat3x4<f32> = tint_symbol_21(&(ub), 336u);
+  var mat4x2_f32 : mat4x2<f32> = tint_symbol_22(&(ub), 384u);
+  var mat4x3_f32 : mat4x3<f32> = tint_symbol_23(&(ub), 416u);
+  var mat4x4_f32 : mat4x4<f32> = tint_symbol_24(&(ub), 480u);
+  var mat2x2_f16 : mat2x2<f16> = tint_symbol_25(&(ub), 544u);
+  var mat2x3_f16 : mat2x3<f16> = tint_symbol_26(&(ub), 552u);
+  var mat2x4_f16 : mat2x4<f16> = tint_symbol_27(&(ub), 568u);
+  var mat3x2_f16 : mat3x2<f16> = tint_symbol_28(&(ub), 584u);
+  var mat3x3_f16 : mat3x3<f16> = tint_symbol_29(&(ub), 600u);
+  var mat3x4_f16 : mat3x4<f16> = tint_symbol_30(&(ub), 624u);
+  var mat4x2_f16 : mat4x2<f16> = tint_symbol_31(&(ub), 648u);
+  var mat4x3_f16 : mat4x3<f16> = tint_symbol_32(&(ub), 664u);
+  var mat4x4_f16 : mat4x4<f16> = tint_symbol_33(&(ub), 696u);
+  var arr2_vec3_f32 : array<vec3<f32>, 2> = tint_symbol_34(&(ub), 736u);
+  var arr2_mat4x2_f16 : array<mat4x2<f16>, 2> = tint_symbol_35(&(ub), 768u);
 }
 
 @group(0) @binding(0) var<uniform> ub : UB;
 
 struct UB {
-  a : i32,
-  b : u32,
-  c : f32,
-  d : vec2<i32>,
-  e : vec2<u32>,
-  f : vec2<f32>,
-  g : vec3<i32>,
-  h : vec3<u32>,
-  i : vec3<f32>,
-  j : vec4<i32>,
-  k : vec4<u32>,
-  l : vec4<f32>,
-  m : mat2x2<f32>,
-  n : mat2x3<f32>,
-  o : mat2x4<f32>,
-  p : mat3x2<f32>,
-  q : mat3x3<f32>,
-  r : mat3x4<f32>,
-  s : mat4x2<f32>,
-  t : mat4x3<f32>,
-  u : mat4x4<f32>,
-  v : array<vec3<f32>, 2>,
+  scalar_f32 : f32,
+  scalar_i32 : i32,
+  scalar_u32 : u32,
+  scalar_f16 : f16,
+  vec2_f32 : vec2<f32>,
+  vec2_i32 : vec2<i32>,
+  vec2_u32 : vec2<u32>,
+  vec2_f16 : vec2<f16>,
+  vec3_f32 : vec3<f32>,
+  vec3_i32 : vec3<i32>,
+  vec3_u32 : vec3<u32>,
+  vec3_f16 : vec3<f16>,
+  vec4_f32 : vec4<f32>,
+  vec4_i32 : vec4<i32>,
+  vec4_u32 : vec4<u32>,
+  vec4_f16 : vec4<f16>,
+  mat2x2_f32 : mat2x2<f32>,
+  mat2x3_f32 : mat2x3<f32>,
+  mat2x4_f32 : mat2x4<f32>,
+  mat3x2_f32 : mat3x2<f32>,
+  mat3x3_f32 : mat3x3<f32>,
+  mat3x4_f32 : mat3x4<f32>,
+  mat4x2_f32 : mat4x2<f32>,
+  mat4x3_f32 : mat4x3<f32>,
+  mat4x4_f32 : mat4x4<f32>,
+  mat2x2_f16 : mat2x2<f16>,
+  mat2x3_f16 : mat2x3<f16>,
+  mat2x4_f16 : mat2x4<f16>,
+  mat3x2_f16 : mat3x2<f16>,
+  mat3x3_f16 : mat3x3<f16>,
+  mat3x4_f16 : mat3x4<f16>,
+  mat4x2_f16 : mat4x2<f16>,
+  mat4x3_f16 : mat4x3<f16>,
+  mat4x4_f16 : mat4x4<f16>,
+  arr2_vec3_f32 : array<vec3<f32>, 2>,
+  arr2_mat4x2_f16 : array<mat4x2<f16>, 2>,
 }
 )";
 
@@ -835,209 +1299,342 @@
 
 TEST_F(DecomposeMemoryAccessTest, SB_BasicStore) {
     auto* src = R"(
+enable f16;
+
 struct SB {
-  a : i32,
-  b : u32,
-  c : f32,
-  d : vec2<i32>,
-  e : vec2<u32>,
-  f : vec2<f32>,
-  g : vec3<i32>,
-  h : vec3<u32>,
-  i : vec3<f32>,
-  j : vec4<i32>,
-  k : vec4<u32>,
-  l : vec4<f32>,
-  m : mat2x2<f32>,
-  n : mat2x3<f32>,
-  o : mat2x4<f32>,
-  p : mat3x2<f32>,
-  q : mat3x3<f32>,
-  r : mat3x4<f32>,
-  s : mat4x2<f32>,
-  t : mat4x3<f32>,
-  u : mat4x4<f32>,
-  v : array<vec3<f32>, 2>,
+  scalar_f32 : f32,
+  scalar_i32 : i32,
+  scalar_u32 : u32,
+  scalar_f16 : f16,
+  vec2_f32 : vec2<f32>,
+  vec2_i32 : vec2<i32>,
+  vec2_u32 : vec2<u32>,
+  vec2_f16 : vec2<f16>,
+  vec3_f32 : vec3<f32>,
+  vec3_i32 : vec3<i32>,
+  vec3_u32 : vec3<u32>,
+  vec3_f16 : vec3<f16>,
+  vec4_f32 : vec4<f32>,
+  vec4_i32 : vec4<i32>,
+  vec4_u32 : vec4<u32>,
+  vec4_f16 : vec4<f16>,
+  mat2x2_f32 : mat2x2<f32>,
+  mat2x3_f32 : mat2x3<f32>,
+  mat2x4_f32 : mat2x4<f32>,
+  mat3x2_f32 : mat3x2<f32>,
+  mat3x3_f32 : mat3x3<f32>,
+  mat3x4_f32 : mat3x4<f32>,
+  mat4x2_f32 : mat4x2<f32>,
+  mat4x3_f32 : mat4x3<f32>,
+  mat4x4_f32 : mat4x4<f32>,
+  mat2x2_f16 : mat2x2<f16>,
+  mat2x3_f16 : mat2x3<f16>,
+  mat2x4_f16 : mat2x4<f16>,
+  mat3x2_f16 : mat3x2<f16>,
+  mat3x3_f16 : mat3x3<f16>,
+  mat3x4_f16 : mat3x4<f16>,
+  mat4x2_f16 : mat4x2<f16>,
+  mat4x3_f16 : mat4x3<f16>,
+  mat4x4_f16 : mat4x4<f16>,
+  arr2_vec3_f32 : array<vec3<f32>, 2>,
+  arr2_mat4x2_f16 : array<mat4x2<f16>, 2>,
 };
 
 @group(0) @binding(0) var<storage, read_write> sb : SB;
 
 @compute @workgroup_size(1)
 fn main() {
-  sb.a = i32();
-  sb.b = u32();
-  sb.c = f32();
-  sb.d = vec2<i32>();
-  sb.e = vec2<u32>();
-  sb.f = vec2<f32>();
-  sb.g = vec3<i32>();
-  sb.h = vec3<u32>();
-  sb.i = vec3<f32>();
-  sb.j = vec4<i32>();
-  sb.k = vec4<u32>();
-  sb.l = vec4<f32>();
-  sb.m = mat2x2<f32>();
-  sb.n = mat2x3<f32>();
-  sb.o = mat2x4<f32>();
-  sb.p = mat3x2<f32>();
-  sb.q = mat3x3<f32>();
-  sb.r = mat3x4<f32>();
-  sb.s = mat4x2<f32>();
-  sb.t = mat4x3<f32>();
-  sb.u = mat4x4<f32>();
-  sb.v = array<vec3<f32>, 2>();
+  sb.scalar_f32 = f32();
+  sb.scalar_i32 = i32();
+  sb.scalar_u32 = u32();
+  sb.scalar_f16 = f16();
+  sb.vec2_f32 = vec2<f32>();
+  sb.vec2_i32 = vec2<i32>();
+  sb.vec2_u32 = vec2<u32>();
+  sb.vec2_f16 = vec2<f16>();
+  sb.vec3_f32 = vec3<f32>();
+  sb.vec3_i32 = vec3<i32>();
+  sb.vec3_u32 = vec3<u32>();
+  sb.vec3_f16 = vec3<f16>();
+  sb.vec4_f32 = vec4<f32>();
+  sb.vec4_i32 = vec4<i32>();
+  sb.vec4_u32 = vec4<u32>();
+  sb.vec4_f16 = vec4<f16>();
+  sb.mat2x2_f32 = mat2x2<f32>();
+  sb.mat2x3_f32 = mat2x3<f32>();
+  sb.mat2x4_f32 = mat2x4<f32>();
+  sb.mat3x2_f32 = mat3x2<f32>();
+  sb.mat3x3_f32 = mat3x3<f32>();
+  sb.mat3x4_f32 = mat3x4<f32>();
+  sb.mat4x2_f32 = mat4x2<f32>();
+  sb.mat4x3_f32 = mat4x3<f32>();
+  sb.mat4x4_f32 = mat4x4<f32>();
+  sb.mat2x2_f16 = mat2x2<f16>();
+  sb.mat2x3_f16 = mat2x3<f16>();
+  sb.mat2x4_f16 = mat2x4<f16>();
+  sb.mat3x2_f16 = mat3x2<f16>();
+  sb.mat3x3_f16 = mat3x3<f16>();
+  sb.mat3x4_f16 = mat3x4<f16>();
+  sb.mat4x2_f16 = mat4x2<f16>();
+  sb.mat4x3_f16 = mat4x3<f16>();
+  sb.mat4x4_f16 = mat4x4<f16>();
+  sb.arr2_vec3_f32 = array<vec3<f32>, 2>();
+  sb.arr2_mat4x2_f16 = array<mat4x2<f16>, 2>();
 }
 )";
 
     auto* expect = R"(
+enable f16;
+
 struct SB {
-  a : i32,
-  b : u32,
-  c : f32,
-  d : vec2<i32>,
-  e : vec2<u32>,
-  f : vec2<f32>,
-  g : vec3<i32>,
-  h : vec3<u32>,
-  i : vec3<f32>,
-  j : vec4<i32>,
-  k : vec4<u32>,
-  l : vec4<f32>,
-  m : mat2x2<f32>,
-  n : mat2x3<f32>,
-  o : mat2x4<f32>,
-  p : mat3x2<f32>,
-  q : mat3x3<f32>,
-  r : mat3x4<f32>,
-  s : mat4x2<f32>,
-  t : mat4x3<f32>,
-  u : mat4x4<f32>,
-  v : array<vec3<f32>, 2>,
+  scalar_f32 : f32,
+  scalar_i32 : i32,
+  scalar_u32 : u32,
+  scalar_f16 : f16,
+  vec2_f32 : vec2<f32>,
+  vec2_i32 : vec2<i32>,
+  vec2_u32 : vec2<u32>,
+  vec2_f16 : vec2<f16>,
+  vec3_f32 : vec3<f32>,
+  vec3_i32 : vec3<i32>,
+  vec3_u32 : vec3<u32>,
+  vec3_f16 : vec3<f16>,
+  vec4_f32 : vec4<f32>,
+  vec4_i32 : vec4<i32>,
+  vec4_u32 : vec4<u32>,
+  vec4_f16 : vec4<f16>,
+  mat2x2_f32 : mat2x2<f32>,
+  mat2x3_f32 : mat2x3<f32>,
+  mat2x4_f32 : mat2x4<f32>,
+  mat3x2_f32 : mat3x2<f32>,
+  mat3x3_f32 : mat3x3<f32>,
+  mat3x4_f32 : mat3x4<f32>,
+  mat4x2_f32 : mat4x2<f32>,
+  mat4x3_f32 : mat4x3<f32>,
+  mat4x4_f32 : mat4x4<f32>,
+  mat2x2_f16 : mat2x2<f16>,
+  mat2x3_f16 : mat2x3<f16>,
+  mat2x4_f16 : mat2x4<f16>,
+  mat3x2_f16 : mat3x2<f16>,
+  mat3x3_f16 : mat3x3<f16>,
+  mat3x4_f16 : mat3x4<f16>,
+  mat4x2_f16 : mat4x2<f16>,
+  mat4x3_f16 : mat4x3<f16>,
+  mat4x4_f16 : mat4x4<f16>,
+  arr2_vec3_f32 : array<vec3<f32>, 2>,
+  arr2_mat4x2_f16 : array<mat4x2<f16>, 2>,
 }
 
 @group(0) @binding(0) var<storage, read_write> sb : SB;
 
+@internal(intrinsic_store_storage_f32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : f32)
+
 @internal(intrinsic_store_storage_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : i32)
+fn tint_symbol_1(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : i32)
 
 @internal(intrinsic_store_storage_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_1(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : u32)
+fn tint_symbol_2(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : u32)
 
-@internal(intrinsic_store_storage_f32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_2(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : f32)
-
-@internal(intrinsic_store_storage_vec2_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_3(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec2<i32>)
-
-@internal(intrinsic_store_storage_vec2_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_4(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec2<u32>)
+@internal(intrinsic_store_storage_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_3(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : f16)
 
 @internal(intrinsic_store_storage_vec2_f32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_5(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec2<f32>)
+fn tint_symbol_4(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec2<f32>)
 
-@internal(intrinsic_store_storage_vec3_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_6(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec3<i32>)
+@internal(intrinsic_store_storage_vec2_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_5(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec2<i32>)
 
-@internal(intrinsic_store_storage_vec3_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_7(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec3<u32>)
+@internal(intrinsic_store_storage_vec2_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_6(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec2<u32>)
+
+@internal(intrinsic_store_storage_vec2_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_7(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec2<f16>)
 
 @internal(intrinsic_store_storage_vec3_f32) @internal(disable_validation__function_has_no_body)
 fn tint_symbol_8(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec3<f32>)
 
-@internal(intrinsic_store_storage_vec4_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_9(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec4<i32>)
+@internal(intrinsic_store_storage_vec3_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_9(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec3<i32>)
 
-@internal(intrinsic_store_storage_vec4_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_10(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec4<u32>)
+@internal(intrinsic_store_storage_vec3_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_10(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec3<u32>)
+
+@internal(intrinsic_store_storage_vec3_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_11(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec3<f16>)
 
 @internal(intrinsic_store_storage_vec4_f32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_11(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec4<f32>)
+fn tint_symbol_12(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec4<f32>)
 
-fn tint_symbol_12(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x2<f32>) {
-  tint_symbol_5(buffer, (offset + 0u), value[0u]);
-  tint_symbol_5(buffer, (offset + 8u), value[1u]);
+@internal(intrinsic_store_storage_vec4_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_13(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec4<i32>)
+
+@internal(intrinsic_store_storage_vec4_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_14(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec4<u32>)
+
+@internal(intrinsic_store_storage_vec4_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_15(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec4<f16>)
+
+fn tint_symbol_16(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x2<f32>) {
+  tint_symbol_4(buffer, (offset + 0u), value[0u]);
+  tint_symbol_4(buffer, (offset + 8u), value[1u]);
 }
 
-fn tint_symbol_13(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x3<f32>) {
+fn tint_symbol_17(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x3<f32>) {
   tint_symbol_8(buffer, (offset + 0u), value[0u]);
   tint_symbol_8(buffer, (offset + 16u), value[1u]);
 }
 
-fn tint_symbol_14(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x4<f32>) {
-  tint_symbol_11(buffer, (offset + 0u), value[0u]);
-  tint_symbol_11(buffer, (offset + 16u), value[1u]);
+fn tint_symbol_18(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x4<f32>) {
+  tint_symbol_12(buffer, (offset + 0u), value[0u]);
+  tint_symbol_12(buffer, (offset + 16u), value[1u]);
 }
 
-fn tint_symbol_15(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x2<f32>) {
-  tint_symbol_5(buffer, (offset + 0u), value[0u]);
-  tint_symbol_5(buffer, (offset + 8u), value[1u]);
-  tint_symbol_5(buffer, (offset + 16u), value[2u]);
+fn tint_symbol_19(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x2<f32>) {
+  tint_symbol_4(buffer, (offset + 0u), value[0u]);
+  tint_symbol_4(buffer, (offset + 8u), value[1u]);
+  tint_symbol_4(buffer, (offset + 16u), value[2u]);
 }
 
-fn tint_symbol_16(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x3<f32>) {
+fn tint_symbol_20(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x3<f32>) {
   tint_symbol_8(buffer, (offset + 0u), value[0u]);
   tint_symbol_8(buffer, (offset + 16u), value[1u]);
   tint_symbol_8(buffer, (offset + 32u), value[2u]);
 }
 
-fn tint_symbol_17(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x4<f32>) {
-  tint_symbol_11(buffer, (offset + 0u), value[0u]);
-  tint_symbol_11(buffer, (offset + 16u), value[1u]);
-  tint_symbol_11(buffer, (offset + 32u), value[2u]);
+fn tint_symbol_21(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x4<f32>) {
+  tint_symbol_12(buffer, (offset + 0u), value[0u]);
+  tint_symbol_12(buffer, (offset + 16u), value[1u]);
+  tint_symbol_12(buffer, (offset + 32u), value[2u]);
 }
 
-fn tint_symbol_18(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x2<f32>) {
-  tint_symbol_5(buffer, (offset + 0u), value[0u]);
-  tint_symbol_5(buffer, (offset + 8u), value[1u]);
-  tint_symbol_5(buffer, (offset + 16u), value[2u]);
-  tint_symbol_5(buffer, (offset + 24u), value[3u]);
+fn tint_symbol_22(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x2<f32>) {
+  tint_symbol_4(buffer, (offset + 0u), value[0u]);
+  tint_symbol_4(buffer, (offset + 8u), value[1u]);
+  tint_symbol_4(buffer, (offset + 16u), value[2u]);
+  tint_symbol_4(buffer, (offset + 24u), value[3u]);
 }
 
-fn tint_symbol_19(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x3<f32>) {
+fn tint_symbol_23(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x3<f32>) {
   tint_symbol_8(buffer, (offset + 0u), value[0u]);
   tint_symbol_8(buffer, (offset + 16u), value[1u]);
   tint_symbol_8(buffer, (offset + 32u), value[2u]);
   tint_symbol_8(buffer, (offset + 48u), value[3u]);
 }
 
-fn tint_symbol_20(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x4<f32>) {
-  tint_symbol_11(buffer, (offset + 0u), value[0u]);
-  tint_symbol_11(buffer, (offset + 16u), value[1u]);
-  tint_symbol_11(buffer, (offset + 32u), value[2u]);
-  tint_symbol_11(buffer, (offset + 48u), value[3u]);
+fn tint_symbol_24(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x4<f32>) {
+  tint_symbol_12(buffer, (offset + 0u), value[0u]);
+  tint_symbol_12(buffer, (offset + 16u), value[1u]);
+  tint_symbol_12(buffer, (offset + 32u), value[2u]);
+  tint_symbol_12(buffer, (offset + 48u), value[3u]);
 }
 
-fn tint_symbol_21(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : array<vec3<f32>, 2u>) {
+fn tint_symbol_25(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x2<f16>) {
+  tint_symbol_7(buffer, (offset + 0u), value[0u]);
+  tint_symbol_7(buffer, (offset + 4u), value[1u]);
+}
+
+fn tint_symbol_26(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x3<f16>) {
+  tint_symbol_11(buffer, (offset + 0u), value[0u]);
+  tint_symbol_11(buffer, (offset + 8u), value[1u]);
+}
+
+fn tint_symbol_27(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x4<f16>) {
+  tint_symbol_15(buffer, (offset + 0u), value[0u]);
+  tint_symbol_15(buffer, (offset + 8u), value[1u]);
+}
+
+fn tint_symbol_28(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x2<f16>) {
+  tint_symbol_7(buffer, (offset + 0u), value[0u]);
+  tint_symbol_7(buffer, (offset + 4u), value[1u]);
+  tint_symbol_7(buffer, (offset + 8u), value[2u]);
+}
+
+fn tint_symbol_29(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x3<f16>) {
+  tint_symbol_11(buffer, (offset + 0u), value[0u]);
+  tint_symbol_11(buffer, (offset + 8u), value[1u]);
+  tint_symbol_11(buffer, (offset + 16u), value[2u]);
+}
+
+fn tint_symbol_30(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x4<f16>) {
+  tint_symbol_15(buffer, (offset + 0u), value[0u]);
+  tint_symbol_15(buffer, (offset + 8u), value[1u]);
+  tint_symbol_15(buffer, (offset + 16u), value[2u]);
+}
+
+fn tint_symbol_31(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x2<f16>) {
+  tint_symbol_7(buffer, (offset + 0u), value[0u]);
+  tint_symbol_7(buffer, (offset + 4u), value[1u]);
+  tint_symbol_7(buffer, (offset + 8u), value[2u]);
+  tint_symbol_7(buffer, (offset + 12u), value[3u]);
+}
+
+fn tint_symbol_32(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x3<f16>) {
+  tint_symbol_11(buffer, (offset + 0u), value[0u]);
+  tint_symbol_11(buffer, (offset + 8u), value[1u]);
+  tint_symbol_11(buffer, (offset + 16u), value[2u]);
+  tint_symbol_11(buffer, (offset + 24u), value[3u]);
+}
+
+fn tint_symbol_33(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x4<f16>) {
+  tint_symbol_15(buffer, (offset + 0u), value[0u]);
+  tint_symbol_15(buffer, (offset + 8u), value[1u]);
+  tint_symbol_15(buffer, (offset + 16u), value[2u]);
+  tint_symbol_15(buffer, (offset + 24u), value[3u]);
+}
+
+fn tint_symbol_34(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : array<vec3<f32>, 2u>) {
   var array = value;
+  for(var i = 0u; (i < 2u); i = (i + 1u)) {
+    tint_symbol_8(buffer, (offset + (i * 16u)), array[i]);
+  }
+}
+
+fn tint_symbol_35(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : array<mat4x2<f16>, 2u>) {
+  var array_1 = value;
   for(var i_1 = 0u; (i_1 < 2u); i_1 = (i_1 + 1u)) {
-    tint_symbol_8(buffer, (offset + (i_1 * 16u)), array[i_1]);
+    tint_symbol_31(buffer, (offset + (i_1 * 16u)), array_1[i_1]);
   }
 }
 
 @compute @workgroup_size(1)
 fn main() {
-  tint_symbol(&(sb), 0u, i32());
-  tint_symbol_1(&(sb), 4u, u32());
-  tint_symbol_2(&(sb), 8u, f32());
-  tint_symbol_3(&(sb), 16u, vec2<i32>());
-  tint_symbol_4(&(sb), 24u, vec2<u32>());
-  tint_symbol_5(&(sb), 32u, vec2<f32>());
-  tint_symbol_6(&(sb), 48u, vec3<i32>());
-  tint_symbol_7(&(sb), 64u, vec3<u32>());
-  tint_symbol_8(&(sb), 80u, vec3<f32>());
-  tint_symbol_9(&(sb), 96u, vec4<i32>());
-  tint_symbol_10(&(sb), 112u, vec4<u32>());
-  tint_symbol_11(&(sb), 128u, vec4<f32>());
-  tint_symbol_12(&(sb), 144u, mat2x2<f32>());
-  tint_symbol_13(&(sb), 160u, mat2x3<f32>());
-  tint_symbol_14(&(sb), 192u, mat2x4<f32>());
-  tint_symbol_15(&(sb), 224u, mat3x2<f32>());
-  tint_symbol_16(&(sb), 256u, mat3x3<f32>());
-  tint_symbol_17(&(sb), 304u, mat3x4<f32>());
-  tint_symbol_18(&(sb), 352u, mat4x2<f32>());
-  tint_symbol_19(&(sb), 384u, mat4x3<f32>());
-  tint_symbol_20(&(sb), 448u, mat4x4<f32>());
-  tint_symbol_21(&(sb), 512u, array<vec3<f32>, 2>());
+  tint_symbol(&(sb), 0u, f32());
+  tint_symbol_1(&(sb), 4u, i32());
+  tint_symbol_2(&(sb), 8u, u32());
+  tint_symbol_3(&(sb), 12u, f16());
+  tint_symbol_4(&(sb), 16u, vec2<f32>());
+  tint_symbol_5(&(sb), 24u, vec2<i32>());
+  tint_symbol_6(&(sb), 32u, vec2<u32>());
+  tint_symbol_7(&(sb), 40u, vec2<f16>());
+  tint_symbol_8(&(sb), 48u, vec3<f32>());
+  tint_symbol_9(&(sb), 64u, vec3<i32>());
+  tint_symbol_10(&(sb), 80u, vec3<u32>());
+  tint_symbol_11(&(sb), 96u, vec3<f16>());
+  tint_symbol_12(&(sb), 112u, vec4<f32>());
+  tint_symbol_13(&(sb), 128u, vec4<i32>());
+  tint_symbol_14(&(sb), 144u, vec4<u32>());
+  tint_symbol_15(&(sb), 160u, vec4<f16>());
+  tint_symbol_16(&(sb), 168u, mat2x2<f32>());
+  tint_symbol_17(&(sb), 192u, mat2x3<f32>());
+  tint_symbol_18(&(sb), 224u, mat2x4<f32>());
+  tint_symbol_19(&(sb), 256u, mat3x2<f32>());
+  tint_symbol_20(&(sb), 288u, mat3x3<f32>());
+  tint_symbol_21(&(sb), 336u, mat3x4<f32>());
+  tint_symbol_22(&(sb), 384u, mat4x2<f32>());
+  tint_symbol_23(&(sb), 416u, mat4x3<f32>());
+  tint_symbol_24(&(sb), 480u, mat4x4<f32>());
+  tint_symbol_25(&(sb), 544u, mat2x2<f16>());
+  tint_symbol_26(&(sb), 552u, mat2x3<f16>());
+  tint_symbol_27(&(sb), 568u, mat2x4<f16>());
+  tint_symbol_28(&(sb), 584u, mat3x2<f16>());
+  tint_symbol_29(&(sb), 600u, mat3x3<f16>());
+  tint_symbol_30(&(sb), 624u, mat3x4<f16>());
+  tint_symbol_31(&(sb), 648u, mat4x2<f16>());
+  tint_symbol_32(&(sb), 664u, mat4x3<f16>());
+  tint_symbol_33(&(sb), 696u, mat4x4<f16>());
+  tint_symbol_34(&(sb), 736u, array<vec3<f32>, 2>());
+  tint_symbol_35(&(sb), 768u, array<mat4x2<f16>, 2>());
 }
 )";
 
@@ -1048,209 +1645,342 @@
 
 TEST_F(DecomposeMemoryAccessTest, SB_BasicStore_OutOfOrder) {
     auto* src = R"(
+enable f16;
+
 @compute @workgroup_size(1)
 fn main() {
-  sb.a = i32();
-  sb.b = u32();
-  sb.c = f32();
-  sb.d = vec2<i32>();
-  sb.e = vec2<u32>();
-  sb.f = vec2<f32>();
-  sb.g = vec3<i32>();
-  sb.h = vec3<u32>();
-  sb.i = vec3<f32>();
-  sb.j = vec4<i32>();
-  sb.k = vec4<u32>();
-  sb.l = vec4<f32>();
-  sb.m = mat2x2<f32>();
-  sb.n = mat2x3<f32>();
-  sb.o = mat2x4<f32>();
-  sb.p = mat3x2<f32>();
-  sb.q = mat3x3<f32>();
-  sb.r = mat3x4<f32>();
-  sb.s = mat4x2<f32>();
-  sb.t = mat4x3<f32>();
-  sb.u = mat4x4<f32>();
-  sb.v = array<vec3<f32>, 2>();
+  sb.scalar_f32 = f32();
+  sb.scalar_i32 = i32();
+  sb.scalar_u32 = u32();
+  sb.scalar_f16 = f16();
+  sb.vec2_f32 = vec2<f32>();
+  sb.vec2_i32 = vec2<i32>();
+  sb.vec2_u32 = vec2<u32>();
+  sb.vec2_f16 = vec2<f16>();
+  sb.vec3_f32 = vec3<f32>();
+  sb.vec3_i32 = vec3<i32>();
+  sb.vec3_u32 = vec3<u32>();
+  sb.vec3_f16 = vec3<f16>();
+  sb.vec4_f32 = vec4<f32>();
+  sb.vec4_i32 = vec4<i32>();
+  sb.vec4_u32 = vec4<u32>();
+  sb.vec4_f16 = vec4<f16>();
+  sb.mat2x2_f32 = mat2x2<f32>();
+  sb.mat2x3_f32 = mat2x3<f32>();
+  sb.mat2x4_f32 = mat2x4<f32>();
+  sb.mat3x2_f32 = mat3x2<f32>();
+  sb.mat3x3_f32 = mat3x3<f32>();
+  sb.mat3x4_f32 = mat3x4<f32>();
+  sb.mat4x2_f32 = mat4x2<f32>();
+  sb.mat4x3_f32 = mat4x3<f32>();
+  sb.mat4x4_f32 = mat4x4<f32>();
+  sb.mat2x2_f16 = mat2x2<f16>();
+  sb.mat2x3_f16 = mat2x3<f16>();
+  sb.mat2x4_f16 = mat2x4<f16>();
+  sb.mat3x2_f16 = mat3x2<f16>();
+  sb.mat3x3_f16 = mat3x3<f16>();
+  sb.mat3x4_f16 = mat3x4<f16>();
+  sb.mat4x2_f16 = mat4x2<f16>();
+  sb.mat4x3_f16 = mat4x3<f16>();
+  sb.mat4x4_f16 = mat4x4<f16>();
+  sb.arr2_vec3_f32 = array<vec3<f32>, 2>();
+  sb.arr2_mat4x2_f16 = array<mat4x2<f16>, 2>();
 }
 
 @group(0) @binding(0) var<storage, read_write> sb : SB;
 
 struct SB {
-  a : i32,
-  b : u32,
-  c : f32,
-  d : vec2<i32>,
-  e : vec2<u32>,
-  f : vec2<f32>,
-  g : vec3<i32>,
-  h : vec3<u32>,
-  i : vec3<f32>,
-  j : vec4<i32>,
-  k : vec4<u32>,
-  l : vec4<f32>,
-  m : mat2x2<f32>,
-  n : mat2x3<f32>,
-  o : mat2x4<f32>,
-  p : mat3x2<f32>,
-  q : mat3x3<f32>,
-  r : mat3x4<f32>,
-  s : mat4x2<f32>,
-  t : mat4x3<f32>,
-  u : mat4x4<f32>,
-  v : array<vec3<f32>, 2>,
+  scalar_f32 : f32,
+  scalar_i32 : i32,
+  scalar_u32 : u32,
+  scalar_f16 : f16,
+  vec2_f32 : vec2<f32>,
+  vec2_i32 : vec2<i32>,
+  vec2_u32 : vec2<u32>,
+  vec2_f16 : vec2<f16>,
+  vec3_f32 : vec3<f32>,
+  vec3_i32 : vec3<i32>,
+  vec3_u32 : vec3<u32>,
+  vec3_f16 : vec3<f16>,
+  vec4_f32 : vec4<f32>,
+  vec4_i32 : vec4<i32>,
+  vec4_u32 : vec4<u32>,
+  vec4_f16 : vec4<f16>,
+  mat2x2_f32 : mat2x2<f32>,
+  mat2x3_f32 : mat2x3<f32>,
+  mat2x4_f32 : mat2x4<f32>,
+  mat3x2_f32 : mat3x2<f32>,
+  mat3x3_f32 : mat3x3<f32>,
+  mat3x4_f32 : mat3x4<f32>,
+  mat4x2_f32 : mat4x2<f32>,
+  mat4x3_f32 : mat4x3<f32>,
+  mat4x4_f32 : mat4x4<f32>,
+  mat2x2_f16 : mat2x2<f16>,
+  mat2x3_f16 : mat2x3<f16>,
+  mat2x4_f16 : mat2x4<f16>,
+  mat3x2_f16 : mat3x2<f16>,
+  mat3x3_f16 : mat3x3<f16>,
+  mat3x4_f16 : mat3x4<f16>,
+  mat4x2_f16 : mat4x2<f16>,
+  mat4x3_f16 : mat4x3<f16>,
+  mat4x4_f16 : mat4x4<f16>,
+  arr2_vec3_f32 : array<vec3<f32>, 2>,
+  arr2_mat4x2_f16 : array<mat4x2<f16>, 2>,
 };
 )";
 
     auto* expect = R"(
-@internal(intrinsic_store_storage_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : i32)
-
-@internal(intrinsic_store_storage_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_1(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : u32)
+enable f16;
 
 @internal(intrinsic_store_storage_f32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_2(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : f32)
+fn tint_symbol(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : f32)
 
-@internal(intrinsic_store_storage_vec2_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_3(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec2<i32>)
+@internal(intrinsic_store_storage_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_1(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : i32)
 
-@internal(intrinsic_store_storage_vec2_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_4(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec2<u32>)
+@internal(intrinsic_store_storage_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_2(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : u32)
+
+@internal(intrinsic_store_storage_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_3(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : f16)
 
 @internal(intrinsic_store_storage_vec2_f32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_5(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec2<f32>)
+fn tint_symbol_4(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec2<f32>)
 
-@internal(intrinsic_store_storage_vec3_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_6(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec3<i32>)
+@internal(intrinsic_store_storage_vec2_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_5(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec2<i32>)
 
-@internal(intrinsic_store_storage_vec3_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_7(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec3<u32>)
+@internal(intrinsic_store_storage_vec2_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_6(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec2<u32>)
+
+@internal(intrinsic_store_storage_vec2_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_7(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec2<f16>)
 
 @internal(intrinsic_store_storage_vec3_f32) @internal(disable_validation__function_has_no_body)
 fn tint_symbol_8(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec3<f32>)
 
-@internal(intrinsic_store_storage_vec4_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_9(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec4<i32>)
+@internal(intrinsic_store_storage_vec3_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_9(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec3<i32>)
 
-@internal(intrinsic_store_storage_vec4_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_10(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec4<u32>)
+@internal(intrinsic_store_storage_vec3_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_10(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec3<u32>)
+
+@internal(intrinsic_store_storage_vec3_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_11(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec3<f16>)
 
 @internal(intrinsic_store_storage_vec4_f32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_11(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec4<f32>)
+fn tint_symbol_12(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec4<f32>)
 
-fn tint_symbol_12(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x2<f32>) {
-  tint_symbol_5(buffer, (offset + 0u), value[0u]);
-  tint_symbol_5(buffer, (offset + 8u), value[1u]);
+@internal(intrinsic_store_storage_vec4_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_13(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec4<i32>)
+
+@internal(intrinsic_store_storage_vec4_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_14(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec4<u32>)
+
+@internal(intrinsic_store_storage_vec4_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_15(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec4<f16>)
+
+fn tint_symbol_16(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x2<f32>) {
+  tint_symbol_4(buffer, (offset + 0u), value[0u]);
+  tint_symbol_4(buffer, (offset + 8u), value[1u]);
 }
 
-fn tint_symbol_13(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x3<f32>) {
+fn tint_symbol_17(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x3<f32>) {
   tint_symbol_8(buffer, (offset + 0u), value[0u]);
   tint_symbol_8(buffer, (offset + 16u), value[1u]);
 }
 
-fn tint_symbol_14(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x4<f32>) {
-  tint_symbol_11(buffer, (offset + 0u), value[0u]);
-  tint_symbol_11(buffer, (offset + 16u), value[1u]);
+fn tint_symbol_18(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x4<f32>) {
+  tint_symbol_12(buffer, (offset + 0u), value[0u]);
+  tint_symbol_12(buffer, (offset + 16u), value[1u]);
 }
 
-fn tint_symbol_15(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x2<f32>) {
-  tint_symbol_5(buffer, (offset + 0u), value[0u]);
-  tint_symbol_5(buffer, (offset + 8u), value[1u]);
-  tint_symbol_5(buffer, (offset + 16u), value[2u]);
+fn tint_symbol_19(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x2<f32>) {
+  tint_symbol_4(buffer, (offset + 0u), value[0u]);
+  tint_symbol_4(buffer, (offset + 8u), value[1u]);
+  tint_symbol_4(buffer, (offset + 16u), value[2u]);
 }
 
-fn tint_symbol_16(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x3<f32>) {
+fn tint_symbol_20(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x3<f32>) {
   tint_symbol_8(buffer, (offset + 0u), value[0u]);
   tint_symbol_8(buffer, (offset + 16u), value[1u]);
   tint_symbol_8(buffer, (offset + 32u), value[2u]);
 }
 
-fn tint_symbol_17(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x4<f32>) {
-  tint_symbol_11(buffer, (offset + 0u), value[0u]);
-  tint_symbol_11(buffer, (offset + 16u), value[1u]);
-  tint_symbol_11(buffer, (offset + 32u), value[2u]);
+fn tint_symbol_21(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x4<f32>) {
+  tint_symbol_12(buffer, (offset + 0u), value[0u]);
+  tint_symbol_12(buffer, (offset + 16u), value[1u]);
+  tint_symbol_12(buffer, (offset + 32u), value[2u]);
 }
 
-fn tint_symbol_18(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x2<f32>) {
-  tint_symbol_5(buffer, (offset + 0u), value[0u]);
-  tint_symbol_5(buffer, (offset + 8u), value[1u]);
-  tint_symbol_5(buffer, (offset + 16u), value[2u]);
-  tint_symbol_5(buffer, (offset + 24u), value[3u]);
+fn tint_symbol_22(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x2<f32>) {
+  tint_symbol_4(buffer, (offset + 0u), value[0u]);
+  tint_symbol_4(buffer, (offset + 8u), value[1u]);
+  tint_symbol_4(buffer, (offset + 16u), value[2u]);
+  tint_symbol_4(buffer, (offset + 24u), value[3u]);
 }
 
-fn tint_symbol_19(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x3<f32>) {
+fn tint_symbol_23(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x3<f32>) {
   tint_symbol_8(buffer, (offset + 0u), value[0u]);
   tint_symbol_8(buffer, (offset + 16u), value[1u]);
   tint_symbol_8(buffer, (offset + 32u), value[2u]);
   tint_symbol_8(buffer, (offset + 48u), value[3u]);
 }
 
-fn tint_symbol_20(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x4<f32>) {
-  tint_symbol_11(buffer, (offset + 0u), value[0u]);
-  tint_symbol_11(buffer, (offset + 16u), value[1u]);
-  tint_symbol_11(buffer, (offset + 32u), value[2u]);
-  tint_symbol_11(buffer, (offset + 48u), value[3u]);
+fn tint_symbol_24(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x4<f32>) {
+  tint_symbol_12(buffer, (offset + 0u), value[0u]);
+  tint_symbol_12(buffer, (offset + 16u), value[1u]);
+  tint_symbol_12(buffer, (offset + 32u), value[2u]);
+  tint_symbol_12(buffer, (offset + 48u), value[3u]);
 }
 
-fn tint_symbol_21(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : array<vec3<f32>, 2u>) {
+fn tint_symbol_25(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x2<f16>) {
+  tint_symbol_7(buffer, (offset + 0u), value[0u]);
+  tint_symbol_7(buffer, (offset + 4u), value[1u]);
+}
+
+fn tint_symbol_26(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x3<f16>) {
+  tint_symbol_11(buffer, (offset + 0u), value[0u]);
+  tint_symbol_11(buffer, (offset + 8u), value[1u]);
+}
+
+fn tint_symbol_27(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x4<f16>) {
+  tint_symbol_15(buffer, (offset + 0u), value[0u]);
+  tint_symbol_15(buffer, (offset + 8u), value[1u]);
+}
+
+fn tint_symbol_28(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x2<f16>) {
+  tint_symbol_7(buffer, (offset + 0u), value[0u]);
+  tint_symbol_7(buffer, (offset + 4u), value[1u]);
+  tint_symbol_7(buffer, (offset + 8u), value[2u]);
+}
+
+fn tint_symbol_29(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x3<f16>) {
+  tint_symbol_11(buffer, (offset + 0u), value[0u]);
+  tint_symbol_11(buffer, (offset + 8u), value[1u]);
+  tint_symbol_11(buffer, (offset + 16u), value[2u]);
+}
+
+fn tint_symbol_30(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x4<f16>) {
+  tint_symbol_15(buffer, (offset + 0u), value[0u]);
+  tint_symbol_15(buffer, (offset + 8u), value[1u]);
+  tint_symbol_15(buffer, (offset + 16u), value[2u]);
+}
+
+fn tint_symbol_31(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x2<f16>) {
+  tint_symbol_7(buffer, (offset + 0u), value[0u]);
+  tint_symbol_7(buffer, (offset + 4u), value[1u]);
+  tint_symbol_7(buffer, (offset + 8u), value[2u]);
+  tint_symbol_7(buffer, (offset + 12u), value[3u]);
+}
+
+fn tint_symbol_32(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x3<f16>) {
+  tint_symbol_11(buffer, (offset + 0u), value[0u]);
+  tint_symbol_11(buffer, (offset + 8u), value[1u]);
+  tint_symbol_11(buffer, (offset + 16u), value[2u]);
+  tint_symbol_11(buffer, (offset + 24u), value[3u]);
+}
+
+fn tint_symbol_33(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x4<f16>) {
+  tint_symbol_15(buffer, (offset + 0u), value[0u]);
+  tint_symbol_15(buffer, (offset + 8u), value[1u]);
+  tint_symbol_15(buffer, (offset + 16u), value[2u]);
+  tint_symbol_15(buffer, (offset + 24u), value[3u]);
+}
+
+fn tint_symbol_34(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : array<vec3<f32>, 2u>) {
   var array = value;
+  for(var i = 0u; (i < 2u); i = (i + 1u)) {
+    tint_symbol_8(buffer, (offset + (i * 16u)), array[i]);
+  }
+}
+
+fn tint_symbol_35(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : array<mat4x2<f16>, 2u>) {
+  var array_1 = value;
   for(var i_1 = 0u; (i_1 < 2u); i_1 = (i_1 + 1u)) {
-    tint_symbol_8(buffer, (offset + (i_1 * 16u)), array[i_1]);
+    tint_symbol_31(buffer, (offset + (i_1 * 16u)), array_1[i_1]);
   }
 }
 
 @compute @workgroup_size(1)
 fn main() {
-  tint_symbol(&(sb), 0u, i32());
-  tint_symbol_1(&(sb), 4u, u32());
-  tint_symbol_2(&(sb), 8u, f32());
-  tint_symbol_3(&(sb), 16u, vec2<i32>());
-  tint_symbol_4(&(sb), 24u, vec2<u32>());
-  tint_symbol_5(&(sb), 32u, vec2<f32>());
-  tint_symbol_6(&(sb), 48u, vec3<i32>());
-  tint_symbol_7(&(sb), 64u, vec3<u32>());
-  tint_symbol_8(&(sb), 80u, vec3<f32>());
-  tint_symbol_9(&(sb), 96u, vec4<i32>());
-  tint_symbol_10(&(sb), 112u, vec4<u32>());
-  tint_symbol_11(&(sb), 128u, vec4<f32>());
-  tint_symbol_12(&(sb), 144u, mat2x2<f32>());
-  tint_symbol_13(&(sb), 160u, mat2x3<f32>());
-  tint_symbol_14(&(sb), 192u, mat2x4<f32>());
-  tint_symbol_15(&(sb), 224u, mat3x2<f32>());
-  tint_symbol_16(&(sb), 256u, mat3x3<f32>());
-  tint_symbol_17(&(sb), 304u, mat3x4<f32>());
-  tint_symbol_18(&(sb), 352u, mat4x2<f32>());
-  tint_symbol_19(&(sb), 384u, mat4x3<f32>());
-  tint_symbol_20(&(sb), 448u, mat4x4<f32>());
-  tint_symbol_21(&(sb), 512u, array<vec3<f32>, 2>());
+  tint_symbol(&(sb), 0u, f32());
+  tint_symbol_1(&(sb), 4u, i32());
+  tint_symbol_2(&(sb), 8u, u32());
+  tint_symbol_3(&(sb), 12u, f16());
+  tint_symbol_4(&(sb), 16u, vec2<f32>());
+  tint_symbol_5(&(sb), 24u, vec2<i32>());
+  tint_symbol_6(&(sb), 32u, vec2<u32>());
+  tint_symbol_7(&(sb), 40u, vec2<f16>());
+  tint_symbol_8(&(sb), 48u, vec3<f32>());
+  tint_symbol_9(&(sb), 64u, vec3<i32>());
+  tint_symbol_10(&(sb), 80u, vec3<u32>());
+  tint_symbol_11(&(sb), 96u, vec3<f16>());
+  tint_symbol_12(&(sb), 112u, vec4<f32>());
+  tint_symbol_13(&(sb), 128u, vec4<i32>());
+  tint_symbol_14(&(sb), 144u, vec4<u32>());
+  tint_symbol_15(&(sb), 160u, vec4<f16>());
+  tint_symbol_16(&(sb), 168u, mat2x2<f32>());
+  tint_symbol_17(&(sb), 192u, mat2x3<f32>());
+  tint_symbol_18(&(sb), 224u, mat2x4<f32>());
+  tint_symbol_19(&(sb), 256u, mat3x2<f32>());
+  tint_symbol_20(&(sb), 288u, mat3x3<f32>());
+  tint_symbol_21(&(sb), 336u, mat3x4<f32>());
+  tint_symbol_22(&(sb), 384u, mat4x2<f32>());
+  tint_symbol_23(&(sb), 416u, mat4x3<f32>());
+  tint_symbol_24(&(sb), 480u, mat4x4<f32>());
+  tint_symbol_25(&(sb), 544u, mat2x2<f16>());
+  tint_symbol_26(&(sb), 552u, mat2x3<f16>());
+  tint_symbol_27(&(sb), 568u, mat2x4<f16>());
+  tint_symbol_28(&(sb), 584u, mat3x2<f16>());
+  tint_symbol_29(&(sb), 600u, mat3x3<f16>());
+  tint_symbol_30(&(sb), 624u, mat3x4<f16>());
+  tint_symbol_31(&(sb), 648u, mat4x2<f16>());
+  tint_symbol_32(&(sb), 664u, mat4x3<f16>());
+  tint_symbol_33(&(sb), 696u, mat4x4<f16>());
+  tint_symbol_34(&(sb), 736u, array<vec3<f32>, 2>());
+  tint_symbol_35(&(sb), 768u, array<mat4x2<f16>, 2>());
 }
 
 @group(0) @binding(0) var<storage, read_write> sb : SB;
 
 struct SB {
-  a : i32,
-  b : u32,
-  c : f32,
-  d : vec2<i32>,
-  e : vec2<u32>,
-  f : vec2<f32>,
-  g : vec3<i32>,
-  h : vec3<u32>,
-  i : vec3<f32>,
-  j : vec4<i32>,
-  k : vec4<u32>,
-  l : vec4<f32>,
-  m : mat2x2<f32>,
-  n : mat2x3<f32>,
-  o : mat2x4<f32>,
-  p : mat3x2<f32>,
-  q : mat3x3<f32>,
-  r : mat3x4<f32>,
-  s : mat4x2<f32>,
-  t : mat4x3<f32>,
-  u : mat4x4<f32>,
-  v : array<vec3<f32>, 2>,
+  scalar_f32 : f32,
+  scalar_i32 : i32,
+  scalar_u32 : u32,
+  scalar_f16 : f16,
+  vec2_f32 : vec2<f32>,
+  vec2_i32 : vec2<i32>,
+  vec2_u32 : vec2<u32>,
+  vec2_f16 : vec2<f16>,
+  vec3_f32 : vec3<f32>,
+  vec3_i32 : vec3<i32>,
+  vec3_u32 : vec3<u32>,
+  vec3_f16 : vec3<f16>,
+  vec4_f32 : vec4<f32>,
+  vec4_i32 : vec4<i32>,
+  vec4_u32 : vec4<u32>,
+  vec4_f16 : vec4<f16>,
+  mat2x2_f32 : mat2x2<f32>,
+  mat2x3_f32 : mat2x3<f32>,
+  mat2x4_f32 : mat2x4<f32>,
+  mat3x2_f32 : mat3x2<f32>,
+  mat3x3_f32 : mat3x3<f32>,
+  mat3x4_f32 : mat3x4<f32>,
+  mat4x2_f32 : mat4x2<f32>,
+  mat4x3_f32 : mat4x3<f32>,
+  mat4x4_f32 : mat4x4<f32>,
+  mat2x2_f16 : mat2x2<f16>,
+  mat2x3_f16 : mat2x3<f16>,
+  mat2x4_f16 : mat2x4<f16>,
+  mat3x2_f16 : mat3x2<f16>,
+  mat3x3_f16 : mat3x3<f16>,
+  mat3x4_f16 : mat3x4<f16>,
+  mat4x2_f16 : mat4x2<f16>,
+  mat4x3_f16 : mat4x3<f16>,
+  mat4x4_f16 : mat4x4<f16>,
+  arr2_vec3_f32 : array<vec3<f32>, 2>,
+  arr2_mat4x2_f16 : array<mat4x2<f16>, 2>,
 }
 )";
 
@@ -1261,29 +1991,45 @@
 
 TEST_F(DecomposeMemoryAccessTest, LoadStructure) {
     auto* src = R"(
+enable f16;
+
 struct SB {
-  a : i32,
-  b : u32,
-  c : f32,
-  d : vec2<i32>,
-  e : vec2<u32>,
-  f : vec2<f32>,
-  g : vec3<i32>,
-  h : vec3<u32>,
-  i : vec3<f32>,
-  j : vec4<i32>,
-  k : vec4<u32>,
-  l : vec4<f32>,
-  m : mat2x2<f32>,
-  n : mat2x3<f32>,
-  o : mat2x4<f32>,
-  p : mat3x2<f32>,
-  q : mat3x3<f32>,
-  r : mat3x4<f32>,
-  s : mat4x2<f32>,
-  t : mat4x3<f32>,
-  u : mat4x4<f32>,
-  v : array<vec3<f32>, 2>,
+  scalar_f32 : f32,
+  scalar_i32 : i32,
+  scalar_u32 : u32,
+  scalar_f16 : f16,
+  vec2_f32 : vec2<f32>,
+  vec2_i32 : vec2<i32>,
+  vec2_u32 : vec2<u32>,
+  vec2_f16 : vec2<f16>,
+  vec3_f32 : vec3<f32>,
+  vec3_i32 : vec3<i32>,
+  vec3_u32 : vec3<u32>,
+  vec3_f16 : vec3<f16>,
+  vec4_f32 : vec4<f32>,
+  vec4_i32 : vec4<i32>,
+  vec4_u32 : vec4<u32>,
+  vec4_f16 : vec4<f16>,
+  mat2x2_f32 : mat2x2<f32>,
+  mat2x3_f32 : mat2x3<f32>,
+  mat2x4_f32 : mat2x4<f32>,
+  mat3x2_f32 : mat3x2<f32>,
+  mat3x3_f32 : mat3x3<f32>,
+  mat3x4_f32 : mat3x4<f32>,
+  mat4x2_f32 : mat4x2<f32>,
+  mat4x3_f32 : mat4x3<f32>,
+  mat4x4_f32 : mat4x4<f32>,
+  mat2x2_f16 : mat2x2<f16>,
+  mat2x3_f16 : mat2x3<f16>,
+  mat2x4_f16 : mat2x4<f16>,
+  mat3x2_f16 : mat3x2<f16>,
+  mat3x3_f16 : mat3x3<f16>,
+  mat3x4_f16 : mat3x4<f16>,
+  mat4x2_f16 : mat4x2<f16>,
+  mat4x3_f16 : mat4x3<f16>,
+  mat4x4_f16 : mat4x4<f16>,
+  arr2_vec3_f32 : array<vec3<f32>, 2>,
+  arr2_mat4x2_f16 : array<mat4x2<f16>, 2>,
 };
 
 @group(0) @binding(0) var<storage, read_write> sb : SB;
@@ -1295,115 +2041,187 @@
 )";
 
     auto* expect = R"(
+enable f16;
+
 struct SB {
-  a : i32,
-  b : u32,
-  c : f32,
-  d : vec2<i32>,
-  e : vec2<u32>,
-  f : vec2<f32>,
-  g : vec3<i32>,
-  h : vec3<u32>,
-  i : vec3<f32>,
-  j : vec4<i32>,
-  k : vec4<u32>,
-  l : vec4<f32>,
-  m : mat2x2<f32>,
-  n : mat2x3<f32>,
-  o : mat2x4<f32>,
-  p : mat3x2<f32>,
-  q : mat3x3<f32>,
-  r : mat3x4<f32>,
-  s : mat4x2<f32>,
-  t : mat4x3<f32>,
-  u : mat4x4<f32>,
-  v : array<vec3<f32>, 2>,
+  scalar_f32 : f32,
+  scalar_i32 : i32,
+  scalar_u32 : u32,
+  scalar_f16 : f16,
+  vec2_f32 : vec2<f32>,
+  vec2_i32 : vec2<i32>,
+  vec2_u32 : vec2<u32>,
+  vec2_f16 : vec2<f16>,
+  vec3_f32 : vec3<f32>,
+  vec3_i32 : vec3<i32>,
+  vec3_u32 : vec3<u32>,
+  vec3_f16 : vec3<f16>,
+  vec4_f32 : vec4<f32>,
+  vec4_i32 : vec4<i32>,
+  vec4_u32 : vec4<u32>,
+  vec4_f16 : vec4<f16>,
+  mat2x2_f32 : mat2x2<f32>,
+  mat2x3_f32 : mat2x3<f32>,
+  mat2x4_f32 : mat2x4<f32>,
+  mat3x2_f32 : mat3x2<f32>,
+  mat3x3_f32 : mat3x3<f32>,
+  mat3x4_f32 : mat3x4<f32>,
+  mat4x2_f32 : mat4x2<f32>,
+  mat4x3_f32 : mat4x3<f32>,
+  mat4x4_f32 : mat4x4<f32>,
+  mat2x2_f16 : mat2x2<f16>,
+  mat2x3_f16 : mat2x3<f16>,
+  mat2x4_f16 : mat2x4<f16>,
+  mat3x2_f16 : mat3x2<f16>,
+  mat3x3_f16 : mat3x3<f16>,
+  mat3x4_f16 : mat3x4<f16>,
+  mat4x2_f16 : mat4x2<f16>,
+  mat4x3_f16 : mat4x3<f16>,
+  mat4x4_f16 : mat4x4<f16>,
+  arr2_vec3_f32 : array<vec3<f32>, 2>,
+  arr2_mat4x2_f16 : array<mat4x2<f16>, 2>,
 }
 
 @group(0) @binding(0) var<storage, read_write> sb : SB;
 
+@internal(intrinsic_load_storage_f32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_1(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> f32
+
 @internal(intrinsic_load_storage_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_1(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> i32
+fn tint_symbol_2(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> i32
 
 @internal(intrinsic_load_storage_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_2(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> u32
+fn tint_symbol_3(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> u32
 
-@internal(intrinsic_load_storage_f32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_3(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> f32
-
-@internal(intrinsic_load_storage_vec2_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_4(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec2<i32>
-
-@internal(intrinsic_load_storage_vec2_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_5(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec2<u32>
+@internal(intrinsic_load_storage_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_4(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> f16
 
 @internal(intrinsic_load_storage_vec2_f32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_6(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec2<f32>
+fn tint_symbol_5(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec2<f32>
 
-@internal(intrinsic_load_storage_vec3_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_7(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec3<i32>
+@internal(intrinsic_load_storage_vec2_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_6(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec2<i32>
 
-@internal(intrinsic_load_storage_vec3_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_8(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec3<u32>
+@internal(intrinsic_load_storage_vec2_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_7(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec2<u32>
+
+@internal(intrinsic_load_storage_vec2_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_8(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec2<f16>
 
 @internal(intrinsic_load_storage_vec3_f32) @internal(disable_validation__function_has_no_body)
 fn tint_symbol_9(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec3<f32>
 
-@internal(intrinsic_load_storage_vec4_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_10(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec4<i32>
+@internal(intrinsic_load_storage_vec3_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_10(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec3<i32>
 
-@internal(intrinsic_load_storage_vec4_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_11(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec4<u32>
+@internal(intrinsic_load_storage_vec3_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_11(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec3<u32>
+
+@internal(intrinsic_load_storage_vec3_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_12(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec3<f16>
 
 @internal(intrinsic_load_storage_vec4_f32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_12(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec4<f32>
+fn tint_symbol_13(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec4<f32>
 
-fn tint_symbol_13(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x2<f32> {
-  return mat2x2<f32>(tint_symbol_6(buffer, (offset + 0u)), tint_symbol_6(buffer, (offset + 8u)));
+@internal(intrinsic_load_storage_vec4_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_14(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec4<i32>
+
+@internal(intrinsic_load_storage_vec4_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_15(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec4<u32>
+
+@internal(intrinsic_load_storage_vec4_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_16(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec4<f16>
+
+fn tint_symbol_17(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x2<f32> {
+  return mat2x2<f32>(tint_symbol_5(buffer, (offset + 0u)), tint_symbol_5(buffer, (offset + 8u)));
 }
 
-fn tint_symbol_14(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x3<f32> {
+fn tint_symbol_18(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x3<f32> {
   return mat2x3<f32>(tint_symbol_9(buffer, (offset + 0u)), tint_symbol_9(buffer, (offset + 16u)));
 }
 
-fn tint_symbol_15(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x4<f32> {
-  return mat2x4<f32>(tint_symbol_12(buffer, (offset + 0u)), tint_symbol_12(buffer, (offset + 16u)));
+fn tint_symbol_19(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x4<f32> {
+  return mat2x4<f32>(tint_symbol_13(buffer, (offset + 0u)), tint_symbol_13(buffer, (offset + 16u)));
 }
 
-fn tint_symbol_16(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x2<f32> {
-  return mat3x2<f32>(tint_symbol_6(buffer, (offset + 0u)), tint_symbol_6(buffer, (offset + 8u)), tint_symbol_6(buffer, (offset + 16u)));
+fn tint_symbol_20(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x2<f32> {
+  return mat3x2<f32>(tint_symbol_5(buffer, (offset + 0u)), tint_symbol_5(buffer, (offset + 8u)), tint_symbol_5(buffer, (offset + 16u)));
 }
 
-fn tint_symbol_17(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x3<f32> {
+fn tint_symbol_21(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x3<f32> {
   return mat3x3<f32>(tint_symbol_9(buffer, (offset + 0u)), tint_symbol_9(buffer, (offset + 16u)), tint_symbol_9(buffer, (offset + 32u)));
 }
 
-fn tint_symbol_18(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x4<f32> {
-  return mat3x4<f32>(tint_symbol_12(buffer, (offset + 0u)), tint_symbol_12(buffer, (offset + 16u)), tint_symbol_12(buffer, (offset + 32u)));
+fn tint_symbol_22(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x4<f32> {
+  return mat3x4<f32>(tint_symbol_13(buffer, (offset + 0u)), tint_symbol_13(buffer, (offset + 16u)), tint_symbol_13(buffer, (offset + 32u)));
 }
 
-fn tint_symbol_19(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x2<f32> {
-  return mat4x2<f32>(tint_symbol_6(buffer, (offset + 0u)), tint_symbol_6(buffer, (offset + 8u)), tint_symbol_6(buffer, (offset + 16u)), tint_symbol_6(buffer, (offset + 24u)));
+fn tint_symbol_23(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x2<f32> {
+  return mat4x2<f32>(tint_symbol_5(buffer, (offset + 0u)), tint_symbol_5(buffer, (offset + 8u)), tint_symbol_5(buffer, (offset + 16u)), tint_symbol_5(buffer, (offset + 24u)));
 }
 
-fn tint_symbol_20(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x3<f32> {
+fn tint_symbol_24(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x3<f32> {
   return mat4x3<f32>(tint_symbol_9(buffer, (offset + 0u)), tint_symbol_9(buffer, (offset + 16u)), tint_symbol_9(buffer, (offset + 32u)), tint_symbol_9(buffer, (offset + 48u)));
 }
 
-fn tint_symbol_21(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x4<f32> {
-  return mat4x4<f32>(tint_symbol_12(buffer, (offset + 0u)), tint_symbol_12(buffer, (offset + 16u)), tint_symbol_12(buffer, (offset + 32u)), tint_symbol_12(buffer, (offset + 48u)));
+fn tint_symbol_25(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x4<f32> {
+  return mat4x4<f32>(tint_symbol_13(buffer, (offset + 0u)), tint_symbol_13(buffer, (offset + 16u)), tint_symbol_13(buffer, (offset + 32u)), tint_symbol_13(buffer, (offset + 48u)));
 }
 
-fn tint_symbol_22(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> array<vec3<f32>, 2u> {
+fn tint_symbol_26(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x2<f16> {
+  return mat2x2<f16>(tint_symbol_8(buffer, (offset + 0u)), tint_symbol_8(buffer, (offset + 4u)));
+}
+
+fn tint_symbol_27(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x3<f16> {
+  return mat2x3<f16>(tint_symbol_12(buffer, (offset + 0u)), tint_symbol_12(buffer, (offset + 8u)));
+}
+
+fn tint_symbol_28(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x4<f16> {
+  return mat2x4<f16>(tint_symbol_16(buffer, (offset + 0u)), tint_symbol_16(buffer, (offset + 8u)));
+}
+
+fn tint_symbol_29(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x2<f16> {
+  return mat3x2<f16>(tint_symbol_8(buffer, (offset + 0u)), tint_symbol_8(buffer, (offset + 4u)), tint_symbol_8(buffer, (offset + 8u)));
+}
+
+fn tint_symbol_30(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x3<f16> {
+  return mat3x3<f16>(tint_symbol_12(buffer, (offset + 0u)), tint_symbol_12(buffer, (offset + 8u)), tint_symbol_12(buffer, (offset + 16u)));
+}
+
+fn tint_symbol_31(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x4<f16> {
+  return mat3x4<f16>(tint_symbol_16(buffer, (offset + 0u)), tint_symbol_16(buffer, (offset + 8u)), tint_symbol_16(buffer, (offset + 16u)));
+}
+
+fn tint_symbol_32(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x2<f16> {
+  return mat4x2<f16>(tint_symbol_8(buffer, (offset + 0u)), tint_symbol_8(buffer, (offset + 4u)), tint_symbol_8(buffer, (offset + 8u)), tint_symbol_8(buffer, (offset + 12u)));
+}
+
+fn tint_symbol_33(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x3<f16> {
+  return mat4x3<f16>(tint_symbol_12(buffer, (offset + 0u)), tint_symbol_12(buffer, (offset + 8u)), tint_symbol_12(buffer, (offset + 16u)), tint_symbol_12(buffer, (offset + 24u)));
+}
+
+fn tint_symbol_34(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x4<f16> {
+  return mat4x4<f16>(tint_symbol_16(buffer, (offset + 0u)), tint_symbol_16(buffer, (offset + 8u)), tint_symbol_16(buffer, (offset + 16u)), tint_symbol_16(buffer, (offset + 24u)));
+}
+
+fn tint_symbol_35(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> array<vec3<f32>, 2u> {
   var arr : array<vec3<f32>, 2u>;
-  for(var i_1 = 0u; (i_1 < 2u); i_1 = (i_1 + 1u)) {
-    arr[i_1] = tint_symbol_9(buffer, (offset + (i_1 * 16u)));
+  for(var i = 0u; (i < 2u); i = (i + 1u)) {
+    arr[i] = tint_symbol_9(buffer, (offset + (i * 16u)));
   }
   return arr;
 }
 
+fn tint_symbol_36(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> array<mat4x2<f16>, 2u> {
+  var arr_1 : array<mat4x2<f16>, 2u>;
+  for(var i_1 = 0u; (i_1 < 2u); i_1 = (i_1 + 1u)) {
+    arr_1[i_1] = tint_symbol_32(buffer, (offset + (i_1 * 16u)));
+  }
+  return arr_1;
+}
+
 fn tint_symbol(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> SB {
-  return SB(tint_symbol_1(buffer, (offset + 0u)), tint_symbol_2(buffer, (offset + 4u)), tint_symbol_3(buffer, (offset + 8u)), tint_symbol_4(buffer, (offset + 16u)), tint_symbol_5(buffer, (offset + 24u)), tint_symbol_6(buffer, (offset + 32u)), tint_symbol_7(buffer, (offset + 48u)), tint_symbol_8(buffer, (offset + 64u)), tint_symbol_9(buffer, (offset + 80u)), tint_symbol_10(buffer, (offset + 96u)), tint_symbol_11(buffer, (offset + 112u)), tint_symbol_12(buffer, (offset + 128u)), tint_symbol_13(buffer, (offset + 144u)), tint_symbol_14(buffer, (offset + 160u)), tint_symbol_15(buffer, (offset + 192u)), tint_symbol_16(buffer, (offset + 224u)), tint_symbol_17(buffer, (offset + 256u)), tint_symbol_18(buffer, (offset + 304u)), tint_symbol_19(buffer, (offset + 352u)), tint_symbol_20(buffer, (offset + 384u)), tint_symbol_21(buffer, (offset + 448u)), tint_symbol_22(buffer, (offset + 512u)));
+  return SB(tint_symbol_1(buffer, (offset + 0u)), tint_symbol_2(buffer, (offset + 4u)), tint_symbol_3(buffer, (offset + 8u)), tint_symbol_4(buffer, (offset + 12u)), tint_symbol_5(buffer, (offset + 16u)), tint_symbol_6(buffer, (offset + 24u)), tint_symbol_7(buffer, (offset + 32u)), tint_symbol_8(buffer, (offset + 40u)), tint_symbol_9(buffer, (offset + 48u)), tint_symbol_10(buffer, (offset + 64u)), tint_symbol_11(buffer, (offset + 80u)), tint_symbol_12(buffer, (offset + 96u)), tint_symbol_13(buffer, (offset + 112u)), tint_symbol_14(buffer, (offset + 128u)), tint_symbol_15(buffer, (offset + 144u)), tint_symbol_16(buffer, (offset + 160u)), tint_symbol_17(buffer, (offset + 168u)), tint_symbol_18(buffer, (offset + 192u)), tint_symbol_19(buffer, (offset + 224u)), tint_symbol_20(buffer, (offset + 256u)), tint_symbol_21(buffer, (offset + 288u)), tint_symbol_22(buffer, (offset + 336u)), tint_symbol_23(buffer, (offset + 384u)), tint_symbol_24(buffer, (offset + 416u)), tint_symbol_25(buffer, (offset + 480u)), tint_symbol_26(buffer, (offset + 544u)), tint_symbol_27(buffer, (offset + 552u)), tint_symbol_28(buffer, (offset + 568u)), tint_symbol_29(buffer, (offset + 584u)), tint_symbol_30(buffer, (offset + 600u)), tint_symbol_31(buffer, (offset + 624u)), tint_symbol_32(buffer, (offset + 648u)), tint_symbol_33(buffer, (offset + 664u)), tint_symbol_34(buffer, (offset + 696u)), tint_symbol_35(buffer, (offset + 736u)), tint_symbol_36(buffer, (offset + 768u)));
 }
 
 @compute @workgroup_size(1)
@@ -1419,6 +2237,8 @@
 
 TEST_F(DecomposeMemoryAccessTest, LoadStructure_OutOfOrder) {
     auto* src = R"(
+enable f16;
+
 @compute @workgroup_size(1)
 fn main() {
   var x : SB = sb;
@@ -1427,114 +2247,186 @@
 @group(0) @binding(0) var<storage, read_write> sb : SB;
 
 struct SB {
-  a : i32,
-  b : u32,
-  c : f32,
-  d : vec2<i32>,
-  e : vec2<u32>,
-  f : vec2<f32>,
-  g : vec3<i32>,
-  h : vec3<u32>,
-  i : vec3<f32>,
-  j : vec4<i32>,
-  k : vec4<u32>,
-  l : vec4<f32>,
-  m : mat2x2<f32>,
-  n : mat2x3<f32>,
-  o : mat2x4<f32>,
-  p : mat3x2<f32>,
-  q : mat3x3<f32>,
-  r : mat3x4<f32>,
-  s : mat4x2<f32>,
-  t : mat4x3<f32>,
-  u : mat4x4<f32>,
-  v : array<vec3<f32>, 2>,
+  scalar_f32 : f32,
+  scalar_i32 : i32,
+  scalar_u32 : u32,
+  scalar_f16 : f16,
+  vec2_f32 : vec2<f32>,
+  vec2_i32 : vec2<i32>,
+  vec2_u32 : vec2<u32>,
+  vec2_f16 : vec2<f16>,
+  vec3_f32 : vec3<f32>,
+  vec3_i32 : vec3<i32>,
+  vec3_u32 : vec3<u32>,
+  vec3_f16 : vec3<f16>,
+  vec4_f32 : vec4<f32>,
+  vec4_i32 : vec4<i32>,
+  vec4_u32 : vec4<u32>,
+  vec4_f16 : vec4<f16>,
+  mat2x2_f32 : mat2x2<f32>,
+  mat2x3_f32 : mat2x3<f32>,
+  mat2x4_f32 : mat2x4<f32>,
+  mat3x2_f32 : mat3x2<f32>,
+  mat3x3_f32 : mat3x3<f32>,
+  mat3x4_f32 : mat3x4<f32>,
+  mat4x2_f32 : mat4x2<f32>,
+  mat4x3_f32 : mat4x3<f32>,
+  mat4x4_f32 : mat4x4<f32>,
+  mat2x2_f16 : mat2x2<f16>,
+  mat2x3_f16 : mat2x3<f16>,
+  mat2x4_f16 : mat2x4<f16>,
+  mat3x2_f16 : mat3x2<f16>,
+  mat3x3_f16 : mat3x3<f16>,
+  mat3x4_f16 : mat3x4<f16>,
+  mat4x2_f16 : mat4x2<f16>,
+  mat4x3_f16 : mat4x3<f16>,
+  mat4x4_f16 : mat4x4<f16>,
+  arr2_vec3_f32 : array<vec3<f32>, 2>,
+  arr2_mat4x2_f16 : array<mat4x2<f16>, 2>,
 };
 )";
 
     auto* expect = R"(
-@internal(intrinsic_load_storage_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_1(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> i32
-
-@internal(intrinsic_load_storage_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_2(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> u32
+enable f16;
 
 @internal(intrinsic_load_storage_f32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_3(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> f32
+fn tint_symbol_1(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> f32
 
-@internal(intrinsic_load_storage_vec2_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_4(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec2<i32>
+@internal(intrinsic_load_storage_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_2(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> i32
 
-@internal(intrinsic_load_storage_vec2_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_5(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec2<u32>
+@internal(intrinsic_load_storage_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_3(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> u32
+
+@internal(intrinsic_load_storage_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_4(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> f16
 
 @internal(intrinsic_load_storage_vec2_f32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_6(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec2<f32>
+fn tint_symbol_5(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec2<f32>
 
-@internal(intrinsic_load_storage_vec3_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_7(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec3<i32>
+@internal(intrinsic_load_storage_vec2_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_6(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec2<i32>
 
-@internal(intrinsic_load_storage_vec3_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_8(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec3<u32>
+@internal(intrinsic_load_storage_vec2_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_7(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec2<u32>
+
+@internal(intrinsic_load_storage_vec2_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_8(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec2<f16>
 
 @internal(intrinsic_load_storage_vec3_f32) @internal(disable_validation__function_has_no_body)
 fn tint_symbol_9(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec3<f32>
 
-@internal(intrinsic_load_storage_vec4_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_10(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec4<i32>
+@internal(intrinsic_load_storage_vec3_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_10(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec3<i32>
 
-@internal(intrinsic_load_storage_vec4_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_11(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec4<u32>
+@internal(intrinsic_load_storage_vec3_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_11(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec3<u32>
+
+@internal(intrinsic_load_storage_vec3_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_12(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec3<f16>
 
 @internal(intrinsic_load_storage_vec4_f32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_12(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec4<f32>
+fn tint_symbol_13(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec4<f32>
 
-fn tint_symbol_13(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x2<f32> {
-  return mat2x2<f32>(tint_symbol_6(buffer, (offset + 0u)), tint_symbol_6(buffer, (offset + 8u)));
+@internal(intrinsic_load_storage_vec4_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_14(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec4<i32>
+
+@internal(intrinsic_load_storage_vec4_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_15(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec4<u32>
+
+@internal(intrinsic_load_storage_vec4_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_16(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec4<f16>
+
+fn tint_symbol_17(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x2<f32> {
+  return mat2x2<f32>(tint_symbol_5(buffer, (offset + 0u)), tint_symbol_5(buffer, (offset + 8u)));
 }
 
-fn tint_symbol_14(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x3<f32> {
+fn tint_symbol_18(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x3<f32> {
   return mat2x3<f32>(tint_symbol_9(buffer, (offset + 0u)), tint_symbol_9(buffer, (offset + 16u)));
 }
 
-fn tint_symbol_15(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x4<f32> {
-  return mat2x4<f32>(tint_symbol_12(buffer, (offset + 0u)), tint_symbol_12(buffer, (offset + 16u)));
+fn tint_symbol_19(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x4<f32> {
+  return mat2x4<f32>(tint_symbol_13(buffer, (offset + 0u)), tint_symbol_13(buffer, (offset + 16u)));
 }
 
-fn tint_symbol_16(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x2<f32> {
-  return mat3x2<f32>(tint_symbol_6(buffer, (offset + 0u)), tint_symbol_6(buffer, (offset + 8u)), tint_symbol_6(buffer, (offset + 16u)));
+fn tint_symbol_20(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x2<f32> {
+  return mat3x2<f32>(tint_symbol_5(buffer, (offset + 0u)), tint_symbol_5(buffer, (offset + 8u)), tint_symbol_5(buffer, (offset + 16u)));
 }
 
-fn tint_symbol_17(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x3<f32> {
+fn tint_symbol_21(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x3<f32> {
   return mat3x3<f32>(tint_symbol_9(buffer, (offset + 0u)), tint_symbol_9(buffer, (offset + 16u)), tint_symbol_9(buffer, (offset + 32u)));
 }
 
-fn tint_symbol_18(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x4<f32> {
-  return mat3x4<f32>(tint_symbol_12(buffer, (offset + 0u)), tint_symbol_12(buffer, (offset + 16u)), tint_symbol_12(buffer, (offset + 32u)));
+fn tint_symbol_22(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x4<f32> {
+  return mat3x4<f32>(tint_symbol_13(buffer, (offset + 0u)), tint_symbol_13(buffer, (offset + 16u)), tint_symbol_13(buffer, (offset + 32u)));
 }
 
-fn tint_symbol_19(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x2<f32> {
-  return mat4x2<f32>(tint_symbol_6(buffer, (offset + 0u)), tint_symbol_6(buffer, (offset + 8u)), tint_symbol_6(buffer, (offset + 16u)), tint_symbol_6(buffer, (offset + 24u)));
+fn tint_symbol_23(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x2<f32> {
+  return mat4x2<f32>(tint_symbol_5(buffer, (offset + 0u)), tint_symbol_5(buffer, (offset + 8u)), tint_symbol_5(buffer, (offset + 16u)), tint_symbol_5(buffer, (offset + 24u)));
 }
 
-fn tint_symbol_20(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x3<f32> {
+fn tint_symbol_24(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x3<f32> {
   return mat4x3<f32>(tint_symbol_9(buffer, (offset + 0u)), tint_symbol_9(buffer, (offset + 16u)), tint_symbol_9(buffer, (offset + 32u)), tint_symbol_9(buffer, (offset + 48u)));
 }
 
-fn tint_symbol_21(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x4<f32> {
-  return mat4x4<f32>(tint_symbol_12(buffer, (offset + 0u)), tint_symbol_12(buffer, (offset + 16u)), tint_symbol_12(buffer, (offset + 32u)), tint_symbol_12(buffer, (offset + 48u)));
+fn tint_symbol_25(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x4<f32> {
+  return mat4x4<f32>(tint_symbol_13(buffer, (offset + 0u)), tint_symbol_13(buffer, (offset + 16u)), tint_symbol_13(buffer, (offset + 32u)), tint_symbol_13(buffer, (offset + 48u)));
 }
 
-fn tint_symbol_22(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> array<vec3<f32>, 2u> {
+fn tint_symbol_26(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x2<f16> {
+  return mat2x2<f16>(tint_symbol_8(buffer, (offset + 0u)), tint_symbol_8(buffer, (offset + 4u)));
+}
+
+fn tint_symbol_27(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x3<f16> {
+  return mat2x3<f16>(tint_symbol_12(buffer, (offset + 0u)), tint_symbol_12(buffer, (offset + 8u)));
+}
+
+fn tint_symbol_28(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x4<f16> {
+  return mat2x4<f16>(tint_symbol_16(buffer, (offset + 0u)), tint_symbol_16(buffer, (offset + 8u)));
+}
+
+fn tint_symbol_29(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x2<f16> {
+  return mat3x2<f16>(tint_symbol_8(buffer, (offset + 0u)), tint_symbol_8(buffer, (offset + 4u)), tint_symbol_8(buffer, (offset + 8u)));
+}
+
+fn tint_symbol_30(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x3<f16> {
+  return mat3x3<f16>(tint_symbol_12(buffer, (offset + 0u)), tint_symbol_12(buffer, (offset + 8u)), tint_symbol_12(buffer, (offset + 16u)));
+}
+
+fn tint_symbol_31(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x4<f16> {
+  return mat3x4<f16>(tint_symbol_16(buffer, (offset + 0u)), tint_symbol_16(buffer, (offset + 8u)), tint_symbol_16(buffer, (offset + 16u)));
+}
+
+fn tint_symbol_32(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x2<f16> {
+  return mat4x2<f16>(tint_symbol_8(buffer, (offset + 0u)), tint_symbol_8(buffer, (offset + 4u)), tint_symbol_8(buffer, (offset + 8u)), tint_symbol_8(buffer, (offset + 12u)));
+}
+
+fn tint_symbol_33(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x3<f16> {
+  return mat4x3<f16>(tint_symbol_12(buffer, (offset + 0u)), tint_symbol_12(buffer, (offset + 8u)), tint_symbol_12(buffer, (offset + 16u)), tint_symbol_12(buffer, (offset + 24u)));
+}
+
+fn tint_symbol_34(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x4<f16> {
+  return mat4x4<f16>(tint_symbol_16(buffer, (offset + 0u)), tint_symbol_16(buffer, (offset + 8u)), tint_symbol_16(buffer, (offset + 16u)), tint_symbol_16(buffer, (offset + 24u)));
+}
+
+fn tint_symbol_35(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> array<vec3<f32>, 2u> {
   var arr : array<vec3<f32>, 2u>;
-  for(var i_1 = 0u; (i_1 < 2u); i_1 = (i_1 + 1u)) {
-    arr[i_1] = tint_symbol_9(buffer, (offset + (i_1 * 16u)));
+  for(var i = 0u; (i < 2u); i = (i + 1u)) {
+    arr[i] = tint_symbol_9(buffer, (offset + (i * 16u)));
   }
   return arr;
 }
 
+fn tint_symbol_36(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> array<mat4x2<f16>, 2u> {
+  var arr_1 : array<mat4x2<f16>, 2u>;
+  for(var i_1 = 0u; (i_1 < 2u); i_1 = (i_1 + 1u)) {
+    arr_1[i_1] = tint_symbol_32(buffer, (offset + (i_1 * 16u)));
+  }
+  return arr_1;
+}
+
 fn tint_symbol(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> SB {
-  return SB(tint_symbol_1(buffer, (offset + 0u)), tint_symbol_2(buffer, (offset + 4u)), tint_symbol_3(buffer, (offset + 8u)), tint_symbol_4(buffer, (offset + 16u)), tint_symbol_5(buffer, (offset + 24u)), tint_symbol_6(buffer, (offset + 32u)), tint_symbol_7(buffer, (offset + 48u)), tint_symbol_8(buffer, (offset + 64u)), tint_symbol_9(buffer, (offset + 80u)), tint_symbol_10(buffer, (offset + 96u)), tint_symbol_11(buffer, (offset + 112u)), tint_symbol_12(buffer, (offset + 128u)), tint_symbol_13(buffer, (offset + 144u)), tint_symbol_14(buffer, (offset + 160u)), tint_symbol_15(buffer, (offset + 192u)), tint_symbol_16(buffer, (offset + 224u)), tint_symbol_17(buffer, (offset + 256u)), tint_symbol_18(buffer, (offset + 304u)), tint_symbol_19(buffer, (offset + 352u)), tint_symbol_20(buffer, (offset + 384u)), tint_symbol_21(buffer, (offset + 448u)), tint_symbol_22(buffer, (offset + 512u)));
+  return SB(tint_symbol_1(buffer, (offset + 0u)), tint_symbol_2(buffer, (offset + 4u)), tint_symbol_3(buffer, (offset + 8u)), tint_symbol_4(buffer, (offset + 12u)), tint_symbol_5(buffer, (offset + 16u)), tint_symbol_6(buffer, (offset + 24u)), tint_symbol_7(buffer, (offset + 32u)), tint_symbol_8(buffer, (offset + 40u)), tint_symbol_9(buffer, (offset + 48u)), tint_symbol_10(buffer, (offset + 64u)), tint_symbol_11(buffer, (offset + 80u)), tint_symbol_12(buffer, (offset + 96u)), tint_symbol_13(buffer, (offset + 112u)), tint_symbol_14(buffer, (offset + 128u)), tint_symbol_15(buffer, (offset + 144u)), tint_symbol_16(buffer, (offset + 160u)), tint_symbol_17(buffer, (offset + 168u)), tint_symbol_18(buffer, (offset + 192u)), tint_symbol_19(buffer, (offset + 224u)), tint_symbol_20(buffer, (offset + 256u)), tint_symbol_21(buffer, (offset + 288u)), tint_symbol_22(buffer, (offset + 336u)), tint_symbol_23(buffer, (offset + 384u)), tint_symbol_24(buffer, (offset + 416u)), tint_symbol_25(buffer, (offset + 480u)), tint_symbol_26(buffer, (offset + 544u)), tint_symbol_27(buffer, (offset + 552u)), tint_symbol_28(buffer, (offset + 568u)), tint_symbol_29(buffer, (offset + 584u)), tint_symbol_30(buffer, (offset + 600u)), tint_symbol_31(buffer, (offset + 624u)), tint_symbol_32(buffer, (offset + 648u)), tint_symbol_33(buffer, (offset + 664u)), tint_symbol_34(buffer, (offset + 696u)), tint_symbol_35(buffer, (offset + 736u)), tint_symbol_36(buffer, (offset + 768u)));
 }
 
 @compute @workgroup_size(1)
@@ -1545,28 +2437,42 @@
 @group(0) @binding(0) var<storage, read_write> sb : SB;
 
 struct SB {
-  a : i32,
-  b : u32,
-  c : f32,
-  d : vec2<i32>,
-  e : vec2<u32>,
-  f : vec2<f32>,
-  g : vec3<i32>,
-  h : vec3<u32>,
-  i : vec3<f32>,
-  j : vec4<i32>,
-  k : vec4<u32>,
-  l : vec4<f32>,
-  m : mat2x2<f32>,
-  n : mat2x3<f32>,
-  o : mat2x4<f32>,
-  p : mat3x2<f32>,
-  q : mat3x3<f32>,
-  r : mat3x4<f32>,
-  s : mat4x2<f32>,
-  t : mat4x3<f32>,
-  u : mat4x4<f32>,
-  v : array<vec3<f32>, 2>,
+  scalar_f32 : f32,
+  scalar_i32 : i32,
+  scalar_u32 : u32,
+  scalar_f16 : f16,
+  vec2_f32 : vec2<f32>,
+  vec2_i32 : vec2<i32>,
+  vec2_u32 : vec2<u32>,
+  vec2_f16 : vec2<f16>,
+  vec3_f32 : vec3<f32>,
+  vec3_i32 : vec3<i32>,
+  vec3_u32 : vec3<u32>,
+  vec3_f16 : vec3<f16>,
+  vec4_f32 : vec4<f32>,
+  vec4_i32 : vec4<i32>,
+  vec4_u32 : vec4<u32>,
+  vec4_f16 : vec4<f16>,
+  mat2x2_f32 : mat2x2<f32>,
+  mat2x3_f32 : mat2x3<f32>,
+  mat2x4_f32 : mat2x4<f32>,
+  mat3x2_f32 : mat3x2<f32>,
+  mat3x3_f32 : mat3x3<f32>,
+  mat3x4_f32 : mat3x4<f32>,
+  mat4x2_f32 : mat4x2<f32>,
+  mat4x3_f32 : mat4x3<f32>,
+  mat4x4_f32 : mat4x4<f32>,
+  mat2x2_f16 : mat2x2<f16>,
+  mat2x3_f16 : mat2x3<f16>,
+  mat2x4_f16 : mat2x4<f16>,
+  mat3x2_f16 : mat3x2<f16>,
+  mat3x3_f16 : mat3x3<f16>,
+  mat3x4_f16 : mat3x4<f16>,
+  mat4x2_f16 : mat4x2<f16>,
+  mat4x3_f16 : mat4x3<f16>,
+  mat4x4_f16 : mat4x4<f16>,
+  arr2_vec3_f32 : array<vec3<f32>, 2>,
+  arr2_mat4x2_f16 : array<mat4x2<f16>, 2>,
 }
 )";
 
@@ -1577,29 +2483,45 @@
 
 TEST_F(DecomposeMemoryAccessTest, StoreStructure) {
     auto* src = R"(
+enable f16;
+
 struct SB {
-  a : i32,
-  b : u32,
-  c : f32,
-  d : vec2<i32>,
-  e : vec2<u32>,
-  f : vec2<f32>,
-  g : vec3<i32>,
-  h : vec3<u32>,
-  i : vec3<f32>,
-  j : vec4<i32>,
-  k : vec4<u32>,
-  l : vec4<f32>,
-  m : mat2x2<f32>,
-  n : mat2x3<f32>,
-  o : mat2x4<f32>,
-  p : mat3x2<f32>,
-  q : mat3x3<f32>,
-  r : mat3x4<f32>,
-  s : mat4x2<f32>,
-  t : mat4x3<f32>,
-  u : mat4x4<f32>,
-  v : array<vec3<f32>, 2>,
+  scalar_f32 : f32,
+  scalar_i32 : i32,
+  scalar_u32 : u32,
+  scalar_f16 : f16,
+  vec2_f32 : vec2<f32>,
+  vec2_i32 : vec2<i32>,
+  vec2_u32 : vec2<u32>,
+  vec2_f16 : vec2<f16>,
+  vec3_f32 : vec3<f32>,
+  vec3_i32 : vec3<i32>,
+  vec3_u32 : vec3<u32>,
+  vec3_f16 : vec3<f16>,
+  vec4_f32 : vec4<f32>,
+  vec4_i32 : vec4<i32>,
+  vec4_u32 : vec4<u32>,
+  vec4_f16 : vec4<f16>,
+  mat2x2_f32 : mat2x2<f32>,
+  mat2x3_f32 : mat2x3<f32>,
+  mat2x4_f32 : mat2x4<f32>,
+  mat3x2_f32 : mat3x2<f32>,
+  mat3x3_f32 : mat3x3<f32>,
+  mat3x4_f32 : mat3x4<f32>,
+  mat4x2_f32 : mat4x2<f32>,
+  mat4x3_f32 : mat4x3<f32>,
+  mat4x4_f32 : mat4x4<f32>,
+  mat2x2_f16 : mat2x2<f16>,
+  mat2x3_f16 : mat2x3<f16>,
+  mat2x4_f16 : mat2x4<f16>,
+  mat3x2_f16 : mat3x2<f16>,
+  mat3x3_f16 : mat3x3<f16>,
+  mat3x4_f16 : mat3x4<f16>,
+  mat4x2_f16 : mat4x2<f16>,
+  mat4x3_f16 : mat4x3<f16>,
+  mat4x4_f16 : mat4x4<f16>,
+  arr2_vec3_f32 : array<vec3<f32>, 2>,
+  arr2_mat4x2_f16 : array<mat4x2<f16>, 2>,
 };
 
 @group(0) @binding(0) var<storage, read_write> sb : SB;
@@ -1611,153 +2533,256 @@
 )";
 
     auto* expect = R"(
+enable f16;
+
 struct SB {
-  a : i32,
-  b : u32,
-  c : f32,
-  d : vec2<i32>,
-  e : vec2<u32>,
-  f : vec2<f32>,
-  g : vec3<i32>,
-  h : vec3<u32>,
-  i : vec3<f32>,
-  j : vec4<i32>,
-  k : vec4<u32>,
-  l : vec4<f32>,
-  m : mat2x2<f32>,
-  n : mat2x3<f32>,
-  o : mat2x4<f32>,
-  p : mat3x2<f32>,
-  q : mat3x3<f32>,
-  r : mat3x4<f32>,
-  s : mat4x2<f32>,
-  t : mat4x3<f32>,
-  u : mat4x4<f32>,
-  v : array<vec3<f32>, 2>,
+  scalar_f32 : f32,
+  scalar_i32 : i32,
+  scalar_u32 : u32,
+  scalar_f16 : f16,
+  vec2_f32 : vec2<f32>,
+  vec2_i32 : vec2<i32>,
+  vec2_u32 : vec2<u32>,
+  vec2_f16 : vec2<f16>,
+  vec3_f32 : vec3<f32>,
+  vec3_i32 : vec3<i32>,
+  vec3_u32 : vec3<u32>,
+  vec3_f16 : vec3<f16>,
+  vec4_f32 : vec4<f32>,
+  vec4_i32 : vec4<i32>,
+  vec4_u32 : vec4<u32>,
+  vec4_f16 : vec4<f16>,
+  mat2x2_f32 : mat2x2<f32>,
+  mat2x3_f32 : mat2x3<f32>,
+  mat2x4_f32 : mat2x4<f32>,
+  mat3x2_f32 : mat3x2<f32>,
+  mat3x3_f32 : mat3x3<f32>,
+  mat3x4_f32 : mat3x4<f32>,
+  mat4x2_f32 : mat4x2<f32>,
+  mat4x3_f32 : mat4x3<f32>,
+  mat4x4_f32 : mat4x4<f32>,
+  mat2x2_f16 : mat2x2<f16>,
+  mat2x3_f16 : mat2x3<f16>,
+  mat2x4_f16 : mat2x4<f16>,
+  mat3x2_f16 : mat3x2<f16>,
+  mat3x3_f16 : mat3x3<f16>,
+  mat3x4_f16 : mat3x4<f16>,
+  mat4x2_f16 : mat4x2<f16>,
+  mat4x3_f16 : mat4x3<f16>,
+  mat4x4_f16 : mat4x4<f16>,
+  arr2_vec3_f32 : array<vec3<f32>, 2>,
+  arr2_mat4x2_f16 : array<mat4x2<f16>, 2>,
 }
 
 @group(0) @binding(0) var<storage, read_write> sb : SB;
 
+@internal(intrinsic_store_storage_f32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_1(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : f32)
+
 @internal(intrinsic_store_storage_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_1(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : i32)
+fn tint_symbol_2(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : i32)
 
 @internal(intrinsic_store_storage_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_2(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : u32)
+fn tint_symbol_3(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : u32)
 
-@internal(intrinsic_store_storage_f32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_3(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : f32)
-
-@internal(intrinsic_store_storage_vec2_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_4(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec2<i32>)
-
-@internal(intrinsic_store_storage_vec2_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_5(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec2<u32>)
+@internal(intrinsic_store_storage_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_4(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : f16)
 
 @internal(intrinsic_store_storage_vec2_f32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_6(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec2<f32>)
+fn tint_symbol_5(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec2<f32>)
 
-@internal(intrinsic_store_storage_vec3_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_7(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec3<i32>)
+@internal(intrinsic_store_storage_vec2_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_6(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec2<i32>)
 
-@internal(intrinsic_store_storage_vec3_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_8(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec3<u32>)
+@internal(intrinsic_store_storage_vec2_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_7(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec2<u32>)
+
+@internal(intrinsic_store_storage_vec2_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_8(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec2<f16>)
 
 @internal(intrinsic_store_storage_vec3_f32) @internal(disable_validation__function_has_no_body)
 fn tint_symbol_9(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec3<f32>)
 
-@internal(intrinsic_store_storage_vec4_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_10(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec4<i32>)
+@internal(intrinsic_store_storage_vec3_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_10(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec3<i32>)
 
-@internal(intrinsic_store_storage_vec4_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_11(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec4<u32>)
+@internal(intrinsic_store_storage_vec3_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_11(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec3<u32>)
+
+@internal(intrinsic_store_storage_vec3_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_12(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec3<f16>)
 
 @internal(intrinsic_store_storage_vec4_f32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_12(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec4<f32>)
+fn tint_symbol_13(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec4<f32>)
 
-fn tint_symbol_13(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x2<f32>) {
-  tint_symbol_6(buffer, (offset + 0u), value[0u]);
-  tint_symbol_6(buffer, (offset + 8u), value[1u]);
+@internal(intrinsic_store_storage_vec4_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_14(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec4<i32>)
+
+@internal(intrinsic_store_storage_vec4_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_15(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec4<u32>)
+
+@internal(intrinsic_store_storage_vec4_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_16(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec4<f16>)
+
+fn tint_symbol_17(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x2<f32>) {
+  tint_symbol_5(buffer, (offset + 0u), value[0u]);
+  tint_symbol_5(buffer, (offset + 8u), value[1u]);
 }
 
-fn tint_symbol_14(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x3<f32>) {
+fn tint_symbol_18(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x3<f32>) {
   tint_symbol_9(buffer, (offset + 0u), value[0u]);
   tint_symbol_9(buffer, (offset + 16u), value[1u]);
 }
 
-fn tint_symbol_15(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x4<f32>) {
-  tint_symbol_12(buffer, (offset + 0u), value[0u]);
-  tint_symbol_12(buffer, (offset + 16u), value[1u]);
+fn tint_symbol_19(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x4<f32>) {
+  tint_symbol_13(buffer, (offset + 0u), value[0u]);
+  tint_symbol_13(buffer, (offset + 16u), value[1u]);
 }
 
-fn tint_symbol_16(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x2<f32>) {
-  tint_symbol_6(buffer, (offset + 0u), value[0u]);
-  tint_symbol_6(buffer, (offset + 8u), value[1u]);
-  tint_symbol_6(buffer, (offset + 16u), value[2u]);
+fn tint_symbol_20(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x2<f32>) {
+  tint_symbol_5(buffer, (offset + 0u), value[0u]);
+  tint_symbol_5(buffer, (offset + 8u), value[1u]);
+  tint_symbol_5(buffer, (offset + 16u), value[2u]);
 }
 
-fn tint_symbol_17(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x3<f32>) {
+fn tint_symbol_21(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x3<f32>) {
   tint_symbol_9(buffer, (offset + 0u), value[0u]);
   tint_symbol_9(buffer, (offset + 16u), value[1u]);
   tint_symbol_9(buffer, (offset + 32u), value[2u]);
 }
 
-fn tint_symbol_18(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x4<f32>) {
-  tint_symbol_12(buffer, (offset + 0u), value[0u]);
-  tint_symbol_12(buffer, (offset + 16u), value[1u]);
-  tint_symbol_12(buffer, (offset + 32u), value[2u]);
+fn tint_symbol_22(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x4<f32>) {
+  tint_symbol_13(buffer, (offset + 0u), value[0u]);
+  tint_symbol_13(buffer, (offset + 16u), value[1u]);
+  tint_symbol_13(buffer, (offset + 32u), value[2u]);
 }
 
-fn tint_symbol_19(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x2<f32>) {
-  tint_symbol_6(buffer, (offset + 0u), value[0u]);
-  tint_symbol_6(buffer, (offset + 8u), value[1u]);
-  tint_symbol_6(buffer, (offset + 16u), value[2u]);
-  tint_symbol_6(buffer, (offset + 24u), value[3u]);
+fn tint_symbol_23(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x2<f32>) {
+  tint_symbol_5(buffer, (offset + 0u), value[0u]);
+  tint_symbol_5(buffer, (offset + 8u), value[1u]);
+  tint_symbol_5(buffer, (offset + 16u), value[2u]);
+  tint_symbol_5(buffer, (offset + 24u), value[3u]);
 }
 
-fn tint_symbol_20(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x3<f32>) {
+fn tint_symbol_24(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x3<f32>) {
   tint_symbol_9(buffer, (offset + 0u), value[0u]);
   tint_symbol_9(buffer, (offset + 16u), value[1u]);
   tint_symbol_9(buffer, (offset + 32u), value[2u]);
   tint_symbol_9(buffer, (offset + 48u), value[3u]);
 }
 
-fn tint_symbol_21(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x4<f32>) {
-  tint_symbol_12(buffer, (offset + 0u), value[0u]);
-  tint_symbol_12(buffer, (offset + 16u), value[1u]);
-  tint_symbol_12(buffer, (offset + 32u), value[2u]);
-  tint_symbol_12(buffer, (offset + 48u), value[3u]);
+fn tint_symbol_25(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x4<f32>) {
+  tint_symbol_13(buffer, (offset + 0u), value[0u]);
+  tint_symbol_13(buffer, (offset + 16u), value[1u]);
+  tint_symbol_13(buffer, (offset + 32u), value[2u]);
+  tint_symbol_13(buffer, (offset + 48u), value[3u]);
 }
 
-fn tint_symbol_22(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : array<vec3<f32>, 2u>) {
+fn tint_symbol_26(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x2<f16>) {
+  tint_symbol_8(buffer, (offset + 0u), value[0u]);
+  tint_symbol_8(buffer, (offset + 4u), value[1u]);
+}
+
+fn tint_symbol_27(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x3<f16>) {
+  tint_symbol_12(buffer, (offset + 0u), value[0u]);
+  tint_symbol_12(buffer, (offset + 8u), value[1u]);
+}
+
+fn tint_symbol_28(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x4<f16>) {
+  tint_symbol_16(buffer, (offset + 0u), value[0u]);
+  tint_symbol_16(buffer, (offset + 8u), value[1u]);
+}
+
+fn tint_symbol_29(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x2<f16>) {
+  tint_symbol_8(buffer, (offset + 0u), value[0u]);
+  tint_symbol_8(buffer, (offset + 4u), value[1u]);
+  tint_symbol_8(buffer, (offset + 8u), value[2u]);
+}
+
+fn tint_symbol_30(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x3<f16>) {
+  tint_symbol_12(buffer, (offset + 0u), value[0u]);
+  tint_symbol_12(buffer, (offset + 8u), value[1u]);
+  tint_symbol_12(buffer, (offset + 16u), value[2u]);
+}
+
+fn tint_symbol_31(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x4<f16>) {
+  tint_symbol_16(buffer, (offset + 0u), value[0u]);
+  tint_symbol_16(buffer, (offset + 8u), value[1u]);
+  tint_symbol_16(buffer, (offset + 16u), value[2u]);
+}
+
+fn tint_symbol_32(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x2<f16>) {
+  tint_symbol_8(buffer, (offset + 0u), value[0u]);
+  tint_symbol_8(buffer, (offset + 4u), value[1u]);
+  tint_symbol_8(buffer, (offset + 8u), value[2u]);
+  tint_symbol_8(buffer, (offset + 12u), value[3u]);
+}
+
+fn tint_symbol_33(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x3<f16>) {
+  tint_symbol_12(buffer, (offset + 0u), value[0u]);
+  tint_symbol_12(buffer, (offset + 8u), value[1u]);
+  tint_symbol_12(buffer, (offset + 16u), value[2u]);
+  tint_symbol_12(buffer, (offset + 24u), value[3u]);
+}
+
+fn tint_symbol_34(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x4<f16>) {
+  tint_symbol_16(buffer, (offset + 0u), value[0u]);
+  tint_symbol_16(buffer, (offset + 8u), value[1u]);
+  tint_symbol_16(buffer, (offset + 16u), value[2u]);
+  tint_symbol_16(buffer, (offset + 24u), value[3u]);
+}
+
+fn tint_symbol_35(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : array<vec3<f32>, 2u>) {
   var array = value;
+  for(var i = 0u; (i < 2u); i = (i + 1u)) {
+    tint_symbol_9(buffer, (offset + (i * 16u)), array[i]);
+  }
+}
+
+fn tint_symbol_36(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : array<mat4x2<f16>, 2u>) {
+  var array_1 = value;
   for(var i_1 = 0u; (i_1 < 2u); i_1 = (i_1 + 1u)) {
-    tint_symbol_9(buffer, (offset + (i_1 * 16u)), array[i_1]);
+    tint_symbol_32(buffer, (offset + (i_1 * 16u)), array_1[i_1]);
   }
 }
 
 fn tint_symbol(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : SB) {
-  tint_symbol_1(buffer, (offset + 0u), value.a);
-  tint_symbol_2(buffer, (offset + 4u), value.b);
-  tint_symbol_3(buffer, (offset + 8u), value.c);
-  tint_symbol_4(buffer, (offset + 16u), value.d);
-  tint_symbol_5(buffer, (offset + 24u), value.e);
-  tint_symbol_6(buffer, (offset + 32u), value.f);
-  tint_symbol_7(buffer, (offset + 48u), value.g);
-  tint_symbol_8(buffer, (offset + 64u), value.h);
-  tint_symbol_9(buffer, (offset + 80u), value.i);
-  tint_symbol_10(buffer, (offset + 96u), value.j);
-  tint_symbol_11(buffer, (offset + 112u), value.k);
-  tint_symbol_12(buffer, (offset + 128u), value.l);
-  tint_symbol_13(buffer, (offset + 144u), value.m);
-  tint_symbol_14(buffer, (offset + 160u), value.n);
-  tint_symbol_15(buffer, (offset + 192u), value.o);
-  tint_symbol_16(buffer, (offset + 224u), value.p);
-  tint_symbol_17(buffer, (offset + 256u), value.q);
-  tint_symbol_18(buffer, (offset + 304u), value.r);
-  tint_symbol_19(buffer, (offset + 352u), value.s);
-  tint_symbol_20(buffer, (offset + 384u), value.t);
-  tint_symbol_21(buffer, (offset + 448u), value.u);
-  tint_symbol_22(buffer, (offset + 512u), value.v);
+  tint_symbol_1(buffer, (offset + 0u), value.scalar_f32);
+  tint_symbol_2(buffer, (offset + 4u), value.scalar_i32);
+  tint_symbol_3(buffer, (offset + 8u), value.scalar_u32);
+  tint_symbol_4(buffer, (offset + 12u), value.scalar_f16);
+  tint_symbol_5(buffer, (offset + 16u), value.vec2_f32);
+  tint_symbol_6(buffer, (offset + 24u), value.vec2_i32);
+  tint_symbol_7(buffer, (offset + 32u), value.vec2_u32);
+  tint_symbol_8(buffer, (offset + 40u), value.vec2_f16);
+  tint_symbol_9(buffer, (offset + 48u), value.vec3_f32);
+  tint_symbol_10(buffer, (offset + 64u), value.vec3_i32);
+  tint_symbol_11(buffer, (offset + 80u), value.vec3_u32);
+  tint_symbol_12(buffer, (offset + 96u), value.vec3_f16);
+  tint_symbol_13(buffer, (offset + 112u), value.vec4_f32);
+  tint_symbol_14(buffer, (offset + 128u), value.vec4_i32);
+  tint_symbol_15(buffer, (offset + 144u), value.vec4_u32);
+  tint_symbol_16(buffer, (offset + 160u), value.vec4_f16);
+  tint_symbol_17(buffer, (offset + 168u), value.mat2x2_f32);
+  tint_symbol_18(buffer, (offset + 192u), value.mat2x3_f32);
+  tint_symbol_19(buffer, (offset + 224u), value.mat2x4_f32);
+  tint_symbol_20(buffer, (offset + 256u), value.mat3x2_f32);
+  tint_symbol_21(buffer, (offset + 288u), value.mat3x3_f32);
+  tint_symbol_22(buffer, (offset + 336u), value.mat3x4_f32);
+  tint_symbol_23(buffer, (offset + 384u), value.mat4x2_f32);
+  tint_symbol_24(buffer, (offset + 416u), value.mat4x3_f32);
+  tint_symbol_25(buffer, (offset + 480u), value.mat4x4_f32);
+  tint_symbol_26(buffer, (offset + 544u), value.mat2x2_f16);
+  tint_symbol_27(buffer, (offset + 552u), value.mat2x3_f16);
+  tint_symbol_28(buffer, (offset + 568u), value.mat2x4_f16);
+  tint_symbol_29(buffer, (offset + 584u), value.mat3x2_f16);
+  tint_symbol_30(buffer, (offset + 600u), value.mat3x3_f16);
+  tint_symbol_31(buffer, (offset + 624u), value.mat3x4_f16);
+  tint_symbol_32(buffer, (offset + 648u), value.mat4x2_f16);
+  tint_symbol_33(buffer, (offset + 664u), value.mat4x3_f16);
+  tint_symbol_34(buffer, (offset + 696u), value.mat4x4_f16);
+  tint_symbol_35(buffer, (offset + 736u), value.arr2_vec3_f32);
+  tint_symbol_36(buffer, (offset + 768u), value.arr2_mat4x2_f16);
 }
 
 @compute @workgroup_size(1)
@@ -1773,6 +2798,8 @@
 
 TEST_F(DecomposeMemoryAccessTest, StoreStructure_OutOfOrder) {
     auto* src = R"(
+enable f16;
+
 @compute @workgroup_size(1)
 fn main() {
   sb = SB();
@@ -1781,152 +2808,255 @@
 @group(0) @binding(0) var<storage, read_write> sb : SB;
 
 struct SB {
-  a : i32,
-  b : u32,
-  c : f32,
-  d : vec2<i32>,
-  e : vec2<u32>,
-  f : vec2<f32>,
-  g : vec3<i32>,
-  h : vec3<u32>,
-  i : vec3<f32>,
-  j : vec4<i32>,
-  k : vec4<u32>,
-  l : vec4<f32>,
-  m : mat2x2<f32>,
-  n : mat2x3<f32>,
-  o : mat2x4<f32>,
-  p : mat3x2<f32>,
-  q : mat3x3<f32>,
-  r : mat3x4<f32>,
-  s : mat4x2<f32>,
-  t : mat4x3<f32>,
-  u : mat4x4<f32>,
-  v : array<vec3<f32>, 2>,
+  scalar_f32 : f32,
+  scalar_i32 : i32,
+  scalar_u32 : u32,
+  scalar_f16 : f16,
+  vec2_f32 : vec2<f32>,
+  vec2_i32 : vec2<i32>,
+  vec2_u32 : vec2<u32>,
+  vec2_f16 : vec2<f16>,
+  vec3_f32 : vec3<f32>,
+  vec3_i32 : vec3<i32>,
+  vec3_u32 : vec3<u32>,
+  vec3_f16 : vec3<f16>,
+  vec4_f32 : vec4<f32>,
+  vec4_i32 : vec4<i32>,
+  vec4_u32 : vec4<u32>,
+  vec4_f16 : vec4<f16>,
+  mat2x2_f32 : mat2x2<f32>,
+  mat2x3_f32 : mat2x3<f32>,
+  mat2x4_f32 : mat2x4<f32>,
+  mat3x2_f32 : mat3x2<f32>,
+  mat3x3_f32 : mat3x3<f32>,
+  mat3x4_f32 : mat3x4<f32>,
+  mat4x2_f32 : mat4x2<f32>,
+  mat4x3_f32 : mat4x3<f32>,
+  mat4x4_f32 : mat4x4<f32>,
+  mat2x2_f16 : mat2x2<f16>,
+  mat2x3_f16 : mat2x3<f16>,
+  mat2x4_f16 : mat2x4<f16>,
+  mat3x2_f16 : mat3x2<f16>,
+  mat3x3_f16 : mat3x3<f16>,
+  mat3x4_f16 : mat3x4<f16>,
+  mat4x2_f16 : mat4x2<f16>,
+  mat4x3_f16 : mat4x3<f16>,
+  mat4x4_f16 : mat4x4<f16>,
+  arr2_vec3_f32 : array<vec3<f32>, 2>,
+  arr2_mat4x2_f16 : array<mat4x2<f16>, 2>,
 };
 )";
 
     auto* expect = R"(
-@internal(intrinsic_store_storage_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_1(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : i32)
-
-@internal(intrinsic_store_storage_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_2(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : u32)
+enable f16;
 
 @internal(intrinsic_store_storage_f32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_3(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : f32)
+fn tint_symbol_1(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : f32)
 
-@internal(intrinsic_store_storage_vec2_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_4(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec2<i32>)
+@internal(intrinsic_store_storage_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_2(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : i32)
 
-@internal(intrinsic_store_storage_vec2_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_5(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec2<u32>)
+@internal(intrinsic_store_storage_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_3(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : u32)
+
+@internal(intrinsic_store_storage_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_4(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : f16)
 
 @internal(intrinsic_store_storage_vec2_f32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_6(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec2<f32>)
+fn tint_symbol_5(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec2<f32>)
 
-@internal(intrinsic_store_storage_vec3_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_7(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec3<i32>)
+@internal(intrinsic_store_storage_vec2_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_6(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec2<i32>)
 
-@internal(intrinsic_store_storage_vec3_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_8(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec3<u32>)
+@internal(intrinsic_store_storage_vec2_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_7(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec2<u32>)
+
+@internal(intrinsic_store_storage_vec2_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_8(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec2<f16>)
 
 @internal(intrinsic_store_storage_vec3_f32) @internal(disable_validation__function_has_no_body)
 fn tint_symbol_9(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec3<f32>)
 
-@internal(intrinsic_store_storage_vec4_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_10(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec4<i32>)
+@internal(intrinsic_store_storage_vec3_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_10(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec3<i32>)
 
-@internal(intrinsic_store_storage_vec4_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_11(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec4<u32>)
+@internal(intrinsic_store_storage_vec3_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_11(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec3<u32>)
+
+@internal(intrinsic_store_storage_vec3_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_12(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec3<f16>)
 
 @internal(intrinsic_store_storage_vec4_f32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_12(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec4<f32>)
+fn tint_symbol_13(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec4<f32>)
 
-fn tint_symbol_13(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x2<f32>) {
-  tint_symbol_6(buffer, (offset + 0u), value[0u]);
-  tint_symbol_6(buffer, (offset + 8u), value[1u]);
+@internal(intrinsic_store_storage_vec4_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_14(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec4<i32>)
+
+@internal(intrinsic_store_storage_vec4_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_15(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec4<u32>)
+
+@internal(intrinsic_store_storage_vec4_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_16(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec4<f16>)
+
+fn tint_symbol_17(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x2<f32>) {
+  tint_symbol_5(buffer, (offset + 0u), value[0u]);
+  tint_symbol_5(buffer, (offset + 8u), value[1u]);
 }
 
-fn tint_symbol_14(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x3<f32>) {
+fn tint_symbol_18(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x3<f32>) {
   tint_symbol_9(buffer, (offset + 0u), value[0u]);
   tint_symbol_9(buffer, (offset + 16u), value[1u]);
 }
 
-fn tint_symbol_15(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x4<f32>) {
-  tint_symbol_12(buffer, (offset + 0u), value[0u]);
-  tint_symbol_12(buffer, (offset + 16u), value[1u]);
+fn tint_symbol_19(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x4<f32>) {
+  tint_symbol_13(buffer, (offset + 0u), value[0u]);
+  tint_symbol_13(buffer, (offset + 16u), value[1u]);
 }
 
-fn tint_symbol_16(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x2<f32>) {
-  tint_symbol_6(buffer, (offset + 0u), value[0u]);
-  tint_symbol_6(buffer, (offset + 8u), value[1u]);
-  tint_symbol_6(buffer, (offset + 16u), value[2u]);
+fn tint_symbol_20(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x2<f32>) {
+  tint_symbol_5(buffer, (offset + 0u), value[0u]);
+  tint_symbol_5(buffer, (offset + 8u), value[1u]);
+  tint_symbol_5(buffer, (offset + 16u), value[2u]);
 }
 
-fn tint_symbol_17(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x3<f32>) {
+fn tint_symbol_21(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x3<f32>) {
   tint_symbol_9(buffer, (offset + 0u), value[0u]);
   tint_symbol_9(buffer, (offset + 16u), value[1u]);
   tint_symbol_9(buffer, (offset + 32u), value[2u]);
 }
 
-fn tint_symbol_18(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x4<f32>) {
-  tint_symbol_12(buffer, (offset + 0u), value[0u]);
-  tint_symbol_12(buffer, (offset + 16u), value[1u]);
-  tint_symbol_12(buffer, (offset + 32u), value[2u]);
+fn tint_symbol_22(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x4<f32>) {
+  tint_symbol_13(buffer, (offset + 0u), value[0u]);
+  tint_symbol_13(buffer, (offset + 16u), value[1u]);
+  tint_symbol_13(buffer, (offset + 32u), value[2u]);
 }
 
-fn tint_symbol_19(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x2<f32>) {
-  tint_symbol_6(buffer, (offset + 0u), value[0u]);
-  tint_symbol_6(buffer, (offset + 8u), value[1u]);
-  tint_symbol_6(buffer, (offset + 16u), value[2u]);
-  tint_symbol_6(buffer, (offset + 24u), value[3u]);
+fn tint_symbol_23(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x2<f32>) {
+  tint_symbol_5(buffer, (offset + 0u), value[0u]);
+  tint_symbol_5(buffer, (offset + 8u), value[1u]);
+  tint_symbol_5(buffer, (offset + 16u), value[2u]);
+  tint_symbol_5(buffer, (offset + 24u), value[3u]);
 }
 
-fn tint_symbol_20(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x3<f32>) {
+fn tint_symbol_24(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x3<f32>) {
   tint_symbol_9(buffer, (offset + 0u), value[0u]);
   tint_symbol_9(buffer, (offset + 16u), value[1u]);
   tint_symbol_9(buffer, (offset + 32u), value[2u]);
   tint_symbol_9(buffer, (offset + 48u), value[3u]);
 }
 
-fn tint_symbol_21(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x4<f32>) {
-  tint_symbol_12(buffer, (offset + 0u), value[0u]);
-  tint_symbol_12(buffer, (offset + 16u), value[1u]);
-  tint_symbol_12(buffer, (offset + 32u), value[2u]);
-  tint_symbol_12(buffer, (offset + 48u), value[3u]);
+fn tint_symbol_25(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x4<f32>) {
+  tint_symbol_13(buffer, (offset + 0u), value[0u]);
+  tint_symbol_13(buffer, (offset + 16u), value[1u]);
+  tint_symbol_13(buffer, (offset + 32u), value[2u]);
+  tint_symbol_13(buffer, (offset + 48u), value[3u]);
 }
 
-fn tint_symbol_22(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : array<vec3<f32>, 2u>) {
+fn tint_symbol_26(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x2<f16>) {
+  tint_symbol_8(buffer, (offset + 0u), value[0u]);
+  tint_symbol_8(buffer, (offset + 4u), value[1u]);
+}
+
+fn tint_symbol_27(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x3<f16>) {
+  tint_symbol_12(buffer, (offset + 0u), value[0u]);
+  tint_symbol_12(buffer, (offset + 8u), value[1u]);
+}
+
+fn tint_symbol_28(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x4<f16>) {
+  tint_symbol_16(buffer, (offset + 0u), value[0u]);
+  tint_symbol_16(buffer, (offset + 8u), value[1u]);
+}
+
+fn tint_symbol_29(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x2<f16>) {
+  tint_symbol_8(buffer, (offset + 0u), value[0u]);
+  tint_symbol_8(buffer, (offset + 4u), value[1u]);
+  tint_symbol_8(buffer, (offset + 8u), value[2u]);
+}
+
+fn tint_symbol_30(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x3<f16>) {
+  tint_symbol_12(buffer, (offset + 0u), value[0u]);
+  tint_symbol_12(buffer, (offset + 8u), value[1u]);
+  tint_symbol_12(buffer, (offset + 16u), value[2u]);
+}
+
+fn tint_symbol_31(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x4<f16>) {
+  tint_symbol_16(buffer, (offset + 0u), value[0u]);
+  tint_symbol_16(buffer, (offset + 8u), value[1u]);
+  tint_symbol_16(buffer, (offset + 16u), value[2u]);
+}
+
+fn tint_symbol_32(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x2<f16>) {
+  tint_symbol_8(buffer, (offset + 0u), value[0u]);
+  tint_symbol_8(buffer, (offset + 4u), value[1u]);
+  tint_symbol_8(buffer, (offset + 8u), value[2u]);
+  tint_symbol_8(buffer, (offset + 12u), value[3u]);
+}
+
+fn tint_symbol_33(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x3<f16>) {
+  tint_symbol_12(buffer, (offset + 0u), value[0u]);
+  tint_symbol_12(buffer, (offset + 8u), value[1u]);
+  tint_symbol_12(buffer, (offset + 16u), value[2u]);
+  tint_symbol_12(buffer, (offset + 24u), value[3u]);
+}
+
+fn tint_symbol_34(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x4<f16>) {
+  tint_symbol_16(buffer, (offset + 0u), value[0u]);
+  tint_symbol_16(buffer, (offset + 8u), value[1u]);
+  tint_symbol_16(buffer, (offset + 16u), value[2u]);
+  tint_symbol_16(buffer, (offset + 24u), value[3u]);
+}
+
+fn tint_symbol_35(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : array<vec3<f32>, 2u>) {
   var array = value;
+  for(var i = 0u; (i < 2u); i = (i + 1u)) {
+    tint_symbol_9(buffer, (offset + (i * 16u)), array[i]);
+  }
+}
+
+fn tint_symbol_36(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : array<mat4x2<f16>, 2u>) {
+  var array_1 = value;
   for(var i_1 = 0u; (i_1 < 2u); i_1 = (i_1 + 1u)) {
-    tint_symbol_9(buffer, (offset + (i_1 * 16u)), array[i_1]);
+    tint_symbol_32(buffer, (offset + (i_1 * 16u)), array_1[i_1]);
   }
 }
 
 fn tint_symbol(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : SB) {
-  tint_symbol_1(buffer, (offset + 0u), value.a);
-  tint_symbol_2(buffer, (offset + 4u), value.b);
-  tint_symbol_3(buffer, (offset + 8u), value.c);
-  tint_symbol_4(buffer, (offset + 16u), value.d);
-  tint_symbol_5(buffer, (offset + 24u), value.e);
-  tint_symbol_6(buffer, (offset + 32u), value.f);
-  tint_symbol_7(buffer, (offset + 48u), value.g);
-  tint_symbol_8(buffer, (offset + 64u), value.h);
-  tint_symbol_9(buffer, (offset + 80u), value.i);
-  tint_symbol_10(buffer, (offset + 96u), value.j);
-  tint_symbol_11(buffer, (offset + 112u), value.k);
-  tint_symbol_12(buffer, (offset + 128u), value.l);
-  tint_symbol_13(buffer, (offset + 144u), value.m);
-  tint_symbol_14(buffer, (offset + 160u), value.n);
-  tint_symbol_15(buffer, (offset + 192u), value.o);
-  tint_symbol_16(buffer, (offset + 224u), value.p);
-  tint_symbol_17(buffer, (offset + 256u), value.q);
-  tint_symbol_18(buffer, (offset + 304u), value.r);
-  tint_symbol_19(buffer, (offset + 352u), value.s);
-  tint_symbol_20(buffer, (offset + 384u), value.t);
-  tint_symbol_21(buffer, (offset + 448u), value.u);
-  tint_symbol_22(buffer, (offset + 512u), value.v);
+  tint_symbol_1(buffer, (offset + 0u), value.scalar_f32);
+  tint_symbol_2(buffer, (offset + 4u), value.scalar_i32);
+  tint_symbol_3(buffer, (offset + 8u), value.scalar_u32);
+  tint_symbol_4(buffer, (offset + 12u), value.scalar_f16);
+  tint_symbol_5(buffer, (offset + 16u), value.vec2_f32);
+  tint_symbol_6(buffer, (offset + 24u), value.vec2_i32);
+  tint_symbol_7(buffer, (offset + 32u), value.vec2_u32);
+  tint_symbol_8(buffer, (offset + 40u), value.vec2_f16);
+  tint_symbol_9(buffer, (offset + 48u), value.vec3_f32);
+  tint_symbol_10(buffer, (offset + 64u), value.vec3_i32);
+  tint_symbol_11(buffer, (offset + 80u), value.vec3_u32);
+  tint_symbol_12(buffer, (offset + 96u), value.vec3_f16);
+  tint_symbol_13(buffer, (offset + 112u), value.vec4_f32);
+  tint_symbol_14(buffer, (offset + 128u), value.vec4_i32);
+  tint_symbol_15(buffer, (offset + 144u), value.vec4_u32);
+  tint_symbol_16(buffer, (offset + 160u), value.vec4_f16);
+  tint_symbol_17(buffer, (offset + 168u), value.mat2x2_f32);
+  tint_symbol_18(buffer, (offset + 192u), value.mat2x3_f32);
+  tint_symbol_19(buffer, (offset + 224u), value.mat2x4_f32);
+  tint_symbol_20(buffer, (offset + 256u), value.mat3x2_f32);
+  tint_symbol_21(buffer, (offset + 288u), value.mat3x3_f32);
+  tint_symbol_22(buffer, (offset + 336u), value.mat3x4_f32);
+  tint_symbol_23(buffer, (offset + 384u), value.mat4x2_f32);
+  tint_symbol_24(buffer, (offset + 416u), value.mat4x3_f32);
+  tint_symbol_25(buffer, (offset + 480u), value.mat4x4_f32);
+  tint_symbol_26(buffer, (offset + 544u), value.mat2x2_f16);
+  tint_symbol_27(buffer, (offset + 552u), value.mat2x3_f16);
+  tint_symbol_28(buffer, (offset + 568u), value.mat2x4_f16);
+  tint_symbol_29(buffer, (offset + 584u), value.mat3x2_f16);
+  tint_symbol_30(buffer, (offset + 600u), value.mat3x3_f16);
+  tint_symbol_31(buffer, (offset + 624u), value.mat3x4_f16);
+  tint_symbol_32(buffer, (offset + 648u), value.mat4x2_f16);
+  tint_symbol_33(buffer, (offset + 664u), value.mat4x3_f16);
+  tint_symbol_34(buffer, (offset + 696u), value.mat4x4_f16);
+  tint_symbol_35(buffer, (offset + 736u), value.arr2_vec3_f32);
+  tint_symbol_36(buffer, (offset + 768u), value.arr2_mat4x2_f16);
 }
 
 @compute @workgroup_size(1)
@@ -1937,28 +3067,42 @@
 @group(0) @binding(0) var<storage, read_write> sb : SB;
 
 struct SB {
-  a : i32,
-  b : u32,
-  c : f32,
-  d : vec2<i32>,
-  e : vec2<u32>,
-  f : vec2<f32>,
-  g : vec3<i32>,
-  h : vec3<u32>,
-  i : vec3<f32>,
-  j : vec4<i32>,
-  k : vec4<u32>,
-  l : vec4<f32>,
-  m : mat2x2<f32>,
-  n : mat2x3<f32>,
-  o : mat2x4<f32>,
-  p : mat3x2<f32>,
-  q : mat3x3<f32>,
-  r : mat3x4<f32>,
-  s : mat4x2<f32>,
-  t : mat4x3<f32>,
-  u : mat4x4<f32>,
-  v : array<vec3<f32>, 2>,
+  scalar_f32 : f32,
+  scalar_i32 : i32,
+  scalar_u32 : u32,
+  scalar_f16 : f16,
+  vec2_f32 : vec2<f32>,
+  vec2_i32 : vec2<i32>,
+  vec2_u32 : vec2<u32>,
+  vec2_f16 : vec2<f16>,
+  vec3_f32 : vec3<f32>,
+  vec3_i32 : vec3<i32>,
+  vec3_u32 : vec3<u32>,
+  vec3_f16 : vec3<f16>,
+  vec4_f32 : vec4<f32>,
+  vec4_i32 : vec4<i32>,
+  vec4_u32 : vec4<u32>,
+  vec4_f16 : vec4<f16>,
+  mat2x2_f32 : mat2x2<f32>,
+  mat2x3_f32 : mat2x3<f32>,
+  mat2x4_f32 : mat2x4<f32>,
+  mat3x2_f32 : mat3x2<f32>,
+  mat3x3_f32 : mat3x3<f32>,
+  mat3x4_f32 : mat3x4<f32>,
+  mat4x2_f32 : mat4x2<f32>,
+  mat4x3_f32 : mat4x3<f32>,
+  mat4x4_f32 : mat4x4<f32>,
+  mat2x2_f16 : mat2x2<f16>,
+  mat2x3_f16 : mat2x3<f16>,
+  mat2x4_f16 : mat2x4<f16>,
+  mat3x2_f16 : mat3x2<f16>,
+  mat3x3_f16 : mat3x3<f16>,
+  mat3x4_f16 : mat3x4<f16>,
+  mat4x2_f16 : mat4x2<f16>,
+  mat4x3_f16 : mat4x3<f16>,
+  mat4x4_f16 : mat4x4<f16>,
+  arr2_vec3_f32 : array<vec3<f32>, 2>,
+  arr2_mat4x2_f16 : array<mat4x2<f16>, 2>,
 }
 )";
 
diff --git a/src/tint/transform/std140.cc b/src/tint/transform/std140.cc
index 8b566fe..a371f24 100644
--- a/src/tint/transform/std140.cc
+++ b/src/tint/transform/std140.cc
@@ -265,8 +265,8 @@
     };
 
     /// @returns true if the given matrix needs decomposing to column vectors for std140 layout.
-    /// TODO(crbug.com/tint/1502): This may need adjusting for `f16` matrices.
-    static bool MatrixNeedsDecomposing(const sem::Matrix* mat) { return mat->ColumnStride() == 8; }
+    /// Std140 layout requires the matrix column stride to be 16; otherwise decomposing is needed.
+    static bool MatrixNeedsDecomposing(const sem::Matrix* mat) { return mat->ColumnStride() != 16; }
 
     /// ForkTypes walks the user-declared types in dependency order, forking structures that are
     /// used as uniform buffers which (transitively) use matrices that need std140 decomposition to
@@ -474,7 +474,7 @@
                 // natural size for the matrix. This extra padding needs to be
                 // applied to the last column vector.
                 attributes.Push(
-                    b.MemberSize(AInt(size - mat->ColumnType()->Size() * (num_columns - 1))));
+                    b.MemberSize(AInt(size - mat->ColumnType()->Align() * (num_columns - 1))));
             }
 
             // Build the member
@@ -645,7 +645,8 @@
                 return "mat" + std::to_string(mat->columns()) + "x" + std::to_string(mat->rows()) +
                        "_" + ConvertSuffix(mat->type());
             },
-            [&](const sem::F32*) { return "f32"; },
+            [&](const sem::F32*) { return "f32"; },  //
+            [&](const sem::F16*) { return "f16"; },
             [&](Default) {
                 TINT_ICE(Transform, b.Diagnostics())
                     << "unhandled type for conversion name: " << src->FriendlyName(ty);
diff --git a/src/tint/transform/std140.h b/src/tint/transform/std140.h
index 49e663d..769932f 100644
--- a/src/tint/transform/std140.h
+++ b/src/tint/transform/std140.h
@@ -20,11 +20,12 @@
 namespace tint::transform {
 
 /// Std140 is a transform that forks types used in the uniform address space that contain
-/// `matNx2<f32>` matrices into `N`x`vec2<f32>` column vectors. Types that transitively use these
-/// forked types are also forked. `var<uniform>` variables will use these forked types, and
-/// expressions loading from these variables will do appropriate conversions to the regular WGSL
-/// types. As `matNx2<f32>` matrices are the only type that violate std140-layout, this
-/// transformation is sufficient to have any WGSL structure be std140-layout conformant.
+/// `matNx2<f32>` matrices into `N`x`vec2<f32>` column vectors, and `matNxM<f16>` matrices into
+/// `N`x`vecM<f16>` column vectors. Types that transitively use these forked types are also forked.
+/// `var<uniform>` variables will use these forked types, and expressions loading from these
+/// variables will do appropriate conversions to the regular WGSL types. As `matNx2<f32>` and
+/// `matNxM<f16>` matrices are the only types that violate std140-layout, this transformation is
+/// sufficient to have any WGSL structure be std140-layout conformant.
 ///
 /// @note This transform requires the PromoteSideEffectsToDecl transform to have been run first.
 class Std140 final : public Castable<Std140, Transform> {
diff --git a/src/tint/transform/std140_exhaustive_test.cc b/src/tint/transform/std140_exhaustive_test.cc
index 01d2dae..f50e1c4 100644
--- a/src/tint/transform/std140_exhaustive_test.cc
+++ b/src/tint/transform/std140_exhaustive_test.cc
@@ -2838,6 +2838,15 @@
                              {4, 2, MatrixType::f32},
                              {4, 3, MatrixType::f32},
                              {4, 4, MatrixType::f32},
+                             {2, 2, MatrixType::f16},
+                             {2, 3, MatrixType::f16},
+                             {2, 4, MatrixType::f16},
+                             {3, 2, MatrixType::f16},
+                             {3, 3, MatrixType::f16},
+                             {3, 4, MatrixType::f16},
+                             {4, 2, MatrixType::f16},
+                             {4, 3, MatrixType::f16},
+                             {4, 4, MatrixType::f16},
                          }));
 
 using Std140Test_MatrixArray = TransformTestWithParam<MatrixCase>;
@@ -4866,6 +4875,15 @@
                              {4, 2, MatrixType::f32},
                              {4, 3, MatrixType::f32},
                              {4, 4, MatrixType::f32},
+                             {2, 2, MatrixType::f16},
+                             {2, 3, MatrixType::f16},
+                             {2, 4, MatrixType::f16},
+                             {3, 2, MatrixType::f16},
+                             {3, 3, MatrixType::f16},
+                             {3, 4, MatrixType::f16},
+                             {4, 2, MatrixType::f16},
+                             {4, 3, MatrixType::f16},
+                             {4, 4, MatrixType::f16},
                          }));
 
 }  // namespace
diff --git a/src/tint/transform/std140_f16_test.cc b/src/tint/transform/std140_f16_test.cc
new file mode 100644
index 0000000..898bb73
--- /dev/null
+++ b/src/tint/transform/std140_f16_test.cc
@@ -0,0 +1,3596 @@
+// Copyright 2022 The Tint Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/tint/transform/std140.h"
+
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "src/tint/transform/test_helper.h"
+#include "src/tint/utils/string.h"
+
+namespace tint::transform {
+namespace {
+
+using Std140Test_F16 = TransformTest;
+
+TEST_F(Std140Test_F16, StructMatricesUniform) {
+    auto* src = R"(
+enable f16;
+
+struct S2x2F16 {
+  m : mat2x2<f16>,
+}
+struct S3x2F16 {
+  m : mat3x2<f16>,
+}
+struct S4x2F16 {
+  m : mat4x2<f16>,
+}
+struct S2x3F16 {
+  m : mat2x3<f16>,
+}
+struct S3x3F16 {
+  m : mat3x3<f16>,
+}
+struct S4x3F16 {
+  m : mat4x3<f16>,
+}
+struct S2x4F16 {
+  m : mat2x4<f16>,
+}
+struct S3x4F16 {
+  m : mat3x4<f16>,
+}
+struct S4x4F16 {
+  m : mat4x4<f16>,
+}
+
+@group(2) @binding(2) var<uniform> s2x2f16 : S2x2F16;
+@group(3) @binding(2) var<uniform> s3x2f16 : S3x2F16;
+@group(4) @binding(2) var<uniform> s4x2f16 : S4x2F16;
+@group(2) @binding(3) var<uniform> s2x3f16 : S2x3F16;
+@group(3) @binding(3) var<uniform> s3x3f16 : S3x3F16;
+@group(4) @binding(3) var<uniform> s4x3f16 : S4x3F16;
+@group(2) @binding(4) var<uniform> s2x4f16 : S2x4F16;
+@group(3) @binding(4) var<uniform> s3x4f16 : S3x4F16;
+@group(4) @binding(4) var<uniform> s4x4f16 : S4x4F16;
+)";
+
+    auto* expect = R"(
+enable f16;
+
+struct S2x2F16 {
+  m : mat2x2<f16>,
+}
+
+struct S2x2F16_std140 {
+  m_0 : vec2<f16>,
+  m_1 : vec2<f16>,
+}
+
+struct S3x2F16 {
+  m : mat3x2<f16>,
+}
+
+struct S3x2F16_std140 {
+  m_0 : vec2<f16>,
+  m_1 : vec2<f16>,
+  m_2 : vec2<f16>,
+}
+
+struct S4x2F16 {
+  m : mat4x2<f16>,
+}
+
+struct S4x2F16_std140 {
+  m_0 : vec2<f16>,
+  m_1 : vec2<f16>,
+  m_2 : vec2<f16>,
+  m_3 : vec2<f16>,
+}
+
+struct S2x3F16 {
+  m : mat2x3<f16>,
+}
+
+struct S2x3F16_std140 {
+  m_0 : vec3<f16>,
+  m_1 : vec3<f16>,
+}
+
+struct S3x3F16 {
+  m : mat3x3<f16>,
+}
+
+struct S3x3F16_std140 {
+  m_0 : vec3<f16>,
+  m_1 : vec3<f16>,
+  m_2 : vec3<f16>,
+}
+
+struct S4x3F16 {
+  m : mat4x3<f16>,
+}
+
+struct S4x3F16_std140 {
+  m_0 : vec3<f16>,
+  m_1 : vec3<f16>,
+  m_2 : vec3<f16>,
+  m_3 : vec3<f16>,
+}
+
+struct S2x4F16 {
+  m : mat2x4<f16>,
+}
+
+struct S2x4F16_std140 {
+  m_0 : vec4<f16>,
+  m_1 : vec4<f16>,
+}
+
+struct S3x4F16 {
+  m : mat3x4<f16>,
+}
+
+struct S3x4F16_std140 {
+  m_0 : vec4<f16>,
+  m_1 : vec4<f16>,
+  m_2 : vec4<f16>,
+}
+
+struct S4x4F16 {
+  m : mat4x4<f16>,
+}
+
+struct S4x4F16_std140 {
+  m_0 : vec4<f16>,
+  m_1 : vec4<f16>,
+  m_2 : vec4<f16>,
+  m_3 : vec4<f16>,
+}
+
+@group(2) @binding(2) var<uniform> s2x2f16 : S2x2F16_std140;
+
+@group(3) @binding(2) var<uniform> s3x2f16 : S3x2F16_std140;
+
+@group(4) @binding(2) var<uniform> s4x2f16 : S4x2F16_std140;
+
+@group(2) @binding(3) var<uniform> s2x3f16 : S2x3F16_std140;
+
+@group(3) @binding(3) var<uniform> s3x3f16 : S3x3F16_std140;
+
+@group(4) @binding(3) var<uniform> s4x3f16 : S4x3F16_std140;
+
+@group(2) @binding(4) var<uniform> s2x4f16 : S2x4F16_std140;
+
+@group(3) @binding(4) var<uniform> s3x4f16 : S3x4F16_std140;
+
+@group(4) @binding(4) var<uniform> s4x4f16 : S4x4F16_std140;
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+// In the following tests we only test `mat2x3<f16>`, and set all constant column indices to 1, row
+// indices to 0, inner array indices to 2, and outer array indices to 3. For exhaustive tests, i.e.
+// tests on all matrix shapes and different valid constant indices, see std140_exhaustive_test.cc.
+
+TEST_F(Std140Test_F16, SingleStructMatUniform_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+struct S {
+  m : mat2x3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+)";
+
+    auto* expect = R"(
+enable f16;
+
+struct S {
+  m : mat2x3<f16>,
+}
+
+struct S_std140 {
+  m_0 : vec3<f16>,
+  m_1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16, CustomAlign_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+struct S {
+  before : i32,
+  @align(128)
+  m : mat2x3<f16>,
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+)";
+
+    auto* expect = R"(
+enable f16;
+
+struct S {
+  before : i32,
+  @align(128)
+  m : mat2x3<f16>,
+  after : i32,
+}
+
+struct S_std140 {
+  before : i32,
+  @align(128i)
+  m_0 : vec3<f16>,
+  m_1 : vec3<f16>,
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16, CustomSizeMat_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+struct S {
+  before : i32,
+  @size(128)
+  m : mat2x3<f16>,
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+)";
+
+    auto* expect = R"(
+enable f16;
+
+struct S {
+  before : i32,
+  @size(128)
+  m : mat2x3<f16>,
+  after : i32,
+}
+
+struct S_std140 {
+  before : i32,
+  m_0 : vec3<f16>,
+  @size(120)
+  m_1 : vec3<f16>,
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16, CustomAlignAndSize_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+struct S {
+  before : i32,
+  @align(128) @size(128)
+  m : mat2x3<f16>,
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+)";
+
+    auto* expect = R"(
+enable f16;
+
+struct S {
+  before : i32,
+  @align(128) @size(128)
+  m : mat2x3<f16>,
+  after : i32,
+}
+
+struct S_std140 {
+  before : i32,
+  @align(128i)
+  m_0 : vec3<f16>,
+  @size(120)
+  m_1 : vec3<f16>,
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16, MatrixUsageInForLoop_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+struct S {
+  m : mat2x3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  for(var i = u32(s.m[0][0]); (i < u32(s.m[i][1])); i += u32(s.m[1][i])) {
+  }
+}
+)";
+
+    auto* expect = R"(
+enable f16;
+
+struct S {
+  m : mat2x3<f16>,
+}
+
+struct S_std140 {
+  m_0 : vec3<f16>,
+  m_1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn load_s_m_p0_1(p0 : u32) -> f16 {
+  switch(p0) {
+    case 0u: {
+      return s.m_0[1u];
+    }
+    case 1u: {
+      return s.m_1[1u];
+    }
+    default: {
+      return f16();
+    }
+  }
+}
+
+fn f() {
+  for(var i = u32(s.m_0[0u]); (i < u32(load_s_m_p0_1(u32(i)))); i += u32(s.m_1[i])) {
+  }
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16, MatUniform_LoadMatrix_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+@group(0) @binding(0) var<uniform> m : mat2x3<f16>;
+
+fn f() {
+  let l = m;
+}
+)";
+
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> m : mat2x3_f16;
+
+fn conv_mat2x3_f16(val : mat2x3_f16) -> mat2x3<f16> {
+  return mat2x3<f16>(val.col0, val.col1);
+}
+
+fn f() {
+  let l = conv_mat2x3_f16(m);
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16, MatUniform_LoadColumn_ConstIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : mat2x3<f16>;
+
+fn f() {
+  let l = a[1];
+}
+)";
+
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : mat2x3_f16;
+
+fn f() {
+  let l = a.col1;
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16, MatUniform_LoadColumn_VariableIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : mat2x3<f16>;
+
+fn f() {
+  let I = 1;
+  let l = a[I];
+}
+)";
+
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : mat2x3_f16;
+
+fn load_a_p0(p0 : u32) -> vec3<f16> {
+  switch(p0) {
+    case 0u: {
+      return a.col0;
+    }
+    case 1u: {
+      return a.col1;
+    }
+    default: {
+      return vec3<f16>();
+    }
+  }
+}
+
+fn f() {
+  let I = 1;
+  let l = load_a_p0(u32(I));
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16, MatUniform_LoadColumnSwizzle_ConstIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : mat2x3<f16>;
+
+fn f() {
+  let l = a[1].yzx;
+}
+)";
+
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : mat2x3_f16;
+
+fn f() {
+  let l = a.col1.yzx;
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16, MatUniform_LoadColumnSwizzle_VariableIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : mat2x3<f16>;
+
+fn f() {
+  let I = 1;
+  let l = a[I].yzx;
+}
+)";
+
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : mat2x3_f16;
+
+fn load_a_p0_yzx(p0 : u32) -> vec3<f16> {
+  switch(p0) {
+    case 0u: {
+      return a.col0.yzx;
+    }
+    case 1u: {
+      return a.col1.yzx;
+    }
+    default: {
+      return vec3<f16>();
+    }
+  }
+}
+
+fn f() {
+  let I = 1;
+  let l = load_a_p0_yzx(u32(I));
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16, MatUniform_LoadScalar_ConstColumnIndex_ConstRowIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : mat2x3<f16>;
+
+fn f() {
+  let l = a[1][0];
+}
+)";
+
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : mat2x3_f16;
+
+fn f() {
+  let l = a.col1[0u];
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16, MatUniform_LoadScalar_VariableColumnIndex_ConstRowIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : mat2x3<f16>;
+
+fn f() {
+  let I = 0;
+  let l = a[I][0];
+}
+)";
+
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : mat2x3_f16;
+
+fn load_a_p0_0(p0 : u32) -> f16 {
+  switch(p0) {
+    case 0u: {
+      return a.col0[0u];
+    }
+    case 1u: {
+      return a.col1[0u];
+    }
+    default: {
+      return f16();
+    }
+  }
+}
+
+fn f() {
+  let I = 0;
+  let l = load_a_p0_0(u32(I));
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16, MatUniform_LoadScalar_ConstColumnIndex_VariableRowIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : mat2x3<f16>;
+
+fn f() {
+  let I = 0;
+  let l = a[1][I];
+}
+)";
+
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : mat2x3_f16;
+
+fn f() {
+  let I = 0;
+  let l = a.col1[I];
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16, MatUniform_LoadScalar_VariableColumnIndex_VariableRowIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : mat2x3<f16>;
+
+fn f() {
+  let I = 0;
+  let l = a[I][I];
+}
+)";
+
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : mat2x3_f16;
+
+fn load_a_p0_p1(p0 : u32, p1 : u32) -> f16 {
+  switch(p0) {
+    case 0u: {
+      return a.col0[p1];
+    }
+    case 1u: {
+      return a.col1[p1];
+    }
+    default: {
+      return f16();
+    }
+  }
+}
+
+fn f() {
+  let I = 0;
+  let l = load_a_p0_p1(u32(I), u32(I));
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16, StructMatUniform_NameCollision_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+struct S {
+  m_1 : i32,
+  m : mat2x3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+)";
+
+    auto* expect = R"(
+enable f16;
+
+struct S {
+  m_1 : i32,
+  m : mat2x3<f16>,
+}
+
+struct S_std140 {
+  m_1 : i32,
+  m__0 : vec3<f16>,
+  m__1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16, StructMatUniform_LoadStruct_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+struct S {
+  m : mat2x3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let l = s;
+}
+)";
+
+    auto* expect = R"(
+enable f16;
+
+struct S {
+  m : mat2x3<f16>,
+}
+
+struct S_std140 {
+  m_0 : vec3<f16>,
+  m_1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn conv_S(val : S_std140) -> S {
+  return S(mat2x3<f16>(val.m_0, val.m_1));
+}
+
+fn f() {
+  let l = conv_S(s);
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16, StructMatUniform_LoadMatrix_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+struct S {
+  m : mat2x3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let l = s.m;
+}
+)";
+
+    auto* expect = R"(
+enable f16;
+
+struct S {
+  m : mat2x3<f16>,
+}
+
+struct S_std140 {
+  m_0 : vec3<f16>,
+  m_1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn load_s_m() -> mat2x3<f16> {
+  let s = &(s);
+  return mat2x3<f16>((*(s)).m_0, (*(s)).m_1);
+}
+
+fn f() {
+  let l = load_s_m();
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16, StructMatUniform_LoadColumn_ConstIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+struct S {
+  m : mat2x3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let l = s.m[1];
+}
+)";
+
+    auto* expect = R"(
+enable f16;
+
+struct S {
+  m : mat2x3<f16>,
+}
+
+struct S_std140 {
+  m_0 : vec3<f16>,
+  m_1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn f() {
+  let l = s.m_1;
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16, StructMatUniform_LoadColumn_VariableIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+struct S {
+  m : mat2x3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let I = 0;
+  let l = s.m[I];
+}
+)";
+
+    auto* expect = R"(
+enable f16;
+
+struct S {
+  m : mat2x3<f16>,
+}
+
+struct S_std140 {
+  m_0 : vec3<f16>,
+  m_1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn load_s_m_p0(p0 : u32) -> vec3<f16> {
+  switch(p0) {
+    case 0u: {
+      return s.m_0;
+    }
+    case 1u: {
+      return s.m_1;
+    }
+    default: {
+      return vec3<f16>();
+    }
+  }
+}
+
+fn f() {
+  let I = 0;
+  let l = load_s_m_p0(u32(I));
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16, StructMatUniform_LoadScalar_ConstColumnIndex_ConstRowIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+struct S {
+  m : mat2x3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let l = s.m[1][0];
+}
+)";
+
+    auto* expect = R"(
+enable f16;
+
+struct S {
+  m : mat2x3<f16>,
+}
+
+struct S_std140 {
+  m_0 : vec3<f16>,
+  m_1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn f() {
+  let l = s.m_1[0u];
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16, StructMatUniform_LoadScalar_VariableColumnIndex_ConstRowIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+struct S {
+  m : mat2x3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let I = 0;
+  let l = s.m[I][0];
+}
+)";
+
+    auto* expect = R"(
+enable f16;
+
+struct S {
+  m : mat2x3<f16>,
+}
+
+struct S_std140 {
+  m_0 : vec3<f16>,
+  m_1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn load_s_m_p0_0(p0 : u32) -> f16 {
+  switch(p0) {
+    case 0u: {
+      return s.m_0[0u];
+    }
+    case 1u: {
+      return s.m_1[0u];
+    }
+    default: {
+      return f16();
+    }
+  }
+}
+
+fn f() {
+  let I = 0;
+  let l = load_s_m_p0_0(u32(I));
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16, StructMatUniform_LoadScalar_ConstColumnIndex_VariableRowIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+struct S {
+  m : mat2x3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let I = 0;
+  let l = s.m[1][I];
+}
+)";
+
+    auto* expect = R"(
+enable f16;
+
+struct S {
+  m : mat2x3<f16>,
+}
+
+struct S_std140 {
+  m_0 : vec3<f16>,
+  m_1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn f() {
+  let I = 0;
+  let l = s.m_1[I];
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16, StructMatUniform_LoadScalar_VariableColumnIndex_VariableRowIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+struct S {
+  m : mat2x3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let I = 0;
+  let l = s.m[I][I];
+}
+)";
+
+    auto* expect = R"(
+enable f16;
+
+struct S {
+  m : mat2x3<f16>,
+}
+
+struct S_std140 {
+  m_0 : vec3<f16>,
+  m_1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn load_s_m_p0_p1(p0 : u32, p1 : u32) -> f16 {
+  switch(p0) {
+    case 0u: {
+      return s.m_0[p1];
+    }
+    case 1u: {
+      return s.m_1[p1];
+    }
+    default: {
+      return f16();
+    }
+  }
+}
+
+fn f() {
+  let I = 0;
+  let l = load_s_m_p0_p1(u32(I), u32(I));
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16, ArrayStructMatUniform_LoadArray_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+struct S {
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S, 3>;
+
+fn f() {
+  let l = a;
+}
+)";
+
+    auto* expect = R"(
+enable f16;
+
+struct S {
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+struct S_std140 {
+  m_0 : vec3<f16>,
+  @size(56)
+  m_1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S_std140, 3u>;
+
+fn conv_S(val : S_std140) -> S {
+  return S(mat2x3<f16>(val.m_0, val.m_1));
+}
+
+fn conv_arr3_S(val : array<S_std140, 3u>) -> array<S, 3u> {
+  var arr : array<S, 3u>;
+  for(var i : u32; (i < 3u); i = (i + 1)) {
+    arr[i] = conv_S(val[i]);
+  }
+  return arr;
+}
+
+fn f() {
+  let l = conv_arr3_S(a);
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16, ArrayStructMatUniform_LoadStruct_ConstIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+struct S {
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S, 3>;
+
+fn f() {
+  let l = a[2];
+}
+)";
+
+    auto* expect = R"(
+enable f16;
+
+struct S {
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+struct S_std140 {
+  m_0 : vec3<f16>,
+  @size(56)
+  m_1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S_std140, 3u>;
+
+fn conv_S(val : S_std140) -> S {
+  return S(mat2x3<f16>(val.m_0, val.m_1));
+}
+
+fn f() {
+  let l = conv_S(a[2u]);
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16, ArrayStructMatUniform_LoadStruct_VariableIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+struct S {
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S, 3>;
+
+fn f() {
+  let I = 1;
+  let l = a[I];
+}
+)";
+
+    auto* expect = R"(
+enable f16;
+
+struct S {
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+struct S_std140 {
+  m_0 : vec3<f16>,
+  @size(56)
+  m_1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S_std140, 3u>;
+
+fn conv_S(val : S_std140) -> S {
+  return S(mat2x3<f16>(val.m_0, val.m_1));
+}
+
+fn f() {
+  let I = 1;
+  let l = conv_S(a[I]);
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16, ArrayStructMatUniform_LoadMatrix_ConstArrayIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+struct S {
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S, 3>;
+
+fn f() {
+  let l = a[2].m;
+}
+)";
+
+    auto* expect = R"(
+enable f16;
+
+struct S {
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+struct S_std140 {
+  m_0 : vec3<f16>,
+  @size(56)
+  m_1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S_std140, 3u>;
+
+fn load_a_2_m() -> mat2x3<f16> {
+  let s = &(a[2u]);
+  return mat2x3<f16>((*(s)).m_0, (*(s)).m_1);
+}
+
+fn f() {
+  let l = load_a_2_m();
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16, ArrayStructMatUniform_LoadMatrix_VariableArrayIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+struct S {
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S, 3>;
+
+fn f() {
+  let I = 1;
+  let l = a[I].m;
+}
+)";
+
+    auto* expect = R"(
+enable f16;
+
+struct S {
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+struct S_std140 {
+  m_0 : vec3<f16>,
+  @size(56)
+  m_1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S_std140, 3u>;
+
+fn load_a_p0_m(p0 : u32) -> mat2x3<f16> {
+  let s = &(a[p0]);
+  return mat2x3<f16>((*(s)).m_0, (*(s)).m_1);
+}
+
+fn f() {
+  let I = 1;
+  let l = load_a_p0_m(u32(I));
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16,
+       ArrayStructMatUniform_LoadColumn_ConstArrayIndex_ConstColumnIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+struct S {
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S, 3>;
+
+fn f() {
+  let l = a[2].m[1];
+}
+)";
+
+    auto* expect = R"(
+enable f16;
+
+struct S {
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+struct S_std140 {
+  m_0 : vec3<f16>,
+  @size(56)
+  m_1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S_std140, 3u>;
+
+fn f() {
+  let l = a[2u].m_1;
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16,
+       ArrayStructMatUniform_LoadColumn_VariableArrayIndex_ConstColumnIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+struct S {
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S, 3>;
+
+fn f() {
+  let I = 1;
+  let l = a[I].m[1];
+}
+)";
+
+    auto* expect = R"(
+enable f16;
+
+struct S {
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+struct S_std140 {
+  m_0 : vec3<f16>,
+  @size(56)
+  m_1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S_std140, 3u>;
+
+fn f() {
+  let I = 1;
+  let l = a[I].m_1;
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16,
+       ArrayStructMatUniform_LoadColumn_ConstArrayIndex_VariableColumnIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+struct S {
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S, 3>;
+
+fn f() {
+  let I = 1;
+  let l = a[2].m[I];
+}
+)";
+
+    auto* expect = R"(
+enable f16;
+
+struct S {
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+struct S_std140 {
+  m_0 : vec3<f16>,
+  @size(56)
+  m_1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S_std140, 3u>;
+
+fn load_a_2_m_p0(p0 : u32) -> vec3<f16> {
+  switch(p0) {
+    case 0u: {
+      return a[2u].m_0;
+    }
+    case 1u: {
+      return a[2u].m_1;
+    }
+    default: {
+      return vec3<f16>();
+    }
+  }
+}
+
+fn f() {
+  let I = 1;
+  let l = load_a_2_m_p0(u32(I));
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16,
+       ArrayStructMatUniform_LoadColumn_VariableArrayIndex_VariableColumnIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+struct S {
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S, 3>;
+
+fn f() {
+  let I = 1;
+  let l = a[I].m[I];
+}
+)";
+
+    auto* expect = R"(
+enable f16;
+
+struct S {
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+struct S_std140 {
+  m_0 : vec3<f16>,
+  @size(56)
+  m_1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S_std140, 3u>;
+
+fn load_a_p0_m_p1(p0 : u32, p1 : u32) -> vec3<f16> {
+  switch(p1) {
+    case 0u: {
+      return a[p0].m_0;
+    }
+    case 1u: {
+      return a[p0].m_1;
+    }
+    default: {
+      return vec3<f16>();
+    }
+  }
+}
+
+fn f() {
+  let I = 1;
+  let l = load_a_p0_m_p1(u32(I), u32(I));
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16, ArrayStructArrayStructMatUniform_Loads_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+struct Inner {
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+fn f() {
+  let I = 1;
+  let J = 2;
+  let K = 0;
+  let l_a : array<Outer, 4> = a;
+  let l_a_1 : Outer = a[1];
+  let l_a_I : Outer = a[I];
+  let l_a_2_a : array<Inner, 4> = a[2].a;
+  let l_a_I_a : array<Inner, 4> = a[I].a;
+  let l_a_3_a_1 : Inner = a[3].a[1];
+  let l_a_3_a_I : Inner = a[3].a[I];
+  let l_a_I_a_1 : Inner = a[I].a[1];
+  let l_a_I_a_J : Inner = a[I].a[J];
+  let l_a_0_a_2_m : mat2x3<f16> = a[0].a[2].m;
+  let l_a_0_a_I_m : mat2x3<f16> = a[0].a[I].m;
+  let l_a_I_a_2_m : mat2x3<f16> = a[I].a[2].m;
+  let l_a_I_a_J_m : mat2x3<f16> = a[I].a[J].m;
+  let l_a_1_a_3_m_0 : vec3<f16> = a[1].a[3].m[0];
+  let l_a_I_a_J_m_K : vec3<f16> = a[I].a[J].m[K];
+  let l_a_2_a_0_m_1_0 : f16 = a[2].a[0].m[1][0];
+  let l_a_I_a_J_m_K_I : f16 = a[I].a[J].m[K][I];
+}
+)";
+
+    auto* expect = R"(
+enable f16;
+
+struct Inner {
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+struct Inner_std140 {
+  m_0 : vec3<f16>,
+  @size(56)
+  m_1 : vec3<f16>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+struct Outer_std140 {
+  a : array<Inner_std140, 4u>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer_std140, 4u>;
+
+fn conv_Inner(val : Inner_std140) -> Inner {
+  return Inner(mat2x3<f16>(val.m_0, val.m_1));
+}
+
+fn conv_arr4_Inner(val : array<Inner_std140, 4u>) -> array<Inner, 4u> {
+  var arr : array<Inner, 4u>;
+  for(var i : u32; (i < 4u); i = (i + 1)) {
+    arr[i] = conv_Inner(val[i]);
+  }
+  return arr;
+}
+
+fn conv_Outer(val : Outer_std140) -> Outer {
+  return Outer(conv_arr4_Inner(val.a));
+}
+
+fn conv_arr4_Outer(val : array<Outer_std140, 4u>) -> array<Outer, 4u> {
+  var arr : array<Outer, 4u>;
+  for(var i : u32; (i < 4u); i = (i + 1)) {
+    arr[i] = conv_Outer(val[i]);
+  }
+  return arr;
+}
+
+fn load_a_0_a_2_m() -> mat2x3<f16> {
+  let s = &(a[0u].a[2u]);
+  return mat2x3<f16>((*(s)).m_0, (*(s)).m_1);
+}
+
+fn load_a_0_a_p0_m(p0 : u32) -> mat2x3<f16> {
+  let s = &(a[0u].a[p0]);
+  return mat2x3<f16>((*(s)).m_0, (*(s)).m_1);
+}
+
+fn load_a_p0_a_2_m(p0 : u32) -> mat2x3<f16> {
+  let s = &(a[p0].a[2u]);
+  return mat2x3<f16>((*(s)).m_0, (*(s)).m_1);
+}
+
+fn load_a_p0_a_p1_m(p0 : u32, p1 : u32) -> mat2x3<f16> {
+  let s = &(a[p0].a[p1]);
+  return mat2x3<f16>((*(s)).m_0, (*(s)).m_1);
+}
+
+fn load_a_p0_a_p1_m_p2(p0 : u32, p1 : u32, p2 : u32) -> vec3<f16> {
+  switch(p2) {
+    case 0u: {
+      return a[p0].a[p1].m_0;
+    }
+    case 1u: {
+      return a[p0].a[p1].m_1;
+    }
+    default: {
+      return vec3<f16>();
+    }
+  }
+}
+
+fn load_a_p0_a_p1_m_p2_p3(p0 : u32, p1 : u32, p2 : u32, p3 : u32) -> f16 {
+  switch(p2) {
+    case 0u: {
+      return a[p0].a[p1].m_0[p3];
+    }
+    case 1u: {
+      return a[p0].a[p1].m_1[p3];
+    }
+    default: {
+      return f16();
+    }
+  }
+}
+
+fn f() {
+  let I = 1;
+  let J = 2;
+  let K = 0;
+  let l_a : array<Outer, 4> = conv_arr4_Outer(a);
+  let l_a_1 : Outer = conv_Outer(a[1u]);
+  let l_a_I : Outer = conv_Outer(a[I]);
+  let l_a_2_a : array<Inner, 4> = conv_arr4_Inner(a[2u].a);
+  let l_a_I_a : array<Inner, 4> = conv_arr4_Inner(a[I].a);
+  let l_a_3_a_1 : Inner = conv_Inner(a[3u].a[1u]);
+  let l_a_3_a_I : Inner = conv_Inner(a[3u].a[I]);
+  let l_a_I_a_1 : Inner = conv_Inner(a[I].a[1u]);
+  let l_a_I_a_J : Inner = conv_Inner(a[I].a[J]);
+  let l_a_0_a_2_m : mat2x3<f16> = load_a_0_a_2_m();
+  let l_a_0_a_I_m : mat2x3<f16> = load_a_0_a_p0_m(u32(I));
+  let l_a_I_a_2_m : mat2x3<f16> = load_a_p0_a_2_m(u32(I));
+  let l_a_I_a_J_m : mat2x3<f16> = load_a_p0_a_p1_m(u32(I), u32(J));
+  let l_a_1_a_3_m_0 : vec3<f16> = a[1u].a[3u].m_0;
+  let l_a_I_a_J_m_K : vec3<f16> = load_a_p0_a_p1_m_p2(u32(I), u32(J), u32(K));
+  let l_a_2_a_0_m_1_0 : f16 = a[2u].a[0u].m_1[0u];
+  let l_a_I_a_J_m_K_I : f16 = load_a_p0_a_p1_m_p2_p3(u32(I), u32(J), u32(K), u32(I));
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16, ArrayStructArrayStructMatUniform_LoadsViaPtrs_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+struct Inner {
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+fn f() {
+  let I = 1;
+  let J = 2;
+  let K = 0;
+  let p_a = &(a);
+  let p_a_3 = &((*(p_a))[3]);
+  let p_a_I = &((*(p_a))[I]);
+  let p_a_3_a = &((*(p_a_3)).a);
+  let p_a_I_a = &((*(p_a_I)).a);
+  let p_a_3_a_2 = &((*(p_a_3_a))[2]);
+  let p_a_3_a_I = &((*(p_a_3_a))[I]);
+  let p_a_I_a_2 = &((*(p_a_I_a))[2]);
+  let p_a_I_a_J = &((*(p_a_I_a))[J]);
+  let p_a_3_a_2_m = &((*(p_a_3_a_2)).m);
+  let p_a_3_a_I_m = &((*(p_a_3_a_I)).m);
+  let p_a_I_a_2_m = &((*(p_a_I_a_2)).m);
+  let p_a_I_a_J_m = &((*(p_a_I_a_J)).m);
+  let p_a_3_a_2_m_1 = &((*(p_a_3_a_2_m))[1]);
+  let p_a_I_a_J_m_K = &((*(p_a_I_a_J_m))[K]);
+  let l_a : array<Outer, 4> = *(p_a);
+  let l_a_3 : Outer = *(p_a_3);
+  let l_a_I : Outer = *(p_a_I);
+  let l_a_3_a : array<Inner, 4> = *(p_a_3_a);
+  let l_a_I_a : array<Inner, 4> = *(p_a_I_a);
+  let l_a_3_a_2 : Inner = *(p_a_3_a_2);
+  let l_a_3_a_I : Inner = *(p_a_3_a_I);
+  let l_a_I_a_2 : Inner = *(p_a_I_a_2);
+  let l_a_I_a_J : Inner = *(p_a_I_a_J);
+  let l_a_3_a_2_m : mat2x3<f16> = *(p_a_3_a_2_m);
+  let l_a_3_a_I_m : mat2x3<f16> = *(p_a_3_a_I_m);
+  let l_a_I_a_2_m : mat2x3<f16> = *(p_a_I_a_2_m);
+  let l_a_I_a_J_m : mat2x3<f16> = *(p_a_I_a_J_m);
+  let l_a_3_a_2_m_1 : vec3<f16> = *(p_a_3_a_2_m_1);
+  let l_a_I_a_J_m_K : vec3<f16> = *(p_a_I_a_J_m_K);
+  let l_a_2_a_0_m_1_0 : f16 = (*(p_a_3_a_2_m_1))[0];
+  let l_a_I_a_J_m_K_I : f16 = (*(p_a_I_a_J_m_K))[I];
+}
+)";
+
+    auto* expect = R"(
+enable f16;
+
+struct Inner {
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+struct Inner_std140 {
+  m_0 : vec3<f16>,
+  @size(56)
+  m_1 : vec3<f16>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+struct Outer_std140 {
+  a : array<Inner_std140, 4u>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer_std140, 4u>;
+
+fn conv_Inner(val : Inner_std140) -> Inner {
+  return Inner(mat2x3<f16>(val.m_0, val.m_1));
+}
+
+fn conv_arr4_Inner(val : array<Inner_std140, 4u>) -> array<Inner, 4u> {
+  var arr : array<Inner, 4u>;
+  for(var i : u32; (i < 4u); i = (i + 1)) {
+    arr[i] = conv_Inner(val[i]);
+  }
+  return arr;
+}
+
+fn conv_Outer(val : Outer_std140) -> Outer {
+  return Outer(conv_arr4_Inner(val.a));
+}
+
+fn conv_arr4_Outer(val : array<Outer_std140, 4u>) -> array<Outer, 4u> {
+  var arr : array<Outer, 4u>;
+  for(var i : u32; (i < 4u); i = (i + 1)) {
+    arr[i] = conv_Outer(val[i]);
+  }
+  return arr;
+}
+
+fn load_a_3_a_2_m() -> mat2x3<f16> {
+  let s = &(a[3u].a[2u]);
+  return mat2x3<f16>((*(s)).m_0, (*(s)).m_1);
+}
+
+fn load_a_3_a_p0_m(p0 : u32) -> mat2x3<f16> {
+  let s = &(a[3u].a[p0]);
+  return mat2x3<f16>((*(s)).m_0, (*(s)).m_1);
+}
+
+fn load_a_p0_a_2_m(p0 : u32) -> mat2x3<f16> {
+  let s = &(a[p0].a[2u]);
+  return mat2x3<f16>((*(s)).m_0, (*(s)).m_1);
+}
+
+fn load_a_p0_a_p1_m(p0 : u32, p1 : u32) -> mat2x3<f16> {
+  let s = &(a[p0].a[p1]);
+  return mat2x3<f16>((*(s)).m_0, (*(s)).m_1);
+}
+
+fn load_a_p0_a_p1_m_p2(p0 : u32, p1 : u32, p2 : u32) -> vec3<f16> {
+  switch(p2) {
+    case 0u: {
+      return a[p0].a[p1].m_0;
+    }
+    case 1u: {
+      return a[p0].a[p1].m_1;
+    }
+    default: {
+      return vec3<f16>();
+    }
+  }
+}
+
+fn load_a_p0_a_p1_m_p2_p3(p0 : u32, p1 : u32, p2 : u32, p3 : u32) -> f16 {
+  switch(p2) {
+    case 0u: {
+      return a[p0].a[p1].m_0[p3];
+    }
+    case 1u: {
+      return a[p0].a[p1].m_1[p3];
+    }
+    default: {
+      return f16();
+    }
+  }
+}
+
+fn f() {
+  let I = 1;
+  let J = 2;
+  let K = 0;
+  let p_a = conv_arr4_Outer(a);
+  let p_a_3 = conv_Outer(a[3u]);
+  let p_a_I = conv_Outer(a[I]);
+  let p_a_3_a = conv_arr4_Inner(a[3u].a);
+  let p_a_I_a = conv_arr4_Inner(a[I].a);
+  let p_a_3_a_2 = conv_Inner(a[3u].a[2u]);
+  let p_a_3_a_I = conv_Inner(a[3u].a[I]);
+  let p_a_I_a_2 = conv_Inner(a[I].a[2u]);
+  let p_a_I_a_J = conv_Inner(a[I].a[J]);
+  let p_a_3_a_2_m = load_a_3_a_2_m();
+  let p_a_3_a_I_m = load_a_3_a_p0_m(u32(I));
+  let p_a_I_a_2_m = load_a_p0_a_2_m(u32(I));
+  let p_a_I_a_J_m = load_a_p0_a_p1_m(u32(I), u32(J));
+  let p_a_3_a_2_m_1 = a[3u].a[2u].m_1;
+  let p_a_I_a_J_m_K = load_a_p0_a_p1_m_p2(u32(I), u32(J), u32(K));
+  let l_a : array<Outer, 4> = conv_arr4_Outer(a);
+  let l_a_3 : Outer = conv_Outer(a[3u]);
+  let l_a_I : Outer = conv_Outer(a[I]);
+  let l_a_3_a : array<Inner, 4> = conv_arr4_Inner(a[3u].a);
+  let l_a_I_a : array<Inner, 4> = conv_arr4_Inner(a[I].a);
+  let l_a_3_a_2 : Inner = conv_Inner(a[3u].a[2u]);
+  let l_a_3_a_I : Inner = conv_Inner(a[3u].a[I]);
+  let l_a_I_a_2 : Inner = conv_Inner(a[I].a[2u]);
+  let l_a_I_a_J : Inner = conv_Inner(a[I].a[J]);
+  let l_a_3_a_2_m : mat2x3<f16> = load_a_3_a_2_m();
+  let l_a_3_a_I_m : mat2x3<f16> = load_a_3_a_p0_m(u32(I));
+  let l_a_I_a_2_m : mat2x3<f16> = load_a_p0_a_2_m(u32(I));
+  let l_a_I_a_J_m : mat2x3<f16> = load_a_p0_a_p1_m(u32(I), u32(J));
+  let l_a_3_a_2_m_1 : vec3<f16> = a[3u].a[2u].m_1;
+  let l_a_I_a_J_m_K : vec3<f16> = load_a_p0_a_p1_m_p2(u32(I), u32(J), u32(K));
+  let l_a_2_a_0_m_1_0 : f16 = a[3u].a[2u].m_1[0u];
+  let l_a_I_a_J_m_K_I : f16 = load_a_p0_a_p1_m_p2_p3(u32(I), u32(J), u32(K), u32(I));
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16, ArrayStructMatUniform_CopyArray_UniformToStorage_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+struct S {
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
+
+fn f() {
+  s = u;
+}
+)";
+
+    auto* expect = R"(
+enable f16;
+
+struct S {
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+struct S_std140 {
+  m_0 : vec3<f16>,
+  @size(56)
+  m_1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S_std140, 4u>;
+
+@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
+
+fn conv_S(val : S_std140) -> S {
+  return S(mat2x3<f16>(val.m_0, val.m_1));
+}
+
+fn conv_arr4_S(val : array<S_std140, 4u>) -> array<S, 4u> {
+  var arr : array<S, 4u>;
+  for(var i : u32; (i < 4u); i = (i + 1)) {
+    arr[i] = conv_S(val[i]);
+  }
+  return arr;
+}
+
+fn f() {
+  s = conv_arr4_S(u);
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16, ArrayStructMatUniform_CopyStruct_UniformToWorkgroup_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+struct S {
+  v : vec4<i32>,
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+var<workgroup> w : array<S, 4>;
+
+fn f() {
+  w[0] = u[1];
+}
+)";
+
+    auto* expect = R"(
+enable f16;
+
+struct S {
+  v : vec4<i32>,
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+struct S_std140 {
+  v : vec4<i32>,
+  m_0 : vec3<f16>,
+  @size(56)
+  m_1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S_std140, 4u>;
+
+var<workgroup> w : array<S, 4>;
+
+fn conv_S(val : S_std140) -> S {
+  return S(val.v, mat2x3<f16>(val.m_0, val.m_1));
+}
+
+fn f() {
+  w[0] = conv_S(u[1u]);
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16, ArrayStructMatUniform_CopyMatrix_UniformToPrivate_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+struct S {
+  v : vec4<i32>,
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 3>;
+
+var<private> p : array<S, 4>;
+
+fn f() {
+  p[2].m = u[1].m;
+}
+)";
+    // Expected: the matrix read becomes load_u_1_m(), which rebuilds it from m_0 and m_1.
+    auto* expect = R"(
+enable f16;
+
+struct S {
+  v : vec4<i32>,
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+struct S_std140 {
+  v : vec4<i32>,
+  m_0 : vec3<f16>,
+  @size(56)
+  m_1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S_std140, 3u>;
+
+var<private> p : array<S, 4>;
+
+fn load_u_1_m() -> mat2x3<f16> {
+  let s = &(u[1u]);
+  return mat2x3<f16>((*(s)).m_0, (*(s)).m_1);
+}
+
+fn f() {
+  p[2].m = load_u_1_m();
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16, ArrayStructMatUniform_CopyColumn_UniformToStorage_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+struct S {
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 3>;
+
+@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
+
+fn f() {
+  s[3].m[1] = u[2].m[0];
+}
+)";
+    // Expected: the column read u[2].m[0] becomes a direct access to member m_0.
+    auto* expect = R"(
+enable f16;
+
+struct S {
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+struct S_std140 {
+  m_0 : vec3<f16>,
+  @size(56)
+  m_1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S_std140, 3u>;
+
+@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
+
+fn f() {
+  s[3].m[1] = u[2u].m_0;
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16, ArrayStructMatUniform_CopyColumnSwizzle_UniformToWorkgroup_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+struct S {
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+var<workgroup> w : array<S, 4>;
+
+fn f() {
+  w[3].m[1] = u[2].m[0].yzx.yzx;
+}
+)";
+    // Expected: the column read becomes member m_0, with the swizzle chain kept unchanged.
+    auto* expect = R"(
+enable f16;
+
+struct S {
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+struct S_std140 {
+  m_0 : vec3<f16>,
+  @size(56)
+  m_1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S_std140, 4u>;
+
+var<workgroup> w : array<S, 4>;
+
+fn f() {
+  w[3].m[1] = u[2u].m_0.yzx.yzx;
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16, ArrayStructMatUniform_CopyScalar_UniformToPrivate_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+struct S {
+  v : vec4<i32>,
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 3>;
+
+var<private> p : array<S, 4>;
+
+fn f() {
+  p[3].m[1].x = u[2].m[0].y;
+}
+)";
+    // Expected: the scalar read u[2].m[0].y becomes u[2u].m_0[1u].
+    auto* expect = R"(
+enable f16;
+
+struct S {
+  v : vec4<i32>,
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+struct S_std140 {
+  v : vec4<i32>,
+  m_0 : vec3<f16>,
+  @size(56)
+  m_1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S_std140, 3u>;
+
+var<private> p : array<S, 4>;
+
+fn f() {
+  p[3].m[1].x = u[2u].m_0[1u];
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16, ArrayMatUniform_LoadArray_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : array<mat2x3<f16>, 3>;
+
+fn f() {
+  let l = a;
+}
+)";
+    // Expected: elements become struct mat2x3_f16; the load goes via conv_arr3_mat2x3_f16().
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<mat2x3_f16, 3u>;
+
+fn conv_mat2x3_f16(val : mat2x3_f16) -> mat2x3<f16> {
+  return mat2x3<f16>(val.col0, val.col1);
+}
+
+fn conv_arr3_mat2x3_f16(val : array<mat2x3_f16, 3u>) -> array<mat2x3<f16>, 3u> {
+  var arr : array<mat2x3<f16>, 3u>;
+  for(var i : u32; (i < 3u); i = (i + 1)) {
+    arr[i] = conv_mat2x3_f16(val[i]);
+  }
+  return arr;
+}
+
+fn f() {
+  let l = conv_arr3_mat2x3_f16(a);
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16, ArrayMatUniform_LoadMatrix_ConstArrayIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : array<mat2x3<f16>, 3>;
+
+fn f() {
+  let l = a[2];
+}
+)";
+    // Expected: the element load is wrapped in conv_mat2x3_f16(), with the index written as 2u.
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<mat2x3_f16, 3u>;
+
+fn conv_mat2x3_f16(val : mat2x3_f16) -> mat2x3<f16> {
+  return mat2x3<f16>(val.col0, val.col1);
+}
+
+fn f() {
+  let l = conv_mat2x3_f16(a[2u]);
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16, ArrayMatUniform_LoadMatrix_VariableArrayIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : array<mat2x3<f16>, 3>;
+
+fn f() {
+  let I = 1;
+  let l = a[I];
+}
+)";
+    // Expected: the element load is wrapped in conv_mat2x3_f16(), with index I kept as is.
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<mat2x3_f16, 3u>;
+
+fn conv_mat2x3_f16(val : mat2x3_f16) -> mat2x3<f16> {
+  return mat2x3<f16>(val.col0, val.col1);
+}
+
+fn f() {
+  let I = 1;
+  let l = conv_mat2x3_f16(a[I]);
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16, ArrayMatUniform_LoadColumn_ConstArrayIndex_ConstColumnIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : array<mat2x3<f16>, 3>;
+
+fn f() {
+  let l = a[2][1];
+}
+)";
+    // Expected: the column load becomes a direct access to member col1 of a[2u].
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<mat2x3_f16, 3u>;
+
+fn f() {
+  let l = a[2u].col1;
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16, ArrayMatUniform_LoadColumn_VariableArrayIndex_ConstColumnIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : array<mat2x3<f16>, 3>;
+
+fn f() {
+  let I = 1;
+  let l = a[I][1];
+}
+)";
+    // Expected: the column load becomes a direct access to member col1 of a[I].
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<mat2x3_f16, 3u>;
+
+fn f() {
+  let I = 1;
+  let l = a[I].col1;
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16, ArrayMatUniform_LoadColumn_ConstArrayIndex_VariableColumnIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : array<mat2x3<f16>, 3>;
+
+fn f() {
+  let I = 1;
+  let l = a[2][I];
+}
+)";
+    // Expected: a load_a_2_p0() helper picks the column with a switch on the dynamic index.
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<mat2x3_f16, 3u>;
+
+fn load_a_2_p0(p0 : u32) -> vec3<f16> {
+  switch(p0) {
+    case 0u: {
+      return a[2u].col0;
+    }
+    case 1u: {
+      return a[2u].col1;
+    }
+    default: {
+      return vec3<f16>();
+    }
+  }
+}
+
+fn f() {
+  let I = 1;
+  let l = load_a_2_p0(u32(I));
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16,
+       ArrayMatUniform_LoadColumn_VariableArrayIndex_VariableColumnIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : array<mat2x3<f16>, 3>;
+
+fn f() {
+  let I = 1;
+  let l = a[I][I];
+}
+)";
+    // Expected: a load_a_p0_p1() helper takes both indices and switches on the column index.
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<mat2x3_f16, 3u>;
+
+fn load_a_p0_p1(p0 : u32, p1 : u32) -> vec3<f16> {
+  switch(p1) {
+    case 0u: {
+      return a[p0].col0;
+    }
+    case 1u: {
+      return a[p0].col1;
+    }
+    default: {
+      return vec3<f16>();
+    }
+  }
+}
+
+fn f() {
+  let I = 1;
+  let l = load_a_p0_p1(u32(I), u32(I));
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16, StructArrayMatUniform_LoadStruct_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+struct S {
+  a : array<mat2x3<f16>, 3>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let l = s;
+}
+)";
+    // Expected: S_std140 holds mat2x3_f16 elements and the struct load goes via conv_S().
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+struct S {
+  a : array<mat2x3<f16>, 3>,
+}
+
+struct S_std140 {
+  a : array<mat2x3_f16, 3u>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn conv_mat2x3_f16(val : mat2x3_f16) -> mat2x3<f16> {
+  return mat2x3<f16>(val.col0, val.col1);
+}
+
+fn conv_arr3_mat2x3_f16(val : array<mat2x3_f16, 3u>) -> array<mat2x3<f16>, 3u> {
+  var arr : array<mat2x3<f16>, 3u>;
+  for(var i : u32; (i < 3u); i = (i + 1)) {
+    arr[i] = conv_mat2x3_f16(val[i]);
+  }
+  return arr;
+}
+
+fn conv_S(val : S_std140) -> S {
+  return S(conv_arr3_mat2x3_f16(val.a));
+}
+
+fn f() {
+  let l = conv_S(s);
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16, StructArrayMatUniform_LoadArray_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+struct S {
+  a : array<mat2x3<f16>, 3>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let l = s.a;
+}
+)";
+    // Expected: the member array load goes via conv_arr3_mat2x3_f16(); no conv_S() is emitted.
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+struct S {
+  a : array<mat2x3<f16>, 3>,
+}
+
+struct S_std140 {
+  a : array<mat2x3_f16, 3u>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn conv_mat2x3_f16(val : mat2x3_f16) -> mat2x3<f16> {
+  return mat2x3<f16>(val.col0, val.col1);
+}
+
+fn conv_arr3_mat2x3_f16(val : array<mat2x3_f16, 3u>) -> array<mat2x3<f16>, 3u> {
+  var arr : array<mat2x3<f16>, 3u>;
+  for(var i : u32; (i < 3u); i = (i + 1)) {
+    arr[i] = conv_mat2x3_f16(val[i]);
+  }
+  return arr;
+}
+
+fn f() {
+  let l = conv_arr3_mat2x3_f16(s.a);
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16, StructArrayMatUniform_LoadMatrix_ConstArrayIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+struct S {
+  a : array<mat2x3<f16>, 3>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let l = s.a[2];
+}
+)";
+    // Expected: the element load is wrapped in conv_mat2x3_f16(), with the index written as 2u.
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+struct S {
+  a : array<mat2x3<f16>, 3>,
+}
+
+struct S_std140 {
+  a : array<mat2x3_f16, 3u>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn conv_mat2x3_f16(val : mat2x3_f16) -> mat2x3<f16> {
+  return mat2x3<f16>(val.col0, val.col1);
+}
+
+fn f() {
+  let l = conv_mat2x3_f16(s.a[2u]);
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16, StructArrayMatUniform_LoadMatrix_VariableArrayIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+struct S {
+  a : array<mat2x3<f16>, 3>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let I = 1;
+  let l = s.a[I];
+}
+)";
+    // Expected: the element load is wrapped in conv_mat2x3_f16(), with index I kept as is.
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+struct S {
+  a : array<mat2x3<f16>, 3>,
+}
+
+struct S_std140 {
+  a : array<mat2x3_f16, 3u>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn conv_mat2x3_f16(val : mat2x3_f16) -> mat2x3<f16> {
+  return mat2x3<f16>(val.col0, val.col1);
+}
+
+fn f() {
+  let I = 1;
+  let l = conv_mat2x3_f16(s.a[I]);
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16,
+       StructArrayMatUniform_LoadColumn_ConstArrayIndex_ConstColumnIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+struct S {
+  a : array<mat2x3<f16>, 3>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let l = s.a[2][1];
+}
+)";
+    // Expected: the column load becomes a direct access to member col1 of s.a[2u].
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+struct S {
+  a : array<mat2x3<f16>, 3>,
+}
+
+struct S_std140 {
+  a : array<mat2x3_f16, 3u>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn f() {
+  let l = s.a[2u].col1;
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16,
+       StructArrayMatUniform_LoadColumn_VariableArrayIndex_ConstColumnIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+struct S {
+  a : array<mat2x3<f16>, 3>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let I = 1;
+  let l = s.a[I][1];
+}
+)";
+    // Expected: the column load becomes a direct access to member col1 of s.a[I].
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+struct S {
+  a : array<mat2x3<f16>, 3>,
+}
+
+struct S_std140 {
+  a : array<mat2x3_f16, 3u>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn f() {
+  let I = 1;
+  let l = s.a[I].col1;
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16,
+       StructArrayMatUniform_LoadColumn_ConstArrayIndex_VariableColumnIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+struct S {
+  a : array<mat2x3<f16>, 3>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let I = 1;
+  let l = s.a[2][I];
+}
+)";
+    // Expected: a load_s_a_2_p0() helper picks the column with a switch on the dynamic index.
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+struct S {
+  a : array<mat2x3<f16>, 3>,
+}
+
+struct S_std140 {
+  a : array<mat2x3_f16, 3u>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn load_s_a_2_p0(p0 : u32) -> vec3<f16> {
+  switch(p0) {
+    case 0u: {
+      return s.a[2u].col0;
+    }
+    case 1u: {
+      return s.a[2u].col1;
+    }
+    default: {
+      return vec3<f16>();
+    }
+  }
+}
+
+fn f() {
+  let I = 1;
+  let l = load_s_a_2_p0(u32(I));
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16,
+       StructArrayMatUniform_LoadColumn_VariableArrayIndex_VariableColumnIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+struct S {
+  a : array<mat2x3<f16>, 3>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let I = 1;
+  let l = s.a[I][I];
+}
+)";
+    // Expected: a load_s_a_p0_p1() helper takes both indices and switches on the column index.
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+struct S {
+  a : array<mat2x3<f16>, 3>,
+}
+
+struct S_std140 {
+  a : array<mat2x3_f16, 3u>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn load_s_a_p0_p1(p0 : u32, p1 : u32) -> vec3<f16> {
+  switch(p1) {
+    case 0u: {
+      return s.a[p0].col0;
+    }
+    case 1u: {
+      return s.a[p0].col1;
+    }
+    default: {
+      return vec3<f16>();
+    }
+  }
+}
+
+fn f() {
+  let I = 1;
+  let l = load_s_a_p0_p1(u32(I), u32(I));
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16, ArrayArrayMatUniform_LoadArrays_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x3<f16>, 3>, 4>;
+
+fn f() {
+  let l = a;
+}
+)";
+    // Expected: one conversion helper per nesting level (matrix, inner array, outer array).
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x3_f16, 3u>, 4u>;
+
+fn conv_mat2x3_f16(val : mat2x3_f16) -> mat2x3<f16> {
+  return mat2x3<f16>(val.col0, val.col1);
+}
+
+fn conv_arr3_mat2x3_f16(val : array<mat2x3_f16, 3u>) -> array<mat2x3<f16>, 3u> {
+  var arr : array<mat2x3<f16>, 3u>;
+  for(var i : u32; (i < 3u); i = (i + 1)) {
+    arr[i] = conv_mat2x3_f16(val[i]);
+  }
+  return arr;
+}
+
+fn conv_arr4_arr3_mat2x3_f16(val : array<array<mat2x3_f16, 3u>, 4u>) -> array<array<mat2x3<f16>, 3u>, 4u> {
+  var arr : array<array<mat2x3<f16>, 3u>, 4u>;
+  for(var i : u32; (i < 4u); i = (i + 1)) {
+    arr[i] = conv_arr3_mat2x3_f16(val[i]);
+  }
+  return arr;
+}
+
+fn f() {
+  let l = conv_arr4_arr3_mat2x3_f16(a);
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16, ArrayArrayMatUniform_LoadArray_ConstOuterArrayIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x3<f16>, 3>, 4>;
+
+fn f() {
+  let l = a[3];
+}
+)";
+    // Expected: the inner array load is wrapped in conv_arr3_mat2x3_f16(), with index 3u.
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x3_f16, 3u>, 4u>;
+
+fn conv_mat2x3_f16(val : mat2x3_f16) -> mat2x3<f16> {
+  return mat2x3<f16>(val.col0, val.col1);
+}
+
+fn conv_arr3_mat2x3_f16(val : array<mat2x3_f16, 3u>) -> array<mat2x3<f16>, 3u> {
+  var arr : array<mat2x3<f16>, 3u>;
+  for(var i : u32; (i < 3u); i = (i + 1)) {
+    arr[i] = conv_mat2x3_f16(val[i]);
+  }
+  return arr;
+}
+
+fn f() {
+  let l = conv_arr3_mat2x3_f16(a[3u]);
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16, ArrayArrayMatUniform_LoadArray_VariableOuterArrayIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x3<f16>, 3>, 4>;
+
+fn f() {
+  let I = 1;
+  let l = a[I];
+}
+)";
+    // Expected: the inner array load is wrapped in conv_arr3_mat2x3_f16(), with index I kept.
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x3_f16, 3u>, 4u>;
+
+fn conv_mat2x3_f16(val : mat2x3_f16) -> mat2x3<f16> {
+  return mat2x3<f16>(val.col0, val.col1);
+}
+
+fn conv_arr3_mat2x3_f16(val : array<mat2x3_f16, 3u>) -> array<mat2x3<f16>, 3u> {
+  var arr : array<mat2x3<f16>, 3u>;
+  for(var i : u32; (i < 3u); i = (i + 1)) {
+    arr[i] = conv_mat2x3_f16(val[i]);
+  }
+  return arr;
+}
+
+fn f() {
+  let I = 1;
+  let l = conv_arr3_mat2x3_f16(a[I]);
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16,
+       ArrayArrayMatUniform_LoadMatrix_ConstOuterArrayIndex_ConstInnerArrayIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x3<f16>, 3>, 4>;
+
+fn f() {
+  let l = a[3][2];
+}
+)";
+    // Expected: the matrix load is wrapped in conv_mat2x3_f16(), with indices 3u and 2u.
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x3_f16, 3u>, 4u>;
+
+fn conv_mat2x3_f16(val : mat2x3_f16) -> mat2x3<f16> {
+  return mat2x3<f16>(val.col0, val.col1);
+}
+
+fn f() {
+  let l = conv_mat2x3_f16(a[3u][2u]);
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16,
+       ArrayArrayMatUniform_LoadMatrix_ConstOuterArrayIndex_VariableInnerArrayIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x3<f16>, 3>, 4>;
+
+fn f() {
+  let I = 1;
+  let l = a[3][I];
+}
+)";
+    // Expected: the matrix load is wrapped in conv_mat2x3_f16(), with indices 3u and I.
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x3_f16, 3u>, 4u>;
+
+fn conv_mat2x3_f16(val : mat2x3_f16) -> mat2x3<f16> {
+  return mat2x3<f16>(val.col0, val.col1);
+}
+
+fn f() {
+  let I = 1;
+  let l = conv_mat2x3_f16(a[3u][I]);
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16,
+       ArrayArrayMatUniform_LoadMatrix_VariableOuterArrayIndex_ConstInnerArrayIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x3<f16>, 3>, 4>;
+
+fn f() {
+  let I = 1;
+  let l = a[I][2];
+}
+)";
+    // Expected: the matrix load is wrapped in conv_mat2x3_f16(), with indices I and 2u.
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x3_f16, 3u>, 4u>;
+
+fn conv_mat2x3_f16(val : mat2x3_f16) -> mat2x3<f16> {
+  return mat2x3<f16>(val.col0, val.col1);
+}
+
+fn f() {
+  let I = 1;
+  let l = conv_mat2x3_f16(a[I][2u]);
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16,
+       ArrayArrayMatUniform_LoadMatrix_VariableOuterArrayIndex_VariableInnerArrayIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x3<f16>, 3>, 4>;
+
+fn f() {
+  let I = 1;
+  let l = a[I][I];
+}
+)";
+    // Expected: the matrix load is wrapped in conv_mat2x3_f16(), with I used for both indices.
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x3_f16, 3u>, 4u>;
+
+fn conv_mat2x3_f16(val : mat2x3_f16) -> mat2x3<f16> {
+  return mat2x3<f16>(val.col0, val.col1);
+}
+
+fn f() {
+  let I = 1;
+  let l = conv_mat2x3_f16(a[I][I]);
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(
+    Std140Test_F16,
+    ArrayArrayMatUniform_LoadColumn_ConstOuterArrayIndex_ConstInnerArrayIndex_ConstColumnIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x3<f16>, 3>, 4>;
+
+fn f() {
+  let l = a[3][2][1];
+}
+)";
+    // Expected: the column load becomes a direct access to member col1 of a[3u][2u].
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x3_f16, 3u>, 4u>;
+
+fn f() {
+  let l = a[3u][2u].col1;
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(
+    Std140Test_F16,
+    ArrayArrayMatUniform_LoadColumn_ConstOuterArrayIndex_ConstInnerArrayIndex_VariableColumnIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x3<f16>, 3>, 4>;
+
+fn f() {
+  let I = 1;
+  let l = a[3][2][I];
+}
+)";
+    // Expected: a load_a_3_2_p0() helper picks the column with a switch on the dynamic index.
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x3_f16, 3u>, 4u>;
+
+fn load_a_3_2_p0(p0 : u32) -> vec3<f16> {
+  switch(p0) {
+    case 0u: {
+      return a[3u][2u].col0;
+    }
+    case 1u: {
+      return a[3u][2u].col1;
+    }
+    default: {
+      return vec3<f16>();
+    }
+  }
+}
+
+fn f() {
+  let I = 1;
+  let l = load_a_3_2_p0(u32(I));
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(
+    Std140Test_F16,
+    ArrayArrayMatUniform_LoadColumn_ConstOuterArrayIndex_VariableInnerArrayIndex_ConstColumnIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x3<f16>, 3>, 4>;
+
+fn f() {
+  let I = 1;
+  let l = a[3][I][1];
+}
+)";
+    // Expected: the column load becomes a direct access to member col1 of a[3u][I].
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x3_f16, 3u>, 4u>;
+
+fn f() {
+  let I = 1;
+  let l = a[3u][I].col1;
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(
+    Std140Test_F16,
+    ArrayArrayMatUniform_LoadColumn_ConstOuterArrayIndex_VariableInnerArrayIndex_VariableColumnIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x3<f16>, 3>, 4>;
+
+fn f() {
+  let I = 1;
+  let J = 2;
+  let l = a[3][I][J];
+}
+)";
+    // Expected: a load_a_3_p0_p1() helper takes both dynamic indices and switches on the column.
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x3_f16, 3u>, 4u>;
+
+fn load_a_3_p0_p1(p0 : u32, p1 : u32) -> vec3<f16> {
+  switch(p1) {
+    case 0u: {
+      return a[3u][p0].col0;
+    }
+    case 1u: {
+      return a[3u][p0].col1;
+    }
+    default: {
+      return vec3<f16>();
+    }
+  }
+}
+
+fn f() {
+  let I = 1;
+  let J = 2;
+  let l = load_a_3_p0_p1(u32(I), u32(J));
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(
+    Std140Test_F16,
+    ArrayArrayMatUniform_LoadColumn_VariableOuterArrayIndex_ConstInnerArrayIndex_ConstColumnIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x3<f16>, 3>, 4>;
+
+fn f() {
+  let I = 1;
+  let l = a[I][2][1];
+}
+)";
+    // Expected: the column load becomes a direct access to member col1 of a[I][2u].
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x3_f16, 3u>, 4u>;
+
+fn f() {
+  let I = 1;
+  let l = a[I][2u].col1;
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(
+    Std140Test_F16,
+    ArrayArrayMatUniform_LoadColumn_VariableOuterArrayIndex_ConstInnerArrayIndex_VariableColumnIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x3<f16>, 3>, 4>;
+
+fn f() {
+  let I = 1;
+  let J = 2;
+  let l = a[I][2][J];
+}
+)";
+    // Expected: a load_a_p0_2_p1() helper takes both dynamic indices and switches on the column.
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x3_f16, 3u>, 4u>;
+
+fn load_a_p0_2_p1(p0 : u32, p1 : u32) -> vec3<f16> {
+  switch(p1) {
+    case 0u: {
+      return a[p0][2u].col0;
+    }
+    case 1u: {
+      return a[p0][2u].col1;
+    }
+    default: {
+      return vec3<f16>();
+    }
+  }
+}
+
+fn f() {
+  let I = 1;
+  let J = 2;
+  let l = load_a_p0_2_p1(u32(I), u32(J));
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(
+    Std140Test_F16,
+    ArrayArrayMatUniform_LoadColumn_VariableOuterArrayIndex_VariableInnerArrayIndex_ConstColumnIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x3<f16>, 3>, 4>;
+
+fn f() {
+  let I = 1;
+  let J = 2;
+  let l = a[I][J][1];
+}
+)";
+    // Expected: the column load becomes a direct access to member col1 of a[I][J].
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x3_f16, 3u>, 4u>;
+
+fn f() {
+  let I = 1;
+  let J = 2;
+  let l = a[I][J].col1;
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(
+    Std140Test_F16,
+    ArrayArrayMatUniform_LoadColumn_VariableOuterArrayIndex_VariableInnerArrayIndex_VariableColumnIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x3<f16>, 3>, 4>;
+
+fn f() {
+  let I = 0;
+  let J = 1;
+  let K = 2;
+  let l = a[I][J][K];
+}
+)";
+    // Expected: a load_a_p0_p1_p2() helper takes all three indices and switches on the column.
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x3_f16, 3u>, 4u>;
+
+fn load_a_p0_p1_p2(p0 : u32, p1 : u32, p2 : u32) -> vec3<f16> {
+  switch(p2) {
+    case 0u: {
+      return a[p0][p1].col0;
+    }
+    case 1u: {
+      return a[p0][p1].col1;
+    }
+    default: {
+      return vec3<f16>();
+    }
+  }
+}
+
+fn f() {
+  let I = 0;
+  let J = 1;
+  let K = 2;
+  let l = load_a_p0_p1_p2(u32(I), u32(J), u32(K));
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+}  // namespace
+}  // namespace tint::transform
diff --git a/src/tint/transform/std140_f32_test.cc b/src/tint/transform/std140_f32_test.cc
new file mode 100644
index 0000000..b0bd467
--- /dev/null
+++ b/src/tint/transform/std140_f32_test.cc
@@ -0,0 +1,3359 @@
+// Copyright 2022 The Tint Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/tint/transform/std140.h"
+
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "src/tint/transform/test_helper.h"
+#include "src/tint/utils/string.h"
+
+namespace tint::transform {
+namespace {
+
+using Std140Test_F32 = TransformTest;  // Fixture type named by the TEST_F cases in this file.
+
+TEST_F(Std140Test_F32, StructMatricesUniform) {
+    auto* src = R"(
+struct S2x2F32 {
+  m : mat2x2<f32>,
+}
+struct S3x2F32 {
+  m : mat3x2<f32>,
+}
+struct S4x2F32 {
+  m : mat4x2<f32>,
+}
+struct S2x3F32 {
+  m : mat2x3<f32>,
+}
+struct S3x3F32 {
+  m : mat3x3<f32>,
+}
+struct S4x3F32 {
+  m : mat4x3<f32>,
+}
+struct S2x4F32 {
+  m : mat2x4<f32>,
+}
+struct S3x4F32 {
+  m : mat3x4<f32>,
+}
+struct S4x4F32 {
+  m : mat4x4<f32>,
+}
+
+@group(2) @binding(2) var<uniform> s2x2f32 : S2x2F32;
+@group(3) @binding(2) var<uniform> s3x2f32 : S3x2F32;
+@group(4) @binding(2) var<uniform> s4x2f32 : S4x2F32;
+@group(2) @binding(3) var<uniform> s2x3f32 : S2x3F32;
+@group(3) @binding(3) var<uniform> s3x3f32 : S3x3F32;
+@group(4) @binding(3) var<uniform> s4x3f32 : S4x3F32;
+@group(2) @binding(4) var<uniform> s2x4f32 : S2x4F32;
+@group(3) @binding(4) var<uniform> s3x4f32 : S3x4F32;
+@group(4) @binding(4) var<uniform> s4x4f32 : S4x4F32;
+)";
+    // Expected: only the matNx2<f32> structs get a *_std140 variant; the others stay unchanged.
+    auto* expect = R"(
+struct S2x2F32 {
+  m : mat2x2<f32>,
+}
+
+struct S2x2F32_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+}
+
+struct S3x2F32 {
+  m : mat3x2<f32>,
+}
+
+struct S3x2F32_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  m_2 : vec2<f32>,
+}
+
+struct S4x2F32 {
+  m : mat4x2<f32>,
+}
+
+struct S4x2F32_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  m_2 : vec2<f32>,
+  m_3 : vec2<f32>,
+}
+
+struct S2x3F32 {
+  m : mat2x3<f32>,
+}
+
+struct S3x3F32 {
+  m : mat3x3<f32>,
+}
+
+struct S4x3F32 {
+  m : mat4x3<f32>,
+}
+
+struct S2x4F32 {
+  m : mat2x4<f32>,
+}
+
+struct S3x4F32 {
+  m : mat3x4<f32>,
+}
+
+struct S4x4F32 {
+  m : mat4x4<f32>,
+}
+
+@group(2) @binding(2) var<uniform> s2x2f32 : S2x2F32_std140;
+
+@group(3) @binding(2) var<uniform> s3x2f32 : S3x2F32_std140;
+
+@group(4) @binding(2) var<uniform> s4x2f32 : S4x2F32_std140;
+
+@group(2) @binding(3) var<uniform> s2x3f32 : S2x3F32;
+
+@group(3) @binding(3) var<uniform> s3x3f32 : S3x3F32;
+
+@group(4) @binding(3) var<uniform> s4x3f32 : S4x3F32;
+
+@group(2) @binding(4) var<uniform> s2x4f32 : S2x4F32;
+
+@group(3) @binding(4) var<uniform> s3x4f32 : S3x4F32;
+
+@group(4) @binding(4) var<uniform> s4x4f32 : S4x4F32;
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+// The following tests only cover `mat2x2<f32>` for matrices used as array element types and
+// `mat3x2<f32>` otherwise, with every constant column index set to 1, row index to 0, inner array
+// index to 2, and outer array index to 3. For exhaustive tests, i.e. tests on all matrix shapes
+// and different valid constant indices, please refer to std140_exhaustive_test.cc.
+
+TEST_F(Std140Test_F32, SingleStructMatUniform_Mat3x2F32) {
+    auto* src = R"(
+struct S {
+  m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+)";
+    // Expected: S_std140 replaces m with the per-column members m_0, m_1 and m_2.
+    auto* expect = R"(
+struct S {
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, CustomAlign_Mat3x2F32) {
+    auto* src = R"(
+struct S {
+  before : i32,
+  @align(128)
+  m : mat3x2<f32>,
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+)";
+    // Expected: the matrix's alignment attribute appears as @align(128i) on the first column m_0.
+    auto* expect = R"(
+struct S {
+  before : i32,
+  @align(128)
+  m : mat3x2<f32>,
+  after : i32,
+}
+
+struct S_std140 {
+  before : i32,
+  @align(128i)
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  m_2 : vec2<f32>,
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, CustomSizeMat_Mat3x2F32) {
+    auto* src = R"(
+struct S {
+  before : i32,
+  @size(128)
+  m : mat3x2<f32>,
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+)";
+    // Expected: the last column m_2 carries @size(112) where the matrix had @size(128).
+    auto* expect = R"(
+struct S {
+  before : i32,
+  @size(128)
+  m : mat3x2<f32>,
+  after : i32,
+}
+
+struct S_std140 {
+  before : i32,
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  @size(112)
+  m_2 : vec2<f32>,
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, CustomAlignAndSize_Mat3x2F32) {
+    auto* src = R"(
+struct S {
+  before : i32,
+  @align(128) @size(128)
+  m : mat3x2<f32>,
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+)";
+    // Expected: @align(128i) lands on the first column m_0 and @size(112) on the last column m_2.
+    auto* expect = R"(
+struct S {
+  before : i32,
+  @align(128) @size(128)
+  m : mat3x2<f32>,
+  after : i32,
+}
+
+struct S_std140 {
+  before : i32,
+  @align(128i)
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  @size(112)
+  m_2 : vec2<f32>,
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, MatrixUsageInForLoop_Mat3x2F32) {
+    auto* src = R"(
+struct S {
+  m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  for(var i = u32(s.m[0][0]); (i < u32(s.m[i][1])); i += u32(s.m[1][i])) {
+  }
+}
+)";
+    // Expected: only the dynamically indexed column (loop condition) needs a load helper.
+    auto* expect = R"(
+struct S {
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn load_s_m_p0_1(p0 : u32) -> f32 {
+  switch(p0) {
+    case 0u: {
+      return s.m_0[1u];
+    }
+    case 1u: {
+      return s.m_1[1u];
+    }
+    case 2u: {
+      return s.m_2[1u];
+    }
+    default: {
+      return f32();
+    }
+  }
+}
+
+fn f() {
+  for(var i = u32(s.m_0[0u]); (i < u32(load_s_m_p0_1(u32(i)))); i += u32(s.m_1[i])) {
+  }
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, MatUniform_LoadMatrix_Mat3x2F32) {
+    auto* src = R"(
+@group(0) @binding(0) var<uniform> m : mat3x2<f32>;
+
+fn f() {
+  let l = m;
+}
+)";
+    // Expected: m is retyped to struct mat3x2_f32 and the load goes through conv_mat3x2_f32().
+    auto* expect = R"(
+struct mat3x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+  col2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> m : mat3x2_f32;
+
+fn conv_mat3x2_f32(val : mat3x2_f32) -> mat3x2<f32> {
+  return mat3x2<f32>(val.col0, val.col1, val.col2);
+}
+
+fn f() {
+  let l = conv_mat3x2_f32(m);
+}
+)";
+    // Run<Std140>(src) yields got; str(got) must match the expected text exactly.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, MatUniform_LoadColumn_ConstIndex_Mat3x2F32) {
+    auto* src = R"(
+@group(0) @binding(0) var<uniform> a : mat3x2<f32>;
+
+fn f() {
+  let l = a[1];
+}
+)";
+    // Expected: the constant column index a[1] is rewritten to the struct member a.col1.
+    auto* expect = R"(
+struct mat3x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+  col2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : mat3x2_f32;
+
+fn f() {
+  let l = a.col1;
+}
+)";
+    // Run<Std140>(src) yields got; str(got) must match the expected text exactly.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, MatUniform_LoadColumn_VariableIndex_Mat3x2F32) {
+    auto* src = R"(
+@group(0) @binding(0) var<uniform> a : mat3x2<f32>;
+
+fn f() {
+  let I = 1;
+  let l = a[I];
+}
+)";
+    // Expected: a[I] calls load_a_p0(u32(I)), a switch over columns defaulting to vec2<f32>().
+    auto* expect = R"(
+struct mat3x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+  col2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : mat3x2_f32;
+
+fn load_a_p0(p0 : u32) -> vec2<f32> {
+  switch(p0) {
+    case 0u: {
+      return a.col0;
+    }
+    case 1u: {
+      return a.col1;
+    }
+    case 2u: {
+      return a.col2;
+    }
+    default: {
+      return vec2<f32>();
+    }
+  }
+}
+
+fn f() {
+  let I = 1;
+  let l = load_a_p0(u32(I));
+}
+)";
+    // Run<Std140>(src) yields got; str(got) must match the expected text exactly.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, MatUniform_LoadColumnSwizzle_ConstIndex_Mat3x2F32) {
+    auto* src = R"(
+@group(0) @binding(0) var<uniform> a : mat3x2<f32>;
+
+fn f() {
+  let l = a[1].yx;
+}
+)";
+    // Expected: a[1].yx becomes a.col1.yx; no helper function is generated.
+    auto* expect = R"(
+struct mat3x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+  col2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : mat3x2_f32;
+
+fn f() {
+  let l = a.col1.yx;
+}
+)";
+    // Run<Std140>(src) yields got; str(got) must match the expected text exactly.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, MatUniform_LoadColumnSwizzle_VariableIndex_Mat3x2F32) {
+    auto* src = R"(
+@group(0) @binding(0) var<uniform> a : mat3x2<f32>;
+
+fn f() {
+  let I = 1;
+  let l = a[I].yx;
+}
+)";
+    // Expected: a[I].yx calls load_a_p0_yx(u32(I)); each switch case applies the .yx swizzle.
+    auto* expect = R"(
+struct mat3x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+  col2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : mat3x2_f32;
+
+fn load_a_p0_yx(p0 : u32) -> vec2<f32> {
+  switch(p0) {
+    case 0u: {
+      return a.col0.yx;
+    }
+    case 1u: {
+      return a.col1.yx;
+    }
+    case 2u: {
+      return a.col2.yx;
+    }
+    default: {
+      return vec2<f32>();
+    }
+  }
+}
+
+fn f() {
+  let I = 1;
+  let l = load_a_p0_yx(u32(I));
+}
+)";
+    // Run<Std140>(src) yields got; str(got) must match the expected text exactly.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, MatUniform_LoadScalar_ConstColumnIndex_ConstRowIndex_Mat3x2F32) {
+    auto* src = R"(
+@group(0) @binding(0) var<uniform> a : mat3x2<f32>;
+
+fn f() {
+  let l = a[1][0];
+}
+)";
+    // Expected: a[1][0] becomes a.col1[0u]; the row index is emitted as a u32 literal.
+    auto* expect = R"(
+struct mat3x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+  col2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : mat3x2_f32;
+
+fn f() {
+  let l = a.col1[0u];
+}
+)";
+    // Run<Std140>(src) yields got; str(got) must match the expected text exactly.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, MatUniform_LoadScalar_VariableColumnIndex_ConstRowIndex_Mat3x2F32) {
+    auto* src = R"(
+@group(0) @binding(0) var<uniform> a : mat3x2<f32>;
+
+fn f() {
+  let I = 0;
+  let l = a[I][0];
+}
+)";
+    // Expected: a[I][0] calls load_a_p0_0(u32(I)); each case reads row 0u of one column.
+    auto* expect = R"(
+struct mat3x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+  col2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : mat3x2_f32;
+
+fn load_a_p0_0(p0 : u32) -> f32 {
+  switch(p0) {
+    case 0u: {
+      return a.col0[0u];
+    }
+    case 1u: {
+      return a.col1[0u];
+    }
+    case 2u: {
+      return a.col2[0u];
+    }
+    default: {
+      return f32();
+    }
+  }
+}
+
+fn f() {
+  let I = 0;
+  let l = load_a_p0_0(u32(I));
+}
+)";
+    // Run<Std140>(src) yields got; str(got) must match the expected text exactly.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, MatUniform_LoadScalar_ConstColumnIndex_VariableRowIndex_Mat3x2F32) {
+    auto* src = R"(
+@group(0) @binding(0) var<uniform> a : mat3x2<f32>;
+
+fn f() {
+  let I = 0;
+  let l = a[1][I];
+}
+)";
+    // Expected: a[1][I] becomes a.col1[I]; a dynamic row index alone needs no helper.
+    auto* expect = R"(
+struct mat3x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+  col2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : mat3x2_f32;
+
+fn f() {
+  let I = 0;
+  let l = a.col1[I];
+}
+)";
+    // Run<Std140>(src) yields got; str(got) must match the expected text exactly.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, MatUniform_LoadScalar_VariableColumnIndex_VariableRowIndex_Mat3x2F32) {
+    auto* src = R"(
+@group(0) @binding(0) var<uniform> a : mat3x2<f32>;
+
+fn f() {
+  let I = 0;
+  let l = a[I][I];
+}
+)";
+    // Expected: a[I][I] calls load_a_p0_p1(u32(I), u32(I)); p0 picks the column, p1 the row.
+    auto* expect = R"(
+struct mat3x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+  col2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : mat3x2_f32;
+
+fn load_a_p0_p1(p0 : u32, p1 : u32) -> f32 {
+  switch(p0) {
+    case 0u: {
+      return a.col0[p1];
+    }
+    case 1u: {
+      return a.col1[p1];
+    }
+    case 2u: {
+      return a.col2[p1];
+    }
+    default: {
+      return f32();
+    }
+  }
+}
+
+fn f() {
+  let I = 0;
+  let l = load_a_p0_p1(u32(I), u32(I));
+}
+)";
+    // Run<Std140>(src) yields got; str(got) must match the expected text exactly.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, StructMatUniform_NameCollision_Mat3x2F32) {
+    auto* src = R"(
+struct S {
+  m_1 : i32,
+  m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+)";
+    // Expected: S already has a member m_1, so the columns get the names m__0..m__2 instead.
+    auto* expect = R"(
+struct S {
+  m_1 : i32,
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  m_1 : i32,
+  m__0 : vec2<f32>,
+  m__1 : vec2<f32>,
+  m__2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+)";
+    // Run<Std140>(src) yields got; str(got) must match the expected text exactly.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, StructMatUniform_LoadStruct_Mat3x2F32) {
+    auto* src = R"(
+struct S {
+  m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let l = s;
+}
+)";
+    // Expected: loading s whole goes through conv_S(), which rebuilds m from m_0..m_2.
+    auto* expect = R"(
+struct S {
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn conv_S(val : S_std140) -> S {
+  return S(mat3x2<f32>(val.m_0, val.m_1, val.m_2));
+}
+
+fn f() {
+  let l = conv_S(s);
+}
+)";
+    // Run<Std140>(src) yields got; str(got) must match the expected text exactly.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, StructMatUniform_LoadMatrix_Mat3x2F32) {
+    auto* src = R"(
+struct S {
+  m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let l = s.m;
+}
+)";
+    // Expected: s.m calls load_s_m(), which builds mat3x2<f32> from m_0..m_2 via a pointer.
+    auto* expect = R"(
+struct S {
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn load_s_m() -> mat3x2<f32> {
+  let s = &(s);
+  return mat3x2<f32>((*(s)).m_0, (*(s)).m_1, (*(s)).m_2);
+}
+
+fn f() {
+  let l = load_s_m();
+}
+)";
+    // Run<Std140>(src) yields got; str(got) must match the expected text exactly.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, StructMatUniform_LoadColumn_ConstIndex_Mat3x2F32) {
+    auto* src = R"(
+struct S {
+  m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let l = s.m[1];
+}
+)";
+    // Expected: s.m[1] is rewritten to the column member s.m_1 with no helper.
+    auto* expect = R"(
+struct S {
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn f() {
+  let l = s.m_1;
+}
+)";
+    // Run<Std140>(src) yields got; str(got) must match the expected text exactly.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, StructMatUniform_LoadColumn_VariableIndex_Mat3x2F32) {
+    auto* src = R"(
+struct S {
+  m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let I = 0;
+  let l = s.m[I];
+}
+)";
+    // Expected: s.m[I] calls load_s_m_p0(u32(I)), a switch returning s.m_0, s.m_1 or s.m_2.
+    auto* expect = R"(
+struct S {
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn load_s_m_p0(p0 : u32) -> vec2<f32> {
+  switch(p0) {
+    case 0u: {
+      return s.m_0;
+    }
+    case 1u: {
+      return s.m_1;
+    }
+    case 2u: {
+      return s.m_2;
+    }
+    default: {
+      return vec2<f32>();
+    }
+  }
+}
+
+fn f() {
+  let I = 0;
+  let l = load_s_m_p0(u32(I));
+}
+)";
+    // Run<Std140>(src) yields got; str(got) must match the expected text exactly.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, StructMatUniform_LoadScalar_ConstColumnIndex_ConstRowIndex_Mat3x2F32) {
+    auto* src = R"(
+struct S {
+  m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let l = s.m[1][0];
+}
+)";
+    // Expected: s.m[1][0] is rewritten in place to s.m_1[0u].
+    auto* expect = R"(
+struct S {
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn f() {
+  let l = s.m_1[0u];
+}
+)";
+    // Run<Std140>(src) yields got; str(got) must match the expected text exactly.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, StructMatUniform_LoadScalar_VariableColumnIndex_ConstRowIndex_Mat3x2F32) {
+    auto* src = R"(
+struct S {
+  m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let I = 0;
+  let l = s.m[I][0];
+}
+)";
+    // Expected: s.m[I][0] calls load_s_m_p0_0(u32(I)); each case reads element 0u of a column.
+    auto* expect = R"(
+struct S {
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn load_s_m_p0_0(p0 : u32) -> f32 {
+  switch(p0) {
+    case 0u: {
+      return s.m_0[0u];
+    }
+    case 1u: {
+      return s.m_1[0u];
+    }
+    case 2u: {
+      return s.m_2[0u];
+    }
+    default: {
+      return f32();
+    }
+  }
+}
+
+fn f() {
+  let I = 0;
+  let l = load_s_m_p0_0(u32(I));
+}
+)";
+    // Run<Std140>(src) yields got; str(got) must match the expected text exactly.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, StructMatUniform_LoadScalar_ConstColumnIndex_VariableRowIndex_Mat3x2F32) {
+    auto* src = R"(
+struct S {
+  m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let I = 0;
+  let l = s.m[1][I];
+}
+)";
+    // Expected: s.m[1][I] becomes s.m_1[I]; only the column index is resolved statically.
+    auto* expect = R"(
+struct S {
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn f() {
+  let I = 0;
+  let l = s.m_1[I];
+}
+)";
+    // Run<Std140>(src) yields got; str(got) must match the expected text exactly.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, StructMatUniform_LoadScalar_VariableColumnIndex_VariableRowIndex_Mat3x2F32) {
+    auto* src = R"(
+struct S {
+  m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let I = 0;
+  let l = s.m[I][I];
+}
+)";
+    // Expected: s.m[I][I] calls load_s_m_p0_p1(u32(I), u32(I)); the switch is over p0 only.
+    auto* expect = R"(
+struct S {
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn load_s_m_p0_p1(p0 : u32, p1 : u32) -> f32 {
+  switch(p0) {
+    case 0u: {
+      return s.m_0[p1];
+    }
+    case 1u: {
+      return s.m_1[p1];
+    }
+    case 2u: {
+      return s.m_2[p1];
+    }
+    default: {
+      return f32();
+    }
+  }
+}
+
+fn f() {
+  let I = 0;
+  let l = load_s_m_p0_p1(u32(I), u32(I));
+}
+)";
+    // Run<Std140>(src) yields got; str(got) must match the expected text exactly.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, ArrayStructMatUniform_LoadArray_Mat3x2F32) {
+    auto* src = R"(
+struct S {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S, 3>;
+
+fn f() {
+  let l = a;
+}
+)";
+    // Expected: loading the array calls conv_arr3_S(), which loops conv_S() over 3 elements.
+    auto* expect = R"(
+struct S {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  @size(48)
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S_std140, 3u>;
+
+fn conv_S(val : S_std140) -> S {
+  return S(mat3x2<f32>(val.m_0, val.m_1, val.m_2));
+}
+
+fn conv_arr3_S(val : array<S_std140, 3u>) -> array<S, 3u> {
+  var arr : array<S, 3u>;
+  for(var i : u32; (i < 3u); i = (i + 1)) {
+    arr[i] = conv_S(val[i]);
+  }
+  return arr;
+}
+
+fn f() {
+  let l = conv_arr3_S(a);
+}
+)";
+    // Run<Std140>(src) yields got; str(got) must match the expected text exactly.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, ArrayStructMatUniform_LoadStruct_ConstIndex_Mat3x2F32) {
+    auto* src = R"(
+struct S {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S, 3>;
+
+fn f() {
+  let l = a[2];
+}
+)";
+    // Expected: a[2] is converted with conv_S(a[2u]); no array conversion helper is emitted.
+    auto* expect = R"(
+struct S {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  @size(48)
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S_std140, 3u>;
+
+fn conv_S(val : S_std140) -> S {
+  return S(mat3x2<f32>(val.m_0, val.m_1, val.m_2));
+}
+
+fn f() {
+  let l = conv_S(a[2u]);
+}
+)";
+    // Run<Std140>(src) yields got; str(got) must match the expected text exactly.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, ArrayStructMatUniform_LoadStruct_VariableIndex_Mat3x2F32) {
+    auto* src = R"(
+struct S {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S, 3>;
+
+fn f() {
+  let I = 1;
+  let l = a[I];
+}
+)";
+    // Expected: a[I] is converted with conv_S(a[I]); the index I is passed through as is.
+    auto* expect = R"(
+struct S {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  @size(48)
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S_std140, 3u>;
+
+fn conv_S(val : S_std140) -> S {
+  return S(mat3x2<f32>(val.m_0, val.m_1, val.m_2));
+}
+
+fn f() {
+  let I = 1;
+  let l = conv_S(a[I]);
+}
+)";
+    // Run<Std140>(src) yields got; str(got) must match the expected text exactly.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, ArrayStructMatUniform_LoadMatrix_ConstArrayIndex_Mat3x2F32) {
+    auto* src = R"(
+struct S {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S, 3>;
+
+fn f() {
+  let l = a[2].m;
+}
+)";
+    // Expected: a[2].m calls load_a_2_m(), with the constant index baked into the helper.
+    auto* expect = R"(
+struct S {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  @size(48)
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S_std140, 3u>;
+
+fn load_a_2_m() -> mat3x2<f32> {
+  let s = &(a[2u]);
+  return mat3x2<f32>((*(s)).m_0, (*(s)).m_1, (*(s)).m_2);
+}
+
+fn f() {
+  let l = load_a_2_m();
+}
+)";
+    // Run<Std140>(src) yields got; str(got) must match the expected text exactly.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, ArrayStructMatUniform_LoadMatrix_VariableArrayIndex_Mat3x2F32) {
+    auto* src = R"(
+struct S {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S, 3>;
+
+fn f() {
+  let I = 1;
+  let l = a[I].m;
+}
+)";
+    // Expected: a[I].m calls load_a_p0_m(u32(I)); the array index becomes parameter p0.
+    auto* expect = R"(
+struct S {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  @size(48)
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S_std140, 3u>;
+
+fn load_a_p0_m(p0 : u32) -> mat3x2<f32> {
+  let s = &(a[p0]);
+  return mat3x2<f32>((*(s)).m_0, (*(s)).m_1, (*(s)).m_2);
+}
+
+fn f() {
+  let I = 1;
+  let l = load_a_p0_m(u32(I));
+}
+)";
+    // Run<Std140>(src) yields got; str(got) must match the expected text exactly.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32,
+       ArrayStructMatUniform_LoadColumn_ConstArrayIndex_ConstColumnIndex_Mat3x2F32) {
+    auto* src = R"(
+struct S {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S, 3>;
+
+fn f() {
+  let l = a[2].m[1];
+}
+)";
+    // Expected: a[2].m[1] is rewritten in place to a[2u].m_1.
+    auto* expect = R"(
+struct S {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  @size(48)
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S_std140, 3u>;
+
+fn f() {
+  let l = a[2u].m_1;
+}
+)";
+    // Run<Std140>(src) yields got; str(got) must match the expected text exactly.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32,
+       ArrayStructMatUniform_LoadColumn_VariableArrayIndex_ConstColumnIndex_Mat3x2F32) {
+    auto* src = R"(
+struct S {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S, 3>;
+
+fn f() {
+  let I = 1;
+  let l = a[I].m[1];
+}
+)";
+    // Expected: a[I].m[1] becomes a[I].m_1; a dynamic array index alone needs no helper.
+    auto* expect = R"(
+struct S {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  @size(48)
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S_std140, 3u>;
+
+fn f() {
+  let I = 1;
+  let l = a[I].m_1;
+}
+)";
+    // Run<Std140>(src) yields got; str(got) must match the expected text exactly.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32,
+       ArrayStructMatUniform_LoadColumn_ConstArrayIndex_VariableColumnIndex_Mat3x2F32) {
+    auto* src = R"(
+struct S {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S, 3>;
+
+fn f() {
+  let I = 1;
+  let l = a[2].m[I];
+}
+)";
+    // Expected: a[2].m[I] calls load_a_2_m_p0(u32(I)); each case reads a column of a[2u].
+    auto* expect = R"(
+struct S {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  @size(48)
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S_std140, 3u>;
+
+fn load_a_2_m_p0(p0 : u32) -> vec2<f32> {
+  switch(p0) {
+    case 0u: {
+      return a[2u].m_0;
+    }
+    case 1u: {
+      return a[2u].m_1;
+    }
+    case 2u: {
+      return a[2u].m_2;
+    }
+    default: {
+      return vec2<f32>();
+    }
+  }
+}
+
+fn f() {
+  let I = 1;
+  let l = load_a_2_m_p0(u32(I));
+}
+)";
+    // Run<Std140>(src) yields got; str(got) must match the expected text exactly.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32,
+       ArrayStructMatUniform_LoadColumn_VariableArrayIndex_VariableColumnIndex_Mat3x2F32) {
+    auto* src = R"(
+struct S {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S, 3>;
+
+fn f() {
+  let I = 1;
+  let l = a[I].m[I];
+}
+)";
+    // Expected: a[I].m[I] calls load_a_p0_m_p1(); p0 indexes the array, p1 selects the column.
+    auto* expect = R"(
+struct S {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  @size(48)
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S_std140, 3u>;
+
+fn load_a_p0_m_p1(p0 : u32, p1 : u32) -> vec2<f32> {
+  switch(p1) {
+    case 0u: {
+      return a[p0].m_0;
+    }
+    case 1u: {
+      return a[p0].m_1;
+    }
+    case 2u: {
+      return a[p0].m_2;
+    }
+    default: {
+      return vec2<f32>();
+    }
+  }
+}
+
+fn f() {
+  let I = 1;
+  let l = load_a_p0_m_p1(u32(I), u32(I));
+}
+)";
+    // Run<Std140>(src) yields got; str(got) must match the expected text exactly.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, ArrayStructArrayStructMatUniform_Loads_Mat3x2F32) {
+    auto* src = R"(
+struct Inner {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+fn f() {
+  let I = 1;
+  let J = 2;
+  let K = 0;
+  let l_a : array<Outer, 4> = a;
+  let l_a_1 : Outer = a[1];
+  let l_a_I : Outer = a[I];
+  let l_a_2_a : array<Inner, 4> = a[2].a;
+  let l_a_I_a : array<Inner, 4> = a[I].a;
+  let l_a_3_a_1 : Inner = a[3].a[1];
+  let l_a_3_a_I : Inner = a[3].a[I];
+  let l_a_I_a_1 : Inner = a[I].a[1];
+  let l_a_I_a_J : Inner = a[I].a[J];
+  let l_a_0_a_2_m : mat3x2<f32> = a[0].a[2].m;
+  let l_a_0_a_I_m : mat3x2<f32> = a[0].a[I].m;
+  let l_a_I_a_2_m : mat3x2<f32> = a[I].a[2].m;
+  let l_a_I_a_J_m : mat3x2<f32> = a[I].a[J].m;
+  let l_a_1_a_3_m_0 : vec2<f32> = a[1].a[3].m[0];
+  let l_a_I_a_J_m_K : vec2<f32> = a[I].a[J].m[K];
+  let l_a_2_a_0_m_1_0 : f32 = a[2].a[0].m[1][0];
+  let l_a_I_a_J_m_K_I : f32 = a[I].a[J].m[K][I];
+}
+)";
+    // Expected: struct/array loads use conv_*(), matrix and dynamic-column loads use load_*().
+    auto* expect = R"(
+struct Inner {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+struct Inner_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  @size(48)
+  m_2 : vec2<f32>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+struct Outer_std140 {
+  a : array<Inner_std140, 4u>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer_std140, 4u>;
+
+fn conv_Inner(val : Inner_std140) -> Inner {
+  return Inner(mat3x2<f32>(val.m_0, val.m_1, val.m_2));
+}
+
+fn conv_arr4_Inner(val : array<Inner_std140, 4u>) -> array<Inner, 4u> {
+  var arr : array<Inner, 4u>;
+  for(var i : u32; (i < 4u); i = (i + 1)) {
+    arr[i] = conv_Inner(val[i]);
+  }
+  return arr;
+}
+
+fn conv_Outer(val : Outer_std140) -> Outer {
+  return Outer(conv_arr4_Inner(val.a));
+}
+
+fn conv_arr4_Outer(val : array<Outer_std140, 4u>) -> array<Outer, 4u> {
+  var arr : array<Outer, 4u>;
+  for(var i : u32; (i < 4u); i = (i + 1)) {
+    arr[i] = conv_Outer(val[i]);
+  }
+  return arr;
+}
+
+fn load_a_0_a_2_m() -> mat3x2<f32> {
+  let s = &(a[0u].a[2u]);
+  return mat3x2<f32>((*(s)).m_0, (*(s)).m_1, (*(s)).m_2);
+}
+
+fn load_a_0_a_p0_m(p0 : u32) -> mat3x2<f32> {
+  let s = &(a[0u].a[p0]);
+  return mat3x2<f32>((*(s)).m_0, (*(s)).m_1, (*(s)).m_2);
+}
+
+fn load_a_p0_a_2_m(p0 : u32) -> mat3x2<f32> {
+  let s = &(a[p0].a[2u]);
+  return mat3x2<f32>((*(s)).m_0, (*(s)).m_1, (*(s)).m_2);
+}
+
+fn load_a_p0_a_p1_m(p0 : u32, p1 : u32) -> mat3x2<f32> {
+  let s = &(a[p0].a[p1]);
+  return mat3x2<f32>((*(s)).m_0, (*(s)).m_1, (*(s)).m_2);
+}
+
+fn load_a_p0_a_p1_m_p2(p0 : u32, p1 : u32, p2 : u32) -> vec2<f32> {
+  switch(p2) {
+    case 0u: {
+      return a[p0].a[p1].m_0;
+    }
+    case 1u: {
+      return a[p0].a[p1].m_1;
+    }
+    case 2u: {
+      return a[p0].a[p1].m_2;
+    }
+    default: {
+      return vec2<f32>();
+    }
+  }
+}
+
+fn load_a_p0_a_p1_m_p2_p3(p0 : u32, p1 : u32, p2 : u32, p3 : u32) -> f32 {
+  switch(p2) {
+    case 0u: {
+      return a[p0].a[p1].m_0[p3];
+    }
+    case 1u: {
+      return a[p0].a[p1].m_1[p3];
+    }
+    case 2u: {
+      return a[p0].a[p1].m_2[p3];
+    }
+    default: {
+      return f32();
+    }
+  }
+}
+
+fn f() {
+  let I = 1;
+  let J = 2;
+  let K = 0;
+  let l_a : array<Outer, 4> = conv_arr4_Outer(a);
+  let l_a_1 : Outer = conv_Outer(a[1u]);
+  let l_a_I : Outer = conv_Outer(a[I]);
+  let l_a_2_a : array<Inner, 4> = conv_arr4_Inner(a[2u].a);
+  let l_a_I_a : array<Inner, 4> = conv_arr4_Inner(a[I].a);
+  let l_a_3_a_1 : Inner = conv_Inner(a[3u].a[1u]);
+  let l_a_3_a_I : Inner = conv_Inner(a[3u].a[I]);
+  let l_a_I_a_1 : Inner = conv_Inner(a[I].a[1u]);
+  let l_a_I_a_J : Inner = conv_Inner(a[I].a[J]);
+  let l_a_0_a_2_m : mat3x2<f32> = load_a_0_a_2_m();
+  let l_a_0_a_I_m : mat3x2<f32> = load_a_0_a_p0_m(u32(I));
+  let l_a_I_a_2_m : mat3x2<f32> = load_a_p0_a_2_m(u32(I));
+  let l_a_I_a_J_m : mat3x2<f32> = load_a_p0_a_p1_m(u32(I), u32(J));
+  let l_a_1_a_3_m_0 : vec2<f32> = a[1u].a[3u].m_0;
+  let l_a_I_a_J_m_K : vec2<f32> = load_a_p0_a_p1_m_p2(u32(I), u32(J), u32(K));
+  let l_a_2_a_0_m_1_0 : f32 = a[2u].a[0u].m_1[0u];
+  let l_a_I_a_J_m_K_I : f32 = load_a_p0_a_p1_m_p2_p3(u32(I), u32(J), u32(K), u32(I));
+}
+)";
+    // Run<Std140>(src) yields got; str(got) must match the expected text exactly.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, ArrayStructArrayStructMatUniform_LoadsViaPtrs_Mat3x2F32) {
+    auto* src = R"(
+struct Inner {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+fn f() {
+  let I = 1;
+  let J = 2;
+  let K = 0;
+  let p_a = &(a);
+  let p_a_3 = &((*(p_a))[3]);
+  let p_a_I = &((*(p_a))[I]);
+  let p_a_3_a = &((*(p_a_3)).a);
+  let p_a_I_a = &((*(p_a_I)).a);
+  let p_a_3_a_2 = &((*(p_a_3_a))[2]);
+  let p_a_3_a_I = &((*(p_a_3_a))[I]);
+  let p_a_I_a_2 = &((*(p_a_I_a))[2]);
+  let p_a_I_a_J = &((*(p_a_I_a))[J]);
+  let p_a_3_a_2_m = &((*(p_a_3_a_2)).m);
+  let p_a_3_a_I_m = &((*(p_a_3_a_I)).m);
+  let p_a_I_a_2_m = &((*(p_a_I_a_2)).m);
+  let p_a_I_a_J_m = &((*(p_a_I_a_J)).m);
+  let p_a_3_a_2_m_1 = &((*(p_a_3_a_2_m))[1]);
+  let p_a_I_a_J_m_K = &((*(p_a_I_a_J_m))[K]);
+  let l_a : array<Outer, 4> = *(p_a);
+  let l_a_3 : Outer = *(p_a_3);
+  let l_a_I : Outer = *(p_a_I);
+  let l_a_3_a : array<Inner, 4> = *(p_a_3_a);
+  let l_a_I_a : array<Inner, 4> = *(p_a_I_a);
+  let l_a_3_a_2 : Inner = *(p_a_3_a_2);
+  let l_a_3_a_I : Inner = *(p_a_3_a_I);
+  let l_a_I_a_2 : Inner = *(p_a_I_a_2);
+  let l_a_I_a_J : Inner = *(p_a_I_a_J);
+  let l_a_3_a_2_m : mat3x2<f32> = *(p_a_3_a_2_m);
+  let l_a_3_a_I_m : mat3x2<f32> = *(p_a_3_a_I_m);
+  let l_a_I_a_2_m : mat3x2<f32> = *(p_a_I_a_2_m);
+  let l_a_I_a_J_m : mat3x2<f32> = *(p_a_I_a_J_m);
+  let l_a_3_a_2_m_1 : vec2<f32> = *(p_a_3_a_2_m_1);
+  let l_a_I_a_J_m_K : vec2<f32> = *(p_a_I_a_J_m_K);
+  let l_a_2_a_0_m_1_0 : f32 = (*(p_a_3_a_2_m_1))[0];
+  let l_a_I_a_J_m_K_I : f32 = (*(p_a_I_a_J_m_K))[I];
+}
+)";
+    // Expected: same helpers as for direct loads; every p_* let now holds a loaded value.
+    auto* expect = R"(
+struct Inner {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+struct Inner_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  @size(48)
+  m_2 : vec2<f32>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+struct Outer_std140 {
+  a : array<Inner_std140, 4u>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer_std140, 4u>;
+
+fn conv_Inner(val : Inner_std140) -> Inner {
+  return Inner(mat3x2<f32>(val.m_0, val.m_1, val.m_2));
+}
+
+fn conv_arr4_Inner(val : array<Inner_std140, 4u>) -> array<Inner, 4u> {
+  var arr : array<Inner, 4u>;
+  for(var i : u32; (i < 4u); i = (i + 1)) {
+    arr[i] = conv_Inner(val[i]);
+  }
+  return arr;
+}
+
+fn conv_Outer(val : Outer_std140) -> Outer {
+  return Outer(conv_arr4_Inner(val.a));
+}
+
+fn conv_arr4_Outer(val : array<Outer_std140, 4u>) -> array<Outer, 4u> {
+  var arr : array<Outer, 4u>;
+  for(var i : u32; (i < 4u); i = (i + 1)) {
+    arr[i] = conv_Outer(val[i]);
+  }
+  return arr;
+}
+
+fn load_a_3_a_2_m() -> mat3x2<f32> {
+  let s = &(a[3u].a[2u]);
+  return mat3x2<f32>((*(s)).m_0, (*(s)).m_1, (*(s)).m_2);
+}
+
+fn load_a_3_a_p0_m(p0 : u32) -> mat3x2<f32> {
+  let s = &(a[3u].a[p0]);
+  return mat3x2<f32>((*(s)).m_0, (*(s)).m_1, (*(s)).m_2);
+}
+
+fn load_a_p0_a_2_m(p0 : u32) -> mat3x2<f32> {
+  let s = &(a[p0].a[2u]);
+  return mat3x2<f32>((*(s)).m_0, (*(s)).m_1, (*(s)).m_2);
+}
+
+fn load_a_p0_a_p1_m(p0 : u32, p1 : u32) -> mat3x2<f32> {
+  let s = &(a[p0].a[p1]);
+  return mat3x2<f32>((*(s)).m_0, (*(s)).m_1, (*(s)).m_2);
+}
+
+fn load_a_p0_a_p1_m_p2(p0 : u32, p1 : u32, p2 : u32) -> vec2<f32> {
+  switch(p2) {
+    case 0u: {
+      return a[p0].a[p1].m_0;
+    }
+    case 1u: {
+      return a[p0].a[p1].m_1;
+    }
+    case 2u: {
+      return a[p0].a[p1].m_2;
+    }
+    default: {
+      return vec2<f32>();
+    }
+  }
+}
+
+fn load_a_p0_a_p1_m_p2_p3(p0 : u32, p1 : u32, p2 : u32, p3 : u32) -> f32 {
+  switch(p2) {
+    case 0u: {
+      return a[p0].a[p1].m_0[p3];
+    }
+    case 1u: {
+      return a[p0].a[p1].m_1[p3];
+    }
+    case 2u: {
+      return a[p0].a[p1].m_2[p3];
+    }
+    default: {
+      return f32();
+    }
+  }
+}
+
+fn f() {
+  let I = 1;
+  let J = 2;
+  let K = 0;
+  let p_a = conv_arr4_Outer(a);
+  let p_a_3 = conv_Outer(a[3u]);
+  let p_a_I = conv_Outer(a[I]);
+  let p_a_3_a = conv_arr4_Inner(a[3u].a);
+  let p_a_I_a = conv_arr4_Inner(a[I].a);
+  let p_a_3_a_2 = conv_Inner(a[3u].a[2u]);
+  let p_a_3_a_I = conv_Inner(a[3u].a[I]);
+  let p_a_I_a_2 = conv_Inner(a[I].a[2u]);
+  let p_a_I_a_J = conv_Inner(a[I].a[J]);
+  let p_a_3_a_2_m = load_a_3_a_2_m();
+  let p_a_3_a_I_m = load_a_3_a_p0_m(u32(I));
+  let p_a_I_a_2_m = load_a_p0_a_2_m(u32(I));
+  let p_a_I_a_J_m = load_a_p0_a_p1_m(u32(I), u32(J));
+  let p_a_3_a_2_m_1 = a[3u].a[2u].m_1;
+  let p_a_I_a_J_m_K = load_a_p0_a_p1_m_p2(u32(I), u32(J), u32(K));
+  let l_a : array<Outer, 4> = conv_arr4_Outer(a);
+  let l_a_3 : Outer = conv_Outer(a[3u]);
+  let l_a_I : Outer = conv_Outer(a[I]);
+  let l_a_3_a : array<Inner, 4> = conv_arr4_Inner(a[3u].a);
+  let l_a_I_a : array<Inner, 4> = conv_arr4_Inner(a[I].a);
+  let l_a_3_a_2 : Inner = conv_Inner(a[3u].a[2u]);
+  let l_a_3_a_I : Inner = conv_Inner(a[3u].a[I]);
+  let l_a_I_a_2 : Inner = conv_Inner(a[I].a[2u]);
+  let l_a_I_a_J : Inner = conv_Inner(a[I].a[J]);
+  let l_a_3_a_2_m : mat3x2<f32> = load_a_3_a_2_m();
+  let l_a_3_a_I_m : mat3x2<f32> = load_a_3_a_p0_m(u32(I));
+  let l_a_I_a_2_m : mat3x2<f32> = load_a_p0_a_2_m(u32(I));
+  let l_a_I_a_J_m : mat3x2<f32> = load_a_p0_a_p1_m(u32(I), u32(J));
+  let l_a_3_a_2_m_1 : vec2<f32> = a[3u].a[2u].m_1;
+  let l_a_I_a_J_m_K : vec2<f32> = load_a_p0_a_p1_m_p2(u32(I), u32(J), u32(K));
+  let l_a_2_a_0_m_1_0 : f32 = a[3u].a[2u].m_1[0u];
+  let l_a_I_a_J_m_K_I : f32 = load_a_p0_a_p1_m_p2_p3(u32(I), u32(J), u32(K), u32(I));
+}
+)";
+    // Run<Std140>(src) yields got; str(got) must match the expected text exactly.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, ArrayStructMatUniform_CopyArray_UniformToStorage_Mat3x2F32) {
+    auto* src = R"(
+struct S {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
+
+fn f() {
+  s = u;
+}
+)";
+    // Expected: u becomes array<S_std140, 4u>, s is unchanged, and s = conv_arr4_S(u).
+    auto* expect = R"(
+struct S {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  @size(48)
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S_std140, 4u>;
+
+@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
+
+fn conv_S(val : S_std140) -> S {
+  return S(mat3x2<f32>(val.m_0, val.m_1, val.m_2));
+}
+
+fn conv_arr4_S(val : array<S_std140, 4u>) -> array<S, 4u> {
+  var arr : array<S, 4u>;
+  for(var i : u32; (i < 4u); i = (i + 1)) {
+    arr[i] = conv_S(val[i]);
+  }
+  return arr;
+}
+
+fn f() {
+  s = conv_arr4_S(u);
+}
+)";
+    // Run<Std140>(src) yields got; str(got) must match the expected text exactly.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, ArrayStructMatUniform_CopyStruct_UniformToWorkgroup_Mat3x2F32) {
+    auto* src = R"(
+struct S {
+  v : vec4<i32>,
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+var<workgroup> w : array<S, 4>;
+
+fn f() {
+  w[0] = u[1];
+}
+)";
+    // Expected: w keeps its type; the copy becomes w[0] = conv_S(u[1u]), which forwards v.
+    auto* expect = R"(
+struct S {
+  v : vec4<i32>,
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  v : vec4<i32>,
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  @size(48)
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S_std140, 4u>;
+
+var<workgroup> w : array<S, 4>;
+
+fn conv_S(val : S_std140) -> S {
+  return S(val.v, mat3x2<f32>(val.m_0, val.m_1, val.m_2));
+}
+
+fn f() {
+  w[0] = conv_S(u[1u]);
+}
+)";
+    // Run<Std140>(src) yields got; str(got) must match the expected text exactly.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, ArrayStructMatUniform_CopyMatrix_UniformToPrivate_Mat3x2F32) {
+    auto* src = R"(
+struct S {
+  v : vec4<i32>,
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 3>;
+
+var<private> p : array<S, 4>;
+
+fn f() {
+  p[2].m = u[1].m;
+}
+)";
+    // Expected: the matrix copy reads through load_u_1_m(); the private p is left untouched.
+    auto* expect = R"(
+struct S {
+  v : vec4<i32>,
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  v : vec4<i32>,
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  @size(48)
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S_std140, 3u>;
+
+var<private> p : array<S, 4>;
+
+fn load_u_1_m() -> mat3x2<f32> {
+  let s = &(u[1u]);
+  return mat3x2<f32>((*(s)).m_0, (*(s)).m_1, (*(s)).m_2);
+}
+
+fn f() {
+  p[2].m = load_u_1_m();
+}
+)";
+    // Run<Std140>(src) yields got; str(got) must match the expected text exactly.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, ArrayStructMatUniform_CopyColumn_UniformToStorage_Mat3x2F32) {
+    auto* src = R"(
+struct S {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 3>;
+
+@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
+
+fn f() {
+  s[3].m[1] = u[2].m[0];
+}
+)";
+    // Expected: the right-hand side becomes u[2u].m_0; the storage write s[3].m[1] is kept.
+    auto* expect = R"(
+struct S {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  @size(48)
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S_std140, 3u>;
+
+@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
+
+fn f() {
+  s[3].m[1] = u[2u].m_0;
+}
+)";
+    // Run<Std140>(src) yields got; str(got) must match the expected text exactly.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, ArrayStructMatUniform_CopyColumnSwizzle_UniformToWorkgroup_Mat3x2F32) {
+    auto* src = R"(
+struct S {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+var<workgroup> w : array<S, 4>;
+
+fn f() {
+  w[3].m[1] = u[2].m[0].yx.yx;
+}
+)";
+
+    auto* expect = R"(
+struct S {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  @size(48)
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S_std140, 4u>;
+
+var<workgroup> w : array<S, 4>;
+
+fn f() {
+  w[3].m[1] = u[2u].m_0.yx.yx;
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, ArrayStructMatUniform_CopyScalar_UniformToPrivate_Mat3x2F32) {
+    auto* src = R"(
+struct S {
+  v : vec4<i32>,
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 3>;
+
+var<private> p : array<S, 4>;
+
+fn f() {
+  p[3].m[1].x = u[2].m[0].y;
+}
+)";
+
+    auto* expect = R"(
+struct S {
+  v : vec4<i32>,
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  v : vec4<i32>,
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  @size(48)
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S_std140, 3u>;
+
+var<private> p : array<S, 4>;
+
+fn f() {
+  p[3].m[1].x = u[2u].m_0[1u];
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, ArrayMatUniform_LoadArray_Mat2x2F32) {
+    auto* src = R"(
+@group(0) @binding(0) var<uniform> a : array<mat2x2<f32>, 3>;
+
+fn f() {
+  let l = a;
+}
+)";
+
+    auto* expect = R"(
+struct mat2x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<mat2x2_f32, 3u>;
+
+fn conv_mat2x2_f32(val : mat2x2_f32) -> mat2x2<f32> {
+  return mat2x2<f32>(val.col0, val.col1);
+}
+
+fn conv_arr3_mat2x2_f32(val : array<mat2x2_f32, 3u>) -> array<mat2x2<f32>, 3u> {
+  var arr : array<mat2x2<f32>, 3u>;
+  for(var i : u32; (i < 3u); i = (i + 1)) {
+    arr[i] = conv_mat2x2_f32(val[i]);
+  }
+  return arr;
+}
+
+fn f() {
+  let l = conv_arr3_mat2x2_f32(a);
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, ArrayMatUniform_LoadMatrix_ConstArrayIndex_Mat2x2F32) {
+    auto* src = R"(
+@group(0) @binding(0) var<uniform> a : array<mat2x2<f32>, 3>;
+
+fn f() {
+  let l = a[2];
+}
+)";
+
+    auto* expect = R"(
+struct mat2x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<mat2x2_f32, 3u>;
+
+fn conv_mat2x2_f32(val : mat2x2_f32) -> mat2x2<f32> {
+  return mat2x2<f32>(val.col0, val.col1);
+}
+
+fn f() {
+  let l = conv_mat2x2_f32(a[2u]);
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, ArrayMatUniform_LoadMatrix_VariableArrayIndex_Mat2x2F32) {
+    auto* src = R"(
+@group(0) @binding(0) var<uniform> a : array<mat2x2<f32>, 3>;
+
+fn f() {
+  let I = 1;
+  let l = a[I];
+}
+)";
+
+    auto* expect = R"(
+struct mat2x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<mat2x2_f32, 3u>;
+
+fn conv_mat2x2_f32(val : mat2x2_f32) -> mat2x2<f32> {
+  return mat2x2<f32>(val.col0, val.col1);
+}
+
+fn f() {
+  let I = 1;
+  let l = conv_mat2x2_f32(a[I]);
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, ArrayMatUniform_LoadColumn_ConstArrayIndex_ConstColumnIndex_Mat2x2F32) {
+    auto* src = R"(
+@group(0) @binding(0) var<uniform> a : array<mat2x2<f32>, 3>;
+
+fn f() {
+  let l = a[2][1];
+}
+)";
+
+    auto* expect = R"(
+struct mat2x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<mat2x2_f32, 3u>;
+
+fn f() {
+  let l = a[2u].col1;
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, ArrayMatUniform_LoadColumn_VariableArrayIndex_ConstColumnIndex_Mat2x2F32) {
+    auto* src = R"(
+@group(0) @binding(0) var<uniform> a : array<mat2x2<f32>, 3>;
+
+fn f() {
+  let I = 1;
+  let l = a[I][1];
+}
+)";
+
+    auto* expect = R"(
+struct mat2x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<mat2x2_f32, 3u>;
+
+fn f() {
+  let I = 1;
+  let l = a[I].col1;
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, ArrayMatUniform_LoadColumn_ConstArrayIndex_VariableColumnIndex_Mat2x2F32) {
+    auto* src = R"(
+@group(0) @binding(0) var<uniform> a : array<mat2x2<f32>, 3>;
+
+fn f() {
+  let I = 1;
+  let l = a[2][I];
+}
+)";
+
+    auto* expect = R"(
+struct mat2x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<mat2x2_f32, 3u>;
+
+fn load_a_2_p0(p0 : u32) -> vec2<f32> {
+  switch(p0) {
+    case 0u: {
+      return a[2u].col0;
+    }
+    case 1u: {
+      return a[2u].col1;
+    }
+    default: {
+      return vec2<f32>();
+    }
+  }
+}
+
+fn f() {
+  let I = 1;
+  let l = load_a_2_p0(u32(I));
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32,
+       ArrayMatUniform_LoadColumn_VariableArrayIndex_VariableColumnIndex_Mat2x2F32) {
+    auto* src = R"(
+@group(0) @binding(0) var<uniform> a : array<mat2x2<f32>, 3>;
+
+fn f() {
+  let I = 1;
+  let l = a[I][I];
+}
+)";
+
+    auto* expect = R"(
+struct mat2x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<mat2x2_f32, 3u>;
+
+fn load_a_p0_p1(p0 : u32, p1 : u32) -> vec2<f32> {
+  switch(p1) {
+    case 0u: {
+      return a[p0].col0;
+    }
+    case 1u: {
+      return a[p0].col1;
+    }
+    default: {
+      return vec2<f32>();
+    }
+  }
+}
+
+fn f() {
+  let I = 1;
+  let l = load_a_p0_p1(u32(I), u32(I));
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, StructArrayMatUniform_LoadStruct_Mat2x2F32) {
+    auto* src = R"(
+struct S {
+  a : array<mat2x2<f32>, 3>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let l = s;
+}
+)";
+
+    auto* expect = R"(
+struct mat2x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+}
+
+struct S {
+  a : array<mat2x2<f32>, 3>,
+}
+
+struct S_std140 {
+  a : array<mat2x2_f32, 3u>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn conv_mat2x2_f32(val : mat2x2_f32) -> mat2x2<f32> {
+  return mat2x2<f32>(val.col0, val.col1);
+}
+
+fn conv_arr3_mat2x2_f32(val : array<mat2x2_f32, 3u>) -> array<mat2x2<f32>, 3u> {
+  var arr : array<mat2x2<f32>, 3u>;
+  for(var i : u32; (i < 3u); i = (i + 1)) {
+    arr[i] = conv_mat2x2_f32(val[i]);
+  }
+  return arr;
+}
+
+fn conv_S(val : S_std140) -> S {
+  return S(conv_arr3_mat2x2_f32(val.a));
+}
+
+fn f() {
+  let l = conv_S(s);
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, StructArrayMatUniform_LoadArray_Mat2x2F32) {
+    auto* src = R"(
+struct S {
+  a : array<mat2x2<f32>, 3>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let l = s.a;
+}
+)";
+
+    auto* expect = R"(
+struct mat2x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+}
+
+struct S {
+  a : array<mat2x2<f32>, 3>,
+}
+
+struct S_std140 {
+  a : array<mat2x2_f32, 3u>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn conv_mat2x2_f32(val : mat2x2_f32) -> mat2x2<f32> {
+  return mat2x2<f32>(val.col0, val.col1);
+}
+
+fn conv_arr3_mat2x2_f32(val : array<mat2x2_f32, 3u>) -> array<mat2x2<f32>, 3u> {
+  var arr : array<mat2x2<f32>, 3u>;
+  for(var i : u32; (i < 3u); i = (i + 1)) {
+    arr[i] = conv_mat2x2_f32(val[i]);
+  }
+  return arr;
+}
+
+fn f() {
+  let l = conv_arr3_mat2x2_f32(s.a);
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, StructArrayMatUniform_LoadMatrix_ConstArrayIndex_Mat2x2F32) {
+    auto* src = R"(
+struct S {
+  a : array<mat2x2<f32>, 3>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let l = s.a[2];
+}
+)";
+
+    auto* expect = R"(
+struct mat2x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+}
+
+struct S {
+  a : array<mat2x2<f32>, 3>,
+}
+
+struct S_std140 {
+  a : array<mat2x2_f32, 3u>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn conv_mat2x2_f32(val : mat2x2_f32) -> mat2x2<f32> {
+  return mat2x2<f32>(val.col0, val.col1);
+}
+
+fn f() {
+  let l = conv_mat2x2_f32(s.a[2u]);
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, StructArrayMatUniform_LoadMatrix_VariableArrayIndex_Mat2x2F32) {
+    auto* src = R"(
+struct S {
+  a : array<mat2x2<f32>, 3>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let I = 1;
+  let l = s.a[I];
+}
+)";
+
+    auto* expect = R"(
+struct mat2x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+}
+
+struct S {
+  a : array<mat2x2<f32>, 3>,
+}
+
+struct S_std140 {
+  a : array<mat2x2_f32, 3u>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn conv_mat2x2_f32(val : mat2x2_f32) -> mat2x2<f32> {
+  return mat2x2<f32>(val.col0, val.col1);
+}
+
+fn f() {
+  let I = 1;
+  let l = conv_mat2x2_f32(s.a[I]);
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32,
+       StructArrayMatUniform_LoadColumn_ConstArrayIndex_ConstColumnIndex_Mat2x2F32) {
+    auto* src = R"(
+struct S {
+  a : array<mat2x2<f32>, 3>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let l = s.a[2][1];
+}
+)";
+
+    auto* expect = R"(
+struct mat2x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+}
+
+struct S {
+  a : array<mat2x2<f32>, 3>,
+}
+
+struct S_std140 {
+  a : array<mat2x2_f32, 3u>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn f() {
+  let l = s.a[2u].col1;
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32,
+       StructArrayMatUniform_LoadColumn_VariableArrayIndex_ConstColumnIndex_Mat2x2F32) {
+    auto* src = R"(
+struct S {
+  a : array<mat2x2<f32>, 3>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let I = 1;
+  let l = s.a[I][1];
+}
+)";
+
+    auto* expect = R"(
+struct mat2x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+}
+
+struct S {
+  a : array<mat2x2<f32>, 3>,
+}
+
+struct S_std140 {
+  a : array<mat2x2_f32, 3u>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn f() {
+  let I = 1;
+  let l = s.a[I].col1;
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32,
+       StructArrayMatUniform_LoadColumn_ConstArrayIndex_VariableColumnIndex_Mat2x2F32) {
+    auto* src = R"(
+struct S {
+  a : array<mat2x2<f32>, 3>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let I = 1;
+  let l = s.a[2][I];
+}
+)";
+
+    auto* expect = R"(
+struct mat2x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+}
+
+struct S {
+  a : array<mat2x2<f32>, 3>,
+}
+
+struct S_std140 {
+  a : array<mat2x2_f32, 3u>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn load_s_a_2_p0(p0 : u32) -> vec2<f32> {
+  switch(p0) {
+    case 0u: {
+      return s.a[2u].col0;
+    }
+    case 1u: {
+      return s.a[2u].col1;
+    }
+    default: {
+      return vec2<f32>();
+    }
+  }
+}
+
+fn f() {
+  let I = 1;
+  let l = load_s_a_2_p0(u32(I));
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32,
+       StructArrayMatUniform_LoadColumn_VariableArrayIndex_VariableColumnIndex_Mat2x2F32) {
+    auto* src = R"(
+struct S {
+  a : array<mat2x2<f32>, 3>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let I = 1;
+  let l = s.a[I][I];
+}
+)";
+
+    auto* expect = R"(
+struct mat2x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+}
+
+struct S {
+  a : array<mat2x2<f32>, 3>,
+}
+
+struct S_std140 {
+  a : array<mat2x2_f32, 3u>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn load_s_a_p0_p1(p0 : u32, p1 : u32) -> vec2<f32> {
+  switch(p1) {
+    case 0u: {
+      return s.a[p0].col0;
+    }
+    case 1u: {
+      return s.a[p0].col1;
+    }
+    default: {
+      return vec2<f32>();
+    }
+  }
+}
+
+fn f() {
+  let I = 1;
+  let l = load_s_a_p0_p1(u32(I), u32(I));
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, ArrayArrayMatUniform_LoadArrays_Mat2x2F32) {
+    auto* src = R"(
+@group(0) @binding(0) var<uniform> a : array<array<mat2x2<f32>, 3>, 4>;
+
+fn f() {
+  let l = a;
+}
+)";
+
+    auto* expect = R"(
+struct mat2x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x2_f32, 3u>, 4u>;
+
+fn conv_mat2x2_f32(val : mat2x2_f32) -> mat2x2<f32> {
+  return mat2x2<f32>(val.col0, val.col1);
+}
+
+fn conv_arr3_mat2x2_f32(val : array<mat2x2_f32, 3u>) -> array<mat2x2<f32>, 3u> {
+  var arr : array<mat2x2<f32>, 3u>;
+  for(var i : u32; (i < 3u); i = (i + 1)) {
+    arr[i] = conv_mat2x2_f32(val[i]);
+  }
+  return arr;
+}
+
+fn conv_arr4_arr3_mat2x2_f32(val : array<array<mat2x2_f32, 3u>, 4u>) -> array<array<mat2x2<f32>, 3u>, 4u> {
+  var arr : array<array<mat2x2<f32>, 3u>, 4u>;
+  for(var i : u32; (i < 4u); i = (i + 1)) {
+    arr[i] = conv_arr3_mat2x2_f32(val[i]);
+  }
+  return arr;
+}
+
+fn f() {
+  let l = conv_arr4_arr3_mat2x2_f32(a);
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, ArrayArrayMatUniform_LoadArray_ConstOuterArrayIndex_Mat2x2F32) {
+    auto* src = R"(
+@group(0) @binding(0) var<uniform> a : array<array<mat2x2<f32>, 3>, 4>;
+
+fn f() {
+  let l = a[3];
+}
+)";
+
+    auto* expect = R"(
+struct mat2x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x2_f32, 3u>, 4u>;
+
+fn conv_mat2x2_f32(val : mat2x2_f32) -> mat2x2<f32> {
+  return mat2x2<f32>(val.col0, val.col1);
+}
+
+fn conv_arr3_mat2x2_f32(val : array<mat2x2_f32, 3u>) -> array<mat2x2<f32>, 3u> {
+  var arr : array<mat2x2<f32>, 3u>;
+  for(var i : u32; (i < 3u); i = (i + 1)) {
+    arr[i] = conv_mat2x2_f32(val[i]);
+  }
+  return arr;
+}
+
+fn f() {
+  let l = conv_arr3_mat2x2_f32(a[3u]);
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, ArrayArrayMatUniform_LoadArray_VariableOuterArrayIndex_Mat2x2F32) {
+    auto* src = R"(
+@group(0) @binding(0) var<uniform> a : array<array<mat2x2<f32>, 3>, 4>;
+
+fn f() {
+  let I = 1;
+  let l = a[I];
+}
+)";
+
+    auto* expect = R"(
+struct mat2x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x2_f32, 3u>, 4u>;
+
+fn conv_mat2x2_f32(val : mat2x2_f32) -> mat2x2<f32> {
+  return mat2x2<f32>(val.col0, val.col1);
+}
+
+fn conv_arr3_mat2x2_f32(val : array<mat2x2_f32, 3u>) -> array<mat2x2<f32>, 3u> {
+  var arr : array<mat2x2<f32>, 3u>;
+  for(var i : u32; (i < 3u); i = (i + 1)) {
+    arr[i] = conv_mat2x2_f32(val[i]);
+  }
+  return arr;
+}
+
+fn f() {
+  let I = 1;
+  let l = conv_arr3_mat2x2_f32(a[I]);
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32,
+       ArrayArrayMatUniform_LoadMatrix_ConstOuterArrayIndex_ConstInnerArrayIndex_Mat2x2F32) {
+    auto* src = R"(
+@group(0) @binding(0) var<uniform> a : array<array<mat2x2<f32>, 3>, 4>;
+
+fn f() {
+  let l = a[3][2];
+}
+)";
+
+    auto* expect = R"(
+struct mat2x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x2_f32, 3u>, 4u>;
+
+fn conv_mat2x2_f32(val : mat2x2_f32) -> mat2x2<f32> {
+  return mat2x2<f32>(val.col0, val.col1);
+}
+
+fn f() {
+  let l = conv_mat2x2_f32(a[3u][2u]);
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32,
+       ArrayArrayMatUniform_LoadMatrix_ConstOuterArrayIndex_VariableInnerArrayIndex_Mat2x2F32) {
+    auto* src = R"(
+@group(0) @binding(0) var<uniform> a : array<array<mat2x2<f32>, 3>, 4>;
+
+fn f() {
+  let I = 1;
+  let l = a[3][I];
+}
+)";
+
+    auto* expect = R"(
+struct mat2x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x2_f32, 3u>, 4u>;
+
+fn conv_mat2x2_f32(val : mat2x2_f32) -> mat2x2<f32> {
+  return mat2x2<f32>(val.col0, val.col1);
+}
+
+fn f() {
+  let I = 1;
+  let l = conv_mat2x2_f32(a[3u][I]);
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32,
+       ArrayArrayMatUniform_LoadMatrix_VariableOuterArrayIndex_ConstInnerArrayIndex_Mat2x2F32) {
+    auto* src = R"(
+@group(0) @binding(0) var<uniform> a : array<array<mat2x2<f32>, 3>, 4>;
+
+fn f() {
+  let I = 1;
+  let l = a[I][2];
+}
+)";
+
+    auto* expect = R"(
+struct mat2x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x2_f32, 3u>, 4u>;
+
+fn conv_mat2x2_f32(val : mat2x2_f32) -> mat2x2<f32> {
+  return mat2x2<f32>(val.col0, val.col1);
+}
+
+fn f() {
+  let I = 1;
+  let l = conv_mat2x2_f32(a[I][2u]);
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32,
+       ArrayArrayMatUniform_LoadMatrix_VariableOuterArrayIndex_VariableInnerArrayIndex_Mat2x2F32) {
+    auto* src = R"(
+@group(0) @binding(0) var<uniform> a : array<array<mat2x2<f32>, 3>, 4>;
+
+fn f() {
+  let I = 1;
+  let l = a[I][I];
+}
+)";
+
+    auto* expect = R"(
+struct mat2x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x2_f32, 3u>, 4u>;
+
+fn conv_mat2x2_f32(val : mat2x2_f32) -> mat2x2<f32> {
+  return mat2x2<f32>(val.col0, val.col1);
+}
+
+fn f() {
+  let I = 1;
+  let l = conv_mat2x2_f32(a[I][I]);
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(
+    Std140Test_F32,
+    ArrayArrayMatUniform_LoadColumn_ConstOuterArrayIndex_ConstInnerArrayIndex_ConstColumnIndex_Mat2x2F32) {
+    auto* src = R"(
+@group(0) @binding(0) var<uniform> a : array<array<mat2x2<f32>, 3>, 4>;
+
+fn f() {
+  let l = a[3][2][1];
+}
+)";
+
+    auto* expect = R"(
+struct mat2x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x2_f32, 3u>, 4u>;
+
+fn f() {
+  let l = a[3u][2u].col1;
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(
+    Std140Test_F32,
+    ArrayArrayMatUniform_LoadColumn_ConstOuterArrayIndex_ConstInnerArrayIndex_VariableColumnIndex_Mat2x2F32) {
+    auto* src = R"(
+@group(0) @binding(0) var<uniform> a : array<array<mat2x2<f32>, 3>, 4>;
+
+fn f() {
+  let I = 1;
+  let l = a[3][2][I];
+}
+)";
+
+    auto* expect = R"(
+struct mat2x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x2_f32, 3u>, 4u>;
+
+fn load_a_3_2_p0(p0 : u32) -> vec2<f32> {
+  switch(p0) {
+    case 0u: {
+      return a[3u][2u].col0;
+    }
+    case 1u: {
+      return a[3u][2u].col1;
+    }
+    default: {
+      return vec2<f32>();
+    }
+  }
+}
+
+fn f() {
+  let I = 1;
+  let l = load_a_3_2_p0(u32(I));
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(
+    Std140Test_F32,
+    ArrayArrayMatUniform_LoadColumn_ConstOuterArrayIndex_VariableInnerArrayIndex_ConstColumnIndex_Mat2x2F32) {
+    auto* src = R"(
+@group(0) @binding(0) var<uniform> a : array<array<mat2x2<f32>, 3>, 4>;
+
+fn f() {
+  let I = 1;
+  let l = a[3][I][1];
+}
+)";
+
+    auto* expect = R"(
+struct mat2x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x2_f32, 3u>, 4u>;
+
+fn f() {
+  let I = 1;
+  let l = a[3u][I].col1;
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(
+    Std140Test_F32,
+    ArrayArrayMatUniform_LoadColumn_ConstOuterArrayIndex_VariableInnerArrayIndex_VariableColumnIndex_Mat2x2F32) {
+    auto* src = R"(
+@group(0) @binding(0) var<uniform> a : array<array<mat2x2<f32>, 3>, 4>;
+
+fn f() {
+  let I = 1;
+  let J = 2;
+  let l = a[3][I][J];
+}
+)";
+
+    auto* expect = R"(
+struct mat2x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x2_f32, 3u>, 4u>;
+
+fn load_a_3_p0_p1(p0 : u32, p1 : u32) -> vec2<f32> {
+  switch(p1) {
+    case 0u: {
+      return a[3u][p0].col0;
+    }
+    case 1u: {
+      return a[3u][p0].col1;
+    }
+    default: {
+      return vec2<f32>();
+    }
+  }
+}
+
+fn f() {
+  let I = 1;
+  let J = 2;
+  let l = load_a_3_p0_p1(u32(I), u32(J));
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(
+    Std140Test_F32,
+    ArrayArrayMatUniform_LoadColumn_VariableOuterArrayIndex_ConstInnerArrayIndex_ConstColumnIndex_Mat2x2F32) {
+    auto* src = R"(
+@group(0) @binding(0) var<uniform> a : array<array<mat2x2<f32>, 3>, 4>;
+
+fn f() {
+  let I = 1;
+  let l = a[I][2][1];
+}
+)";
+
+    auto* expect = R"(
+struct mat2x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x2_f32, 3u>, 4u>;
+
+fn f() {
+  let I = 1;
+  let l = a[I][2u].col1;
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(
+    Std140Test_F32,
+    ArrayArrayMatUniform_LoadColumn_VariableOuterArrayIndex_ConstInnerArrayIndex_VariableColumnIndex_Mat2x2F32) {
+    auto* src = R"(
+@group(0) @binding(0) var<uniform> a : array<array<mat2x2<f32>, 3>, 4>;
+
+fn f() {
+  let I = 1;
+  let J = 2;
+  let l = a[I][2][J];
+}
+)";
+
+    auto* expect = R"(
+struct mat2x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x2_f32, 3u>, 4u>;
+
+fn load_a_p0_2_p1(p0 : u32, p1 : u32) -> vec2<f32> {
+  switch(p1) {
+    case 0u: {
+      return a[p0][2u].col0;
+    }
+    case 1u: {
+      return a[p0][2u].col1;
+    }
+    default: {
+      return vec2<f32>();
+    }
+  }
+}
+
+fn f() {
+  let I = 1;
+  let J = 2;
+  let l = load_a_p0_2_p1(u32(I), u32(J));
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(
+    Std140Test_F32,
+    ArrayArrayMatUniform_LoadColumn_VariableOuterArrayIndex_VariableInnerArrayIndex_ConstColumnIndex_Mat2x2F32) {
+    auto* src = R"(
+@group(0) @binding(0) var<uniform> a : array<array<mat2x2<f32>, 3>, 4>;
+
+fn f() {
+  let I = 1;
+  let J = 2;
+  let l = a[I][J][1];
+}
+)";
+
+    auto* expect = R"(
+struct mat2x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x2_f32, 3u>, 4u>;
+
+fn f() {
+  let I = 1;
+  let J = 2;
+  let l = a[I][J].col1;
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(
+    Std140Test_F32,
+    ArrayArrayMatUniform_LoadColumn_VariableOuterArrayIndex_VariableInnerArrayIndex_VariableColumnIndex_Mat2x2F32) {
+    auto* src = R"(
+@group(0) @binding(0) var<uniform> a : array<array<mat2x2<f32>, 3>, 4>;
+
+fn f() {
+  let I = 0;
+  let J = 1;
+  let K = 2;
+  let l = a[I][J][K];
+}
+)";
+
+    auto* expect = R"(
+struct mat2x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x2_f32, 3u>, 4u>;
+
+fn load_a_p0_p1_p2(p0 : u32, p1 : u32, p2 : u32) -> vec2<f32> {
+  switch(p2) {
+    case 0u: {
+      return a[p0][p1].col0;
+    }
+    case 1u: {
+      return a[p0][p1].col1;
+    }
+    default: {
+      return vec2<f32>();
+    }
+  }
+}
+
+fn f() {
+  let I = 0;
+  let J = 1;
+  let K = 2;
+  let l = load_a_p0_p1_p2(u32(I), u32(J), u32(K));
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+}  // namespace
+}  // namespace tint::transform
diff --git a/src/tint/transform/std140_test.cc b/src/tint/transform/std140_test.cc
index 1ec2e09..73221bc 100644
--- a/src/tint/transform/std140_test.cc
+++ b/src/tint/transform/std140_test.cc
@@ -21,6 +21,12 @@
 #include "src/tint/transform/test_helper.h"
 #include "src/tint/utils/string.h"
 
+// This file contains the should-run tests and a trivial empty module test for Std140 transform.
+// For testing transform results with clear readability, please refer to std140_f32_test.cc for f32
+// matrices and std140_f16_test.cc for f16 matrices. For exhaustive tests that run Std140
+// transform on all shapes of both f32 and f16 matrices and loop over all valid literal indices when
+// required, please refer to std140_exhaustive_test.cc.
+
 namespace tint::transform {
 namespace {
 
@@ -96,6 +102,8 @@
 
 TEST_P(Std140TestShouldRun, StructStorage) {
     std::string src = R"(
+enable f16;
+
 struct S {
   m : ${mat},
 }
@@ -110,6 +118,8 @@
 
 TEST_P(Std140TestShouldRun, StructUniform) {
     std::string src = R"(
+enable f16;
+
 struct S {
   m : ${mat},
 }
@@ -124,6 +134,8 @@
 
 TEST_P(Std140TestShouldRun, ArrayStorage) {
     std::string src = R"(
+enable f16;
+
 @group(0) @binding(0) var<storage> s : array<${mat}, 2>;
 )";
 
@@ -141,6 +153,8 @@
     }
 
     std::string src = R"(
+enable f16;
+
 @group(0) @binding(0) var<uniform> s : array<${mat}, 2>;
 )";
 
@@ -161,6 +175,15 @@
                              {4, 2, MatrixType::f32},
                              {4, 3, MatrixType::f32},
                              {4, 4, MatrixType::f32},
+                             {2, 2, MatrixType::f16},
+                             {2, 3, MatrixType::f16},
+                             {2, 4, MatrixType::f16},
+                             {3, 2, MatrixType::f16},
+                             {3, 3, MatrixType::f16},
+                             {3, 4, MatrixType::f16},
+                             {4, 2, MatrixType::f16},
+                             {4, 3, MatrixType::f16},
+                             {4, 4, MatrixType::f16},
                          }));
 
 TEST_F(Std140Test, EmptyModule) {
@@ -173,3336 +196,5 @@
     EXPECT_EQ(expect, str(got));
 }
 
-using Std140Test_F32 = Std140Test;
-
-TEST_F(Std140Test_F32, StructMatricesUniform) {
-    auto* src = R"(
-struct S2x2F32 {
-  m : mat2x2<f32>,
-}
-struct S3x2F32 {
-  m : mat3x2<f32>,
-}
-struct S4x2F32 {
-  m : mat4x2<f32>,
-}
-struct S2x3F32 {
-  m : mat2x3<f32>,
-}
-struct S3x3F32 {
-  m : mat3x3<f32>,
-}
-struct S4x3F32 {
-  m : mat4x3<f32>,
-}
-struct S2x4F32 {
-  m : mat2x4<f32>,
-}
-struct S3x4F32 {
-  m : mat3x4<f32>,
-}
-struct S4x4F32 {
-  m : mat4x4<f32>,
-}
-
-@group(2) @binding(2) var<uniform> s2x2f32 : S2x2F32;
-@group(3) @binding(2) var<uniform> s3x2f32 : S3x2F32;
-@group(4) @binding(2) var<uniform> s4x2f32 : S4x2F32;
-@group(2) @binding(3) var<uniform> s2x3f32 : S2x3F32;
-@group(3) @binding(3) var<uniform> s3x3f32 : S3x3F32;
-@group(4) @binding(3) var<uniform> s4x3f32 : S4x3F32;
-@group(2) @binding(4) var<uniform> s2x4f32 : S2x4F32;
-@group(3) @binding(4) var<uniform> s3x4f32 : S3x4F32;
-@group(4) @binding(4) var<uniform> s4x4f32 : S4x4F32;
-)";
-
-    auto* expect = R"(
-struct S2x2F32 {
-  m : mat2x2<f32>,
-}
-
-struct S2x2F32_std140 {
-  m_0 : vec2<f32>,
-  m_1 : vec2<f32>,
-}
-
-struct S3x2F32 {
-  m : mat3x2<f32>,
-}
-
-struct S3x2F32_std140 {
-  m_0 : vec2<f32>,
-  m_1 : vec2<f32>,
-  m_2 : vec2<f32>,
-}
-
-struct S4x2F32 {
-  m : mat4x2<f32>,
-}
-
-struct S4x2F32_std140 {
-  m_0 : vec2<f32>,
-  m_1 : vec2<f32>,
-  m_2 : vec2<f32>,
-  m_3 : vec2<f32>,
-}
-
-struct S2x3F32 {
-  m : mat2x3<f32>,
-}
-
-struct S3x3F32 {
-  m : mat3x3<f32>,
-}
-
-struct S4x3F32 {
-  m : mat4x3<f32>,
-}
-
-struct S2x4F32 {
-  m : mat2x4<f32>,
-}
-
-struct S3x4F32 {
-  m : mat3x4<f32>,
-}
-
-struct S4x4F32 {
-  m : mat4x4<f32>,
-}
-
-@group(2) @binding(2) var<uniform> s2x2f32 : S2x2F32_std140;
-
-@group(3) @binding(2) var<uniform> s3x2f32 : S3x2F32_std140;
-
-@group(4) @binding(2) var<uniform> s4x2f32 : S4x2F32_std140;
-
-@group(2) @binding(3) var<uniform> s2x3f32 : S2x3F32;
-
-@group(3) @binding(3) var<uniform> s3x3f32 : S3x3F32;
-
-@group(4) @binding(3) var<uniform> s4x3f32 : S4x3F32;
-
-@group(2) @binding(4) var<uniform> s2x4f32 : S2x4F32;
-
-@group(3) @binding(4) var<uniform> s3x4f32 : S3x4F32;
-
-@group(4) @binding(4) var<uniform> s4x4f32 : S4x4F32;
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-// In the following tests we only test `mat2x2<f32>` for matrix used as array element type and
-// `mat3x2<f32>` otherwise, and set all constant column index to 1, row index 0, inner array index
-// 2, and outer array index 3. For exhaustive tests, i.e. tests on all matrix shape and different
-// valid constant index, please refer to std140_exhaustive_test.cc
-
-TEST_F(Std140Test_F32, SingleStructMatUniform_Mat3x2F32) {
-    auto* src = R"(
-struct S {
-  m : mat3x2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> s : S;
-)";
-
-    auto* expect = R"(
-struct S {
-  m : mat3x2<f32>,
-}
-
-struct S_std140 {
-  m_0 : vec2<f32>,
-  m_1 : vec2<f32>,
-  m_2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> s : S_std140;
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, CustomAlign_Mat3x2F32) {
-    auto* src = R"(
-struct S {
-  before : i32,
-  @align(128)
-  m : mat3x2<f32>,
-  after : i32,
-}
-
-@group(0) @binding(0) var<uniform> s : S;
-)";
-
-    auto* expect = R"(
-struct S {
-  before : i32,
-  @align(128)
-  m : mat3x2<f32>,
-  after : i32,
-}
-
-struct S_std140 {
-  before : i32,
-  @align(128i)
-  m_0 : vec2<f32>,
-  m_1 : vec2<f32>,
-  m_2 : vec2<f32>,
-  after : i32,
-}
-
-@group(0) @binding(0) var<uniform> s : S_std140;
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, CustomSizeMat_Mat3x2F32) {
-    auto* src = R"(
-struct S {
-  before : i32,
-  @size(128)
-  m : mat3x2<f32>,
-  after : i32,
-}
-
-@group(0) @binding(0) var<uniform> s : S;
-)";
-
-    auto* expect = R"(
-struct S {
-  before : i32,
-  @size(128)
-  m : mat3x2<f32>,
-  after : i32,
-}
-
-struct S_std140 {
-  before : i32,
-  m_0 : vec2<f32>,
-  m_1 : vec2<f32>,
-  @size(112)
-  m_2 : vec2<f32>,
-  after : i32,
-}
-
-@group(0) @binding(0) var<uniform> s : S_std140;
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, CustomAlignAndSize_Mat3x2F32) {
-    auto* src = R"(
-struct S {
-  before : i32,
-  @align(128) @size(128)
-  m : mat3x2<f32>,
-  after : i32,
-}
-
-@group(0) @binding(0) var<uniform> s : S;
-)";
-
-    auto* expect = R"(
-struct S {
-  before : i32,
-  @align(128) @size(128)
-  m : mat3x2<f32>,
-  after : i32,
-}
-
-struct S_std140 {
-  before : i32,
-  @align(128i)
-  m_0 : vec2<f32>,
-  m_1 : vec2<f32>,
-  @size(112)
-  m_2 : vec2<f32>,
-  after : i32,
-}
-
-@group(0) @binding(0) var<uniform> s : S_std140;
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, MatrixUsageInForLoop_Mat3x2F32) {
-    auto* src = R"(
-struct S {
-  m : mat3x2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> s : S;
-
-fn f() {
-  for(var i = u32(s.m[0][0]); (i < u32(s.m[i][1])); i += u32(s.m[1][i])) {
-  }
-}
-)";
-
-    auto* expect = R"(
-struct S {
-  m : mat3x2<f32>,
-}
-
-struct S_std140 {
-  m_0 : vec2<f32>,
-  m_1 : vec2<f32>,
-  m_2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> s : S_std140;
-
-fn load_s_m_p0_1(p0 : u32) -> f32 {
-  switch(p0) {
-    case 0u: {
-      return s.m_0[1u];
-    }
-    case 1u: {
-      return s.m_1[1u];
-    }
-    case 2u: {
-      return s.m_2[1u];
-    }
-    default: {
-      return f32();
-    }
-  }
-}
-
-fn f() {
-  for(var i = u32(s.m_0[0u]); (i < u32(load_s_m_p0_1(u32(i)))); i += u32(s.m_1[i])) {
-  }
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, MatUniform_LoadMatrix_Mat3x2F32) {
-    auto* src = R"(
-@group(0) @binding(0) var<uniform> m : mat3x2<f32>;
-
-fn f() {
-  let l = m;
-}
-)";
-
-    auto* expect = R"(
-struct mat3x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-  col2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> m : mat3x2_f32;
-
-fn conv_mat3x2_f32(val : mat3x2_f32) -> mat3x2<f32> {
-  return mat3x2<f32>(val.col0, val.col1, val.col2);
-}
-
-fn f() {
-  let l = conv_mat3x2_f32(m);
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, MatUniform_LoadColumn_ConstIndex_Mat3x2F32) {
-    auto* src = R"(
-@group(0) @binding(0) var<uniform> a : mat3x2<f32>;
-
-fn f() {
-  let l = a[1];
-}
-)";
-
-    auto* expect = R"(
-struct mat3x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-  col2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : mat3x2_f32;
-
-fn f() {
-  let l = a.col1;
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, MatUniform_LoadColumn_VariableIndex_Mat3x2F32) {
-    auto* src = R"(
-@group(0) @binding(0) var<uniform> a : mat3x2<f32>;
-
-fn f() {
-  let I = 1;
-  let l = a[I];
-}
-)";
-
-    auto* expect = R"(
-struct mat3x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-  col2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : mat3x2_f32;
-
-fn load_a_p0(p0 : u32) -> vec2<f32> {
-  switch(p0) {
-    case 0u: {
-      return a.col0;
-    }
-    case 1u: {
-      return a.col1;
-    }
-    case 2u: {
-      return a.col2;
-    }
-    default: {
-      return vec2<f32>();
-    }
-  }
-}
-
-fn f() {
-  let I = 1;
-  let l = load_a_p0(u32(I));
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, MatUniform_LoadColumnSwizzle_ConstIndex_Mat3x2F32) {
-    auto* src = R"(
-@group(0) @binding(0) var<uniform> a : mat3x2<f32>;
-
-fn f() {
-  let l = a[1].yx;
-}
-)";
-
-    auto* expect = R"(
-struct mat3x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-  col2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : mat3x2_f32;
-
-fn f() {
-  let l = a.col1.yx;
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, MatUniform_LoadColumnSwizzle_VariableIndex_Mat3x2F32) {
-    auto* src = R"(
-@group(0) @binding(0) var<uniform> a : mat3x2<f32>;
-
-fn f() {
-  let I = 1;
-  let l = a[I].yx;
-}
-)";
-
-    auto* expect = R"(
-struct mat3x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-  col2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : mat3x2_f32;
-
-fn load_a_p0_yx(p0 : u32) -> vec2<f32> {
-  switch(p0) {
-    case 0u: {
-      return a.col0.yx;
-    }
-    case 1u: {
-      return a.col1.yx;
-    }
-    case 2u: {
-      return a.col2.yx;
-    }
-    default: {
-      return vec2<f32>();
-    }
-  }
-}
-
-fn f() {
-  let I = 1;
-  let l = load_a_p0_yx(u32(I));
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, MatUniform_LoadScalar_ConstColumnIndex_ConstRowIndex_Mat3x2F32) {
-    auto* src = R"(
-@group(0) @binding(0) var<uniform> a : mat3x2<f32>;
-
-fn f() {
-  let l = a[1][0];
-}
-)";
-
-    auto* expect = R"(
-struct mat3x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-  col2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : mat3x2_f32;
-
-fn f() {
-  let l = a.col1[0u];
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, MatUniform_LoadScalar_VariableColumnIndex_ConstRowIndex_Mat3x2F32) {
-    auto* src = R"(
-@group(0) @binding(0) var<uniform> a : mat3x2<f32>;
-
-fn f() {
-  let I = 0;
-  let l = a[I][0];
-}
-)";
-
-    auto* expect = R"(
-struct mat3x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-  col2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : mat3x2_f32;
-
-fn load_a_p0_0(p0 : u32) -> f32 {
-  switch(p0) {
-    case 0u: {
-      return a.col0[0u];
-    }
-    case 1u: {
-      return a.col1[0u];
-    }
-    case 2u: {
-      return a.col2[0u];
-    }
-    default: {
-      return f32();
-    }
-  }
-}
-
-fn f() {
-  let I = 0;
-  let l = load_a_p0_0(u32(I));
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, MatUniform_LoadScalar_ConstColumnIndex_VariableRowIndex_Mat3x2F32) {
-    auto* src = R"(
-@group(0) @binding(0) var<uniform> a : mat3x2<f32>;
-
-fn f() {
-  let I = 0;
-  let l = a[1][I];
-}
-)";
-
-    auto* expect = R"(
-struct mat3x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-  col2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : mat3x2_f32;
-
-fn f() {
-  let I = 0;
-  let l = a.col1[I];
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, MatUniform_LoadScalar_VariableColumnIndex_VariableRowIndex_Mat3x2F32) {
-    auto* src = R"(
-@group(0) @binding(0) var<uniform> a : mat3x2<f32>;
-
-fn f() {
-  let I = 0;
-  let l = a[I][I];
-}
-)";
-
-    auto* expect = R"(
-struct mat3x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-  col2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : mat3x2_f32;
-
-fn load_a_p0_p1(p0 : u32, p1 : u32) -> f32 {
-  switch(p0) {
-    case 0u: {
-      return a.col0[p1];
-    }
-    case 1u: {
-      return a.col1[p1];
-    }
-    case 2u: {
-      return a.col2[p1];
-    }
-    default: {
-      return f32();
-    }
-  }
-}
-
-fn f() {
-  let I = 0;
-  let l = load_a_p0_p1(u32(I), u32(I));
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, StructMatUniform_NameCollision_Mat3x2F32) {
-    auto* src = R"(
-struct S {
-  m_1 : i32,
-  m : mat3x2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> s : S;
-)";
-
-    auto* expect = R"(
-struct S {
-  m_1 : i32,
-  m : mat3x2<f32>,
-}
-
-struct S_std140 {
-  m_1 : i32,
-  m__0 : vec2<f32>,
-  m__1 : vec2<f32>,
-  m__2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> s : S_std140;
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, StructMatUniform_LoadStruct_Mat3x2F32) {
-    auto* src = R"(
-struct S {
-  m : mat3x2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> s : S;
-
-fn f() {
-  let l = s;
-}
-)";
-
-    auto* expect = R"(
-struct S {
-  m : mat3x2<f32>,
-}
-
-struct S_std140 {
-  m_0 : vec2<f32>,
-  m_1 : vec2<f32>,
-  m_2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> s : S_std140;
-
-fn conv_S(val : S_std140) -> S {
-  return S(mat3x2<f32>(val.m_0, val.m_1, val.m_2));
-}
-
-fn f() {
-  let l = conv_S(s);
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, StructMatUniform_LoadMatrix_Mat3x2F32) {
-    auto* src = R"(
-struct S {
-  m : mat3x2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> s : S;
-
-fn f() {
-  let l = s.m;
-}
-)";
-
-    auto* expect = R"(
-struct S {
-  m : mat3x2<f32>,
-}
-
-struct S_std140 {
-  m_0 : vec2<f32>,
-  m_1 : vec2<f32>,
-  m_2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> s : S_std140;
-
-fn load_s_m() -> mat3x2<f32> {
-  let s = &(s);
-  return mat3x2<f32>((*(s)).m_0, (*(s)).m_1, (*(s)).m_2);
-}
-
-fn f() {
-  let l = load_s_m();
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, StructMatUniform_LoadColumn_ConstIndex_Mat3x2F32) {
-    auto* src = R"(
-struct S {
-  m : mat3x2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> s : S;
-
-fn f() {
-  let l = s.m[1];
-}
-)";
-
-    auto* expect = R"(
-struct S {
-  m : mat3x2<f32>,
-}
-
-struct S_std140 {
-  m_0 : vec2<f32>,
-  m_1 : vec2<f32>,
-  m_2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> s : S_std140;
-
-fn f() {
-  let l = s.m_1;
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, StructMatUniform_LoadColumn_VariableIndex_Mat3x2F32) {
-    auto* src = R"(
-struct S {
-  m : mat3x2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> s : S;
-
-fn f() {
-  let I = 0;
-  let l = s.m[I];
-}
-)";
-
-    auto* expect = R"(
-struct S {
-  m : mat3x2<f32>,
-}
-
-struct S_std140 {
-  m_0 : vec2<f32>,
-  m_1 : vec2<f32>,
-  m_2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> s : S_std140;
-
-fn load_s_m_p0(p0 : u32) -> vec2<f32> {
-  switch(p0) {
-    case 0u: {
-      return s.m_0;
-    }
-    case 1u: {
-      return s.m_1;
-    }
-    case 2u: {
-      return s.m_2;
-    }
-    default: {
-      return vec2<f32>();
-    }
-  }
-}
-
-fn f() {
-  let I = 0;
-  let l = load_s_m_p0(u32(I));
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, StructMatUniform_LoadScalar_ConstColumnIndex_ConstRowIndex_Mat3x2F32) {
-    auto* src = R"(
-struct S {
-  m : mat3x2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> s : S;
-
-fn f() {
-  let l = s.m[1][0];
-}
-)";
-
-    auto* expect = R"(
-struct S {
-  m : mat3x2<f32>,
-}
-
-struct S_std140 {
-  m_0 : vec2<f32>,
-  m_1 : vec2<f32>,
-  m_2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> s : S_std140;
-
-fn f() {
-  let l = s.m_1[0u];
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, StructMatUniform_LoadScalar_VariableColumnIndex_ConstRowIndex_Mat3x2F32) {
-    auto* src = R"(
-struct S {
-  m : mat3x2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> s : S;
-
-fn f() {
-  let I = 0;
-  let l = s.m[I][0];
-}
-)";
-
-    auto* expect = R"(
-struct S {
-  m : mat3x2<f32>,
-}
-
-struct S_std140 {
-  m_0 : vec2<f32>,
-  m_1 : vec2<f32>,
-  m_2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> s : S_std140;
-
-fn load_s_m_p0_0(p0 : u32) -> f32 {
-  switch(p0) {
-    case 0u: {
-      return s.m_0[0u];
-    }
-    case 1u: {
-      return s.m_1[0u];
-    }
-    case 2u: {
-      return s.m_2[0u];
-    }
-    default: {
-      return f32();
-    }
-  }
-}
-
-fn f() {
-  let I = 0;
-  let l = load_s_m_p0_0(u32(I));
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, StructMatUniform_LoadScalar_ConstColumnIndex_VariableRowIndex_Mat3x2F32) {
-    auto* src = R"(
-struct S {
-  m : mat3x2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> s : S;
-
-fn f() {
-  let I = 0;
-  let l = s.m[1][I];
-}
-)";
-
-    auto* expect = R"(
-struct S {
-  m : mat3x2<f32>,
-}
-
-struct S_std140 {
-  m_0 : vec2<f32>,
-  m_1 : vec2<f32>,
-  m_2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> s : S_std140;
-
-fn f() {
-  let I = 0;
-  let l = s.m_1[I];
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, StructMatUniform_LoadScalar_VariableColumnIndex_VariableRowIndex_Mat3x2F32) {
-    auto* src = R"(
-struct S {
-  m : mat3x2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> s : S;
-
-fn f() {
-  let I = 0;
-  let l = s.m[I][I];
-}
-)";
-
-    auto* expect = R"(
-struct S {
-  m : mat3x2<f32>,
-}
-
-struct S_std140 {
-  m_0 : vec2<f32>,
-  m_1 : vec2<f32>,
-  m_2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> s : S_std140;
-
-fn load_s_m_p0_p1(p0 : u32, p1 : u32) -> f32 {
-  switch(p0) {
-    case 0u: {
-      return s.m_0[p1];
-    }
-    case 1u: {
-      return s.m_1[p1];
-    }
-    case 2u: {
-      return s.m_2[p1];
-    }
-    default: {
-      return f32();
-    }
-  }
-}
-
-fn f() {
-  let I = 0;
-  let l = load_s_m_p0_p1(u32(I), u32(I));
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, ArrayStructMatUniform_LoadArray_Mat3x2F32) {
-    auto* src = R"(
-struct S {
-  @size(64)
-  m : mat3x2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<S, 3>;
-
-fn f() {
-  let l = a;
-}
-)";
-
-    auto* expect = R"(
-struct S {
-  @size(64)
-  m : mat3x2<f32>,
-}
-
-struct S_std140 {
-  m_0 : vec2<f32>,
-  m_1 : vec2<f32>,
-  @size(48)
-  m_2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<S_std140, 3u>;
-
-fn conv_S(val : S_std140) -> S {
-  return S(mat3x2<f32>(val.m_0, val.m_1, val.m_2));
-}
-
-fn conv_arr3_S(val : array<S_std140, 3u>) -> array<S, 3u> {
-  var arr : array<S, 3u>;
-  for(var i : u32; (i < 3u); i = (i + 1)) {
-    arr[i] = conv_S(val[i]);
-  }
-  return arr;
-}
-
-fn f() {
-  let l = conv_arr3_S(a);
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, ArrayStructMatUniform_LoadStruct_ConstIndex_Mat3x2F32) {
-    auto* src = R"(
-struct S {
-  @size(64)
-  m : mat3x2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<S, 3>;
-
-fn f() {
-  let l = a[2];
-}
-)";
-
-    auto* expect = R"(
-struct S {
-  @size(64)
-  m : mat3x2<f32>,
-}
-
-struct S_std140 {
-  m_0 : vec2<f32>,
-  m_1 : vec2<f32>,
-  @size(48)
-  m_2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<S_std140, 3u>;
-
-fn conv_S(val : S_std140) -> S {
-  return S(mat3x2<f32>(val.m_0, val.m_1, val.m_2));
-}
-
-fn f() {
-  let l = conv_S(a[2u]);
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, ArrayStructMatUniform_LoadStruct_VariableIndex_Mat3x2F32) {
-    auto* src = R"(
-struct S {
-  @size(64)
-  m : mat3x2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<S, 3>;
-
-fn f() {
-  let I = 1;
-  let l = a[I];
-}
-)";
-
-    auto* expect = R"(
-struct S {
-  @size(64)
-  m : mat3x2<f32>,
-}
-
-struct S_std140 {
-  m_0 : vec2<f32>,
-  m_1 : vec2<f32>,
-  @size(48)
-  m_2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<S_std140, 3u>;
-
-fn conv_S(val : S_std140) -> S {
-  return S(mat3x2<f32>(val.m_0, val.m_1, val.m_2));
-}
-
-fn f() {
-  let I = 1;
-  let l = conv_S(a[I]);
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, ArrayStructMatUniform_LoadMatrix_ConstArrayIndex_Mat3x2F32) {
-    auto* src = R"(
-struct S {
-  @size(64)
-  m : mat3x2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<S, 3>;
-
-fn f() {
-  let l = a[2].m;
-}
-)";
-
-    auto* expect = R"(
-struct S {
-  @size(64)
-  m : mat3x2<f32>,
-}
-
-struct S_std140 {
-  m_0 : vec2<f32>,
-  m_1 : vec2<f32>,
-  @size(48)
-  m_2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<S_std140, 3u>;
-
-fn load_a_2_m() -> mat3x2<f32> {
-  let s = &(a[2u]);
-  return mat3x2<f32>((*(s)).m_0, (*(s)).m_1, (*(s)).m_2);
-}
-
-fn f() {
-  let l = load_a_2_m();
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, ArrayStructMatUniform_LoadMatrix_VariableArrayIndex_Mat3x2F32) {
-    auto* src = R"(
-struct S {
-  @size(64)
-  m : mat3x2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<S, 3>;
-
-fn f() {
-  let I = 1;
-  let l = a[I].m;
-}
-)";
-
-    auto* expect = R"(
-struct S {
-  @size(64)
-  m : mat3x2<f32>,
-}
-
-struct S_std140 {
-  m_0 : vec2<f32>,
-  m_1 : vec2<f32>,
-  @size(48)
-  m_2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<S_std140, 3u>;
-
-fn load_a_p0_m(p0 : u32) -> mat3x2<f32> {
-  let s = &(a[p0]);
-  return mat3x2<f32>((*(s)).m_0, (*(s)).m_1, (*(s)).m_2);
-}
-
-fn f() {
-  let I = 1;
-  let l = load_a_p0_m(u32(I));
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32,
-       ArrayStructMatUniform_LoadColumn_ConstArrayIndex_ConstColumnIndex_Mat3x2F32) {
-    auto* src = R"(
-struct S {
-  @size(64)
-  m : mat3x2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<S, 3>;
-
-fn f() {
-  let l = a[2].m[1];
-}
-)";
-
-    auto* expect = R"(
-struct S {
-  @size(64)
-  m : mat3x2<f32>,
-}
-
-struct S_std140 {
-  m_0 : vec2<f32>,
-  m_1 : vec2<f32>,
-  @size(48)
-  m_2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<S_std140, 3u>;
-
-fn f() {
-  let l = a[2u].m_1;
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32,
-       ArrayStructMatUniform_LoadColumn_VariableArrayIndex_ConstColumnIndex_Mat3x2F32) {
-    auto* src = R"(
-struct S {
-  @size(64)
-  m : mat3x2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<S, 3>;
-
-fn f() {
-  let I = 1;
-  let l = a[I].m[1];
-}
-)";
-
-    auto* expect = R"(
-struct S {
-  @size(64)
-  m : mat3x2<f32>,
-}
-
-struct S_std140 {
-  m_0 : vec2<f32>,
-  m_1 : vec2<f32>,
-  @size(48)
-  m_2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<S_std140, 3u>;
-
-fn f() {
-  let I = 1;
-  let l = a[I].m_1;
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32,
-       ArrayStructMatUniform_LoadColumn_ConstArrayIndex_VariableColumnIndex_Mat3x2F32) {
-    auto* src = R"(
-struct S {
-  @size(64)
-  m : mat3x2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<S, 3>;
-
-fn f() {
-  let I = 1;
-  let l = a[2].m[I];
-}
-)";
-
-    auto* expect = R"(
-struct S {
-  @size(64)
-  m : mat3x2<f32>,
-}
-
-struct S_std140 {
-  m_0 : vec2<f32>,
-  m_1 : vec2<f32>,
-  @size(48)
-  m_2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<S_std140, 3u>;
-
-fn load_a_2_m_p0(p0 : u32) -> vec2<f32> {
-  switch(p0) {
-    case 0u: {
-      return a[2u].m_0;
-    }
-    case 1u: {
-      return a[2u].m_1;
-    }
-    case 2u: {
-      return a[2u].m_2;
-    }
-    default: {
-      return vec2<f32>();
-    }
-  }
-}
-
-fn f() {
-  let I = 1;
-  let l = load_a_2_m_p0(u32(I));
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32,
-       ArrayStructMatUniform_LoadColumn_VariableArrayIndex_VariableColumnIndex_Mat3x2F32) {
-    auto* src = R"(
-struct S {
-  @size(64)
-  m : mat3x2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<S, 3>;
-
-fn f() {
-  let I = 1;
-  let l = a[I].m[I];
-}
-)";
-
-    auto* expect = R"(
-struct S {
-  @size(64)
-  m : mat3x2<f32>,
-}
-
-struct S_std140 {
-  m_0 : vec2<f32>,
-  m_1 : vec2<f32>,
-  @size(48)
-  m_2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<S_std140, 3u>;
-
-fn load_a_p0_m_p1(p0 : u32, p1 : u32) -> vec2<f32> {
-  switch(p1) {
-    case 0u: {
-      return a[p0].m_0;
-    }
-    case 1u: {
-      return a[p0].m_1;
-    }
-    case 2u: {
-      return a[p0].m_2;
-    }
-    default: {
-      return vec2<f32>();
-    }
-  }
-}
-
-fn f() {
-  let I = 1;
-  let l = load_a_p0_m_p1(u32(I), u32(I));
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, ArrayStructArrayStructMatUniform_Loads_Mat3x2F32) {
-    auto* src = R"(
-struct Inner {
-  @size(64)
-  m : mat3x2<f32>,
-}
-
-struct Outer {
-  a : array<Inner, 4>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
-
-fn f() {
-  let I = 1;
-  let J = 2;
-  let K = 0;
-  let l_a : array<Outer, 4> = a;
-  let l_a_1 : Outer = a[1];
-  let l_a_I : Outer = a[I];
-  let l_a_2_a : array<Inner, 4> = a[2].a;
-  let l_a_I_a : array<Inner, 4> = a[I].a;
-  let l_a_3_a_1 : Inner = a[3].a[1];
-  let l_a_3_a_I : Inner = a[3].a[I];
-  let l_a_I_a_1 : Inner = a[I].a[1];
-  let l_a_I_a_J : Inner = a[I].a[J];
-  let l_a_0_a_2_m : mat3x2<f32> = a[0].a[2].m;
-  let l_a_0_a_I_m : mat3x2<f32> = a[0].a[I].m;
-  let l_a_I_a_2_m : mat3x2<f32> = a[I].a[2].m;
-  let l_a_I_a_J_m : mat3x2<f32> = a[I].a[J].m;
-  let l_a_1_a_3_m_0 : vec2<f32> = a[1].a[3].m[0];
-  let l_a_I_a_J_m_K : vec2<f32> = a[I].a[J].m[K];
-  let l_a_2_a_0_m_1_0 : f32 = a[2].a[0].m[1][0];
-  let l_a_I_a_J_m_K_I : f32 = a[I].a[J].m[K][I];
-}
-)";
-
-    auto* expect = R"(
-struct Inner {
-  @size(64)
-  m : mat3x2<f32>,
-}
-
-struct Inner_std140 {
-  m_0 : vec2<f32>,
-  m_1 : vec2<f32>,
-  @size(48)
-  m_2 : vec2<f32>,
-}
-
-struct Outer {
-  a : array<Inner, 4>,
-}
-
-struct Outer_std140 {
-  a : array<Inner_std140, 4u>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<Outer_std140, 4u>;
-
-fn conv_Inner(val : Inner_std140) -> Inner {
-  return Inner(mat3x2<f32>(val.m_0, val.m_1, val.m_2));
-}
-
-fn conv_arr4_Inner(val : array<Inner_std140, 4u>) -> array<Inner, 4u> {
-  var arr : array<Inner, 4u>;
-  for(var i : u32; (i < 4u); i = (i + 1)) {
-    arr[i] = conv_Inner(val[i]);
-  }
-  return arr;
-}
-
-fn conv_Outer(val : Outer_std140) -> Outer {
-  return Outer(conv_arr4_Inner(val.a));
-}
-
-fn conv_arr4_Outer(val : array<Outer_std140, 4u>) -> array<Outer, 4u> {
-  var arr : array<Outer, 4u>;
-  for(var i : u32; (i < 4u); i = (i + 1)) {
-    arr[i] = conv_Outer(val[i]);
-  }
-  return arr;
-}
-
-fn load_a_0_a_2_m() -> mat3x2<f32> {
-  let s = &(a[0u].a[2u]);
-  return mat3x2<f32>((*(s)).m_0, (*(s)).m_1, (*(s)).m_2);
-}
-
-fn load_a_0_a_p0_m(p0 : u32) -> mat3x2<f32> {
-  let s = &(a[0u].a[p0]);
-  return mat3x2<f32>((*(s)).m_0, (*(s)).m_1, (*(s)).m_2);
-}
-
-fn load_a_p0_a_2_m(p0 : u32) -> mat3x2<f32> {
-  let s = &(a[p0].a[2u]);
-  return mat3x2<f32>((*(s)).m_0, (*(s)).m_1, (*(s)).m_2);
-}
-
-fn load_a_p0_a_p1_m(p0 : u32, p1 : u32) -> mat3x2<f32> {
-  let s = &(a[p0].a[p1]);
-  return mat3x2<f32>((*(s)).m_0, (*(s)).m_1, (*(s)).m_2);
-}
-
-fn load_a_p0_a_p1_m_p2(p0 : u32, p1 : u32, p2 : u32) -> vec2<f32> {
-  switch(p2) {
-    case 0u: {
-      return a[p0].a[p1].m_0;
-    }
-    case 1u: {
-      return a[p0].a[p1].m_1;
-    }
-    case 2u: {
-      return a[p0].a[p1].m_2;
-    }
-    default: {
-      return vec2<f32>();
-    }
-  }
-}
-
-fn load_a_p0_a_p1_m_p2_p3(p0 : u32, p1 : u32, p2 : u32, p3 : u32) -> f32 {
-  switch(p2) {
-    case 0u: {
-      return a[p0].a[p1].m_0[p3];
-    }
-    case 1u: {
-      return a[p0].a[p1].m_1[p3];
-    }
-    case 2u: {
-      return a[p0].a[p1].m_2[p3];
-    }
-    default: {
-      return f32();
-    }
-  }
-}
-
-fn f() {
-  let I = 1;
-  let J = 2;
-  let K = 0;
-  let l_a : array<Outer, 4> = conv_arr4_Outer(a);
-  let l_a_1 : Outer = conv_Outer(a[1u]);
-  let l_a_I : Outer = conv_Outer(a[I]);
-  let l_a_2_a : array<Inner, 4> = conv_arr4_Inner(a[2u].a);
-  let l_a_I_a : array<Inner, 4> = conv_arr4_Inner(a[I].a);
-  let l_a_3_a_1 : Inner = conv_Inner(a[3u].a[1u]);
-  let l_a_3_a_I : Inner = conv_Inner(a[3u].a[I]);
-  let l_a_I_a_1 : Inner = conv_Inner(a[I].a[1u]);
-  let l_a_I_a_J : Inner = conv_Inner(a[I].a[J]);
-  let l_a_0_a_2_m : mat3x2<f32> = load_a_0_a_2_m();
-  let l_a_0_a_I_m : mat3x2<f32> = load_a_0_a_p0_m(u32(I));
-  let l_a_I_a_2_m : mat3x2<f32> = load_a_p0_a_2_m(u32(I));
-  let l_a_I_a_J_m : mat3x2<f32> = load_a_p0_a_p1_m(u32(I), u32(J));
-  let l_a_1_a_3_m_0 : vec2<f32> = a[1u].a[3u].m_0;
-  let l_a_I_a_J_m_K : vec2<f32> = load_a_p0_a_p1_m_p2(u32(I), u32(J), u32(K));
-  let l_a_2_a_0_m_1_0 : f32 = a[2u].a[0u].m_1[0u];
-  let l_a_I_a_J_m_K_I : f32 = load_a_p0_a_p1_m_p2_p3(u32(I), u32(J), u32(K), u32(I));
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, ArrayStructArrayStructMatUniform_LoadsViaPtrs_Mat3x2F32) {
-    auto* src = R"(
-struct Inner {
-  @size(64)
-  m : mat3x2<f32>,
-}
-
-struct Outer {
-  a : array<Inner, 4>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
-
-fn f() {
-  let I = 1;
-  let J = 2;
-  let K = 0;
-  let p_a = &(a);
-  let p_a_3 = &((*(p_a))[3]);
-  let p_a_I = &((*(p_a))[I]);
-  let p_a_3_a = &((*(p_a_3)).a);
-  let p_a_I_a = &((*(p_a_I)).a);
-  let p_a_3_a_2 = &((*(p_a_3_a))[2]);
-  let p_a_3_a_I = &((*(p_a_3_a))[I]);
-  let p_a_I_a_2 = &((*(p_a_I_a))[2]);
-  let p_a_I_a_J = &((*(p_a_I_a))[J]);
-  let p_a_3_a_2_m = &((*(p_a_3_a_2)).m);
-  let p_a_3_a_I_m = &((*(p_a_3_a_I)).m);
-  let p_a_I_a_2_m = &((*(p_a_I_a_2)).m);
-  let p_a_I_a_J_m = &((*(p_a_I_a_J)).m);
-  let p_a_3_a_2_m_1 = &((*(p_a_3_a_2_m))[1]);
-  let p_a_I_a_J_m_K = &((*(p_a_I_a_J_m))[K]);
-  let l_a : array<Outer, 4> = *(p_a);
-  let l_a_3 : Outer = *(p_a_3);
-  let l_a_I : Outer = *(p_a_I);
-  let l_a_3_a : array<Inner, 4> = *(p_a_3_a);
-  let l_a_I_a : array<Inner, 4> = *(p_a_I_a);
-  let l_a_3_a_2 : Inner = *(p_a_3_a_2);
-  let l_a_3_a_I : Inner = *(p_a_3_a_I);
-  let l_a_I_a_2 : Inner = *(p_a_I_a_2);
-  let l_a_I_a_J : Inner = *(p_a_I_a_J);
-  let l_a_3_a_2_m : mat3x2<f32> = *(p_a_3_a_2_m);
-  let l_a_3_a_I_m : mat3x2<f32> = *(p_a_3_a_I_m);
-  let l_a_I_a_2_m : mat3x2<f32> = *(p_a_I_a_2_m);
-  let l_a_I_a_J_m : mat3x2<f32> = *(p_a_I_a_J_m);
-  let l_a_3_a_2_m_1 : vec2<f32> = *(p_a_3_a_2_m_1);
-  let l_a_I_a_J_m_K : vec2<f32> = *(p_a_I_a_J_m_K);
-  let l_a_2_a_0_m_1_0 : f32 = (*(p_a_3_a_2_m_1))[0];
-  let l_a_I_a_J_m_K_I : f32 = (*(p_a_I_a_J_m_K))[I];
-}
-)";
-
-    auto* expect = R"(
-struct Inner {
-  @size(64)
-  m : mat3x2<f32>,
-}
-
-struct Inner_std140 {
-  m_0 : vec2<f32>,
-  m_1 : vec2<f32>,
-  @size(48)
-  m_2 : vec2<f32>,
-}
-
-struct Outer {
-  a : array<Inner, 4>,
-}
-
-struct Outer_std140 {
-  a : array<Inner_std140, 4u>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<Outer_std140, 4u>;
-
-fn conv_Inner(val : Inner_std140) -> Inner {
-  return Inner(mat3x2<f32>(val.m_0, val.m_1, val.m_2));
-}
-
-fn conv_arr4_Inner(val : array<Inner_std140, 4u>) -> array<Inner, 4u> {
-  var arr : array<Inner, 4u>;
-  for(var i : u32; (i < 4u); i = (i + 1)) {
-    arr[i] = conv_Inner(val[i]);
-  }
-  return arr;
-}
-
-fn conv_Outer(val : Outer_std140) -> Outer {
-  return Outer(conv_arr4_Inner(val.a));
-}
-
-fn conv_arr4_Outer(val : array<Outer_std140, 4u>) -> array<Outer, 4u> {
-  var arr : array<Outer, 4u>;
-  for(var i : u32; (i < 4u); i = (i + 1)) {
-    arr[i] = conv_Outer(val[i]);
-  }
-  return arr;
-}
-
-fn load_a_3_a_2_m() -> mat3x2<f32> {
-  let s = &(a[3u].a[2u]);
-  return mat3x2<f32>((*(s)).m_0, (*(s)).m_1, (*(s)).m_2);
-}
-
-fn load_a_3_a_p0_m(p0 : u32) -> mat3x2<f32> {
-  let s = &(a[3u].a[p0]);
-  return mat3x2<f32>((*(s)).m_0, (*(s)).m_1, (*(s)).m_2);
-}
-
-fn load_a_p0_a_2_m(p0 : u32) -> mat3x2<f32> {
-  let s = &(a[p0].a[2u]);
-  return mat3x2<f32>((*(s)).m_0, (*(s)).m_1, (*(s)).m_2);
-}
-
-fn load_a_p0_a_p1_m(p0 : u32, p1 : u32) -> mat3x2<f32> {
-  let s = &(a[p0].a[p1]);
-  return mat3x2<f32>((*(s)).m_0, (*(s)).m_1, (*(s)).m_2);
-}
-
-fn load_a_p0_a_p1_m_p2(p0 : u32, p1 : u32, p2 : u32) -> vec2<f32> {
-  switch(p2) {
-    case 0u: {
-      return a[p0].a[p1].m_0;
-    }
-    case 1u: {
-      return a[p0].a[p1].m_1;
-    }
-    case 2u: {
-      return a[p0].a[p1].m_2;
-    }
-    default: {
-      return vec2<f32>();
-    }
-  }
-}
-
-fn load_a_p0_a_p1_m_p2_p3(p0 : u32, p1 : u32, p2 : u32, p3 : u32) -> f32 {
-  switch(p2) {
-    case 0u: {
-      return a[p0].a[p1].m_0[p3];
-    }
-    case 1u: {
-      return a[p0].a[p1].m_1[p3];
-    }
-    case 2u: {
-      return a[p0].a[p1].m_2[p3];
-    }
-    default: {
-      return f32();
-    }
-  }
-}
-
-fn f() {
-  let I = 1;
-  let J = 2;
-  let K = 0;
-  let p_a = conv_arr4_Outer(a);
-  let p_a_3 = conv_Outer(a[3u]);
-  let p_a_I = conv_Outer(a[I]);
-  let p_a_3_a = conv_arr4_Inner(a[3u].a);
-  let p_a_I_a = conv_arr4_Inner(a[I].a);
-  let p_a_3_a_2 = conv_Inner(a[3u].a[2u]);
-  let p_a_3_a_I = conv_Inner(a[3u].a[I]);
-  let p_a_I_a_2 = conv_Inner(a[I].a[2u]);
-  let p_a_I_a_J = conv_Inner(a[I].a[J]);
-  let p_a_3_a_2_m = load_a_3_a_2_m();
-  let p_a_3_a_I_m = load_a_3_a_p0_m(u32(I));
-  let p_a_I_a_2_m = load_a_p0_a_2_m(u32(I));
-  let p_a_I_a_J_m = load_a_p0_a_p1_m(u32(I), u32(J));
-  let p_a_3_a_2_m_1 = a[3u].a[2u].m_1;
-  let p_a_I_a_J_m_K = load_a_p0_a_p1_m_p2(u32(I), u32(J), u32(K));
-  let l_a : array<Outer, 4> = conv_arr4_Outer(a);
-  let l_a_3 : Outer = conv_Outer(a[3u]);
-  let l_a_I : Outer = conv_Outer(a[I]);
-  let l_a_3_a : array<Inner, 4> = conv_arr4_Inner(a[3u].a);
-  let l_a_I_a : array<Inner, 4> = conv_arr4_Inner(a[I].a);
-  let l_a_3_a_2 : Inner = conv_Inner(a[3u].a[2u]);
-  let l_a_3_a_I : Inner = conv_Inner(a[3u].a[I]);
-  let l_a_I_a_2 : Inner = conv_Inner(a[I].a[2u]);
-  let l_a_I_a_J : Inner = conv_Inner(a[I].a[J]);
-  let l_a_3_a_2_m : mat3x2<f32> = load_a_3_a_2_m();
-  let l_a_3_a_I_m : mat3x2<f32> = load_a_3_a_p0_m(u32(I));
-  let l_a_I_a_2_m : mat3x2<f32> = load_a_p0_a_2_m(u32(I));
-  let l_a_I_a_J_m : mat3x2<f32> = load_a_p0_a_p1_m(u32(I), u32(J));
-  let l_a_3_a_2_m_1 : vec2<f32> = a[3u].a[2u].m_1;
-  let l_a_I_a_J_m_K : vec2<f32> = load_a_p0_a_p1_m_p2(u32(I), u32(J), u32(K));
-  let l_a_2_a_0_m_1_0 : f32 = a[3u].a[2u].m_1[0u];
-  let l_a_I_a_J_m_K_I : f32 = load_a_p0_a_p1_m_p2_p3(u32(I), u32(J), u32(K), u32(I));
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, ArrayStructMatUniform_CopyArray_UniformToStorage_Mat3x2F32) {
-    auto* src = R"(
-struct S {
-  @size(64)
-  m : mat3x2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> u : array<S, 4>;
-
-@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
-
-fn f() {
-  s = u;
-}
-)";
-
-    auto* expect = R"(
-struct S {
-  @size(64)
-  m : mat3x2<f32>,
-}
-
-struct S_std140 {
-  m_0 : vec2<f32>,
-  m_1 : vec2<f32>,
-  @size(48)
-  m_2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> u : array<S_std140, 4u>;
-
-@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
-
-fn conv_S(val : S_std140) -> S {
-  return S(mat3x2<f32>(val.m_0, val.m_1, val.m_2));
-}
-
-fn conv_arr4_S(val : array<S_std140, 4u>) -> array<S, 4u> {
-  var arr : array<S, 4u>;
-  for(var i : u32; (i < 4u); i = (i + 1)) {
-    arr[i] = conv_S(val[i]);
-  }
-  return arr;
-}
-
-fn f() {
-  s = conv_arr4_S(u);
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, ArrayStructMatUniform_CopyStruct_UniformToWorkgroup_Mat3x2F32) {
-    auto* src = R"(
-struct S {
-  v : vec4<i32>,
-  @size(64)
-  m : mat3x2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> u : array<S, 4>;
-
-var<workgroup> w : array<S, 4>;
-
-fn f() {
-  w[0] = u[1];
-}
-)";
-
-    auto* expect = R"(
-struct S {
-  v : vec4<i32>,
-  @size(64)
-  m : mat3x2<f32>,
-}
-
-struct S_std140 {
-  v : vec4<i32>,
-  m_0 : vec2<f32>,
-  m_1 : vec2<f32>,
-  @size(48)
-  m_2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> u : array<S_std140, 4u>;
-
-var<workgroup> w : array<S, 4>;
-
-fn conv_S(val : S_std140) -> S {
-  return S(val.v, mat3x2<f32>(val.m_0, val.m_1, val.m_2));
-}
-
-fn f() {
-  w[0] = conv_S(u[1u]);
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, ArrayStructMatUniform_CopyMatrix_UniformToPrivate_Mat3x2F32) {
-    auto* src = R"(
-struct S {
-  v : vec4<i32>,
-  @size(64)
-  m : mat3x2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> u : array<S, 3>;
-
-var<private> p : array<S, 4>;
-
-fn f() {
-  p[2].m = u[1].m;
-}
-)";
-
-    auto* expect = R"(
-struct S {
-  v : vec4<i32>,
-  @size(64)
-  m : mat3x2<f32>,
-}
-
-struct S_std140 {
-  v : vec4<i32>,
-  m_0 : vec2<f32>,
-  m_1 : vec2<f32>,
-  @size(48)
-  m_2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> u : array<S_std140, 3u>;
-
-var<private> p : array<S, 4>;
-
-fn load_u_1_m() -> mat3x2<f32> {
-  let s = &(u[1u]);
-  return mat3x2<f32>((*(s)).m_0, (*(s)).m_1, (*(s)).m_2);
-}
-
-fn f() {
-  p[2].m = load_u_1_m();
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, ArrayStructMatUniform_CopyColumn_UniformToStorage_Mat3x2F32) {
-    auto* src = R"(
-struct S {
-  @size(64)
-  m : mat3x2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> u : array<S, 3>;
-
-@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
-
-fn f() {
-  s[3].m[1] = u[2].m[0];
-}
-)";
-
-    auto* expect = R"(
-struct S {
-  @size(64)
-  m : mat3x2<f32>,
-}
-
-struct S_std140 {
-  m_0 : vec2<f32>,
-  m_1 : vec2<f32>,
-  @size(48)
-  m_2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> u : array<S_std140, 3u>;
-
-@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
-
-fn f() {
-  s[3].m[1] = u[2u].m_0;
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, ArrayStructMatUniform_CopyColumnSwizzle_UniformToWorkgroup_Mat3x2F32) {
-    auto* src = R"(
-struct S {
-  @size(64)
-  m : mat3x2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> u : array<S, 4>;
-
-var<workgroup> w : array<S, 4>;
-
-fn f() {
-  w[3].m[1] = u[2].m[0].yx.yx;
-}
-)";
-
-    auto* expect = R"(
-struct S {
-  @size(64)
-  m : mat3x2<f32>,
-}
-
-struct S_std140 {
-  m_0 : vec2<f32>,
-  m_1 : vec2<f32>,
-  @size(48)
-  m_2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> u : array<S_std140, 4u>;
-
-var<workgroup> w : array<S, 4>;
-
-fn f() {
-  w[3].m[1] = u[2u].m_0.yx.yx;
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, ArrayStructMatUniform_CopyScalar_UniformToPrivate_Mat3x2F32) {
-    auto* src = R"(
-struct S {
-  v : vec4<i32>,
-  @size(64)
-  m : mat3x2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> u : array<S, 3>;
-
-var<private> p : array<S, 4>;
-
-fn f() {
-  p[3].m[1].x = u[2].m[0].y;
-}
-)";
-
-    auto* expect = R"(
-struct S {
-  v : vec4<i32>,
-  @size(64)
-  m : mat3x2<f32>,
-}
-
-struct S_std140 {
-  v : vec4<i32>,
-  m_0 : vec2<f32>,
-  m_1 : vec2<f32>,
-  @size(48)
-  m_2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> u : array<S_std140, 3u>;
-
-var<private> p : array<S, 4>;
-
-fn f() {
-  p[3].m[1].x = u[2u].m_0[1u];
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, ArrayMatUniform_LoadArray_Mat2x2F32) {
-    auto* src = R"(
-@group(0) @binding(0) var<uniform> a : array<mat2x2<f32>, 3>;
-
-fn f() {
-  let l = a;
-}
-)";
-
-    auto* expect = R"(
-struct mat2x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<mat2x2_f32, 3u>;
-
-fn conv_mat2x2_f32(val : mat2x2_f32) -> mat2x2<f32> {
-  return mat2x2<f32>(val.col0, val.col1);
-}
-
-fn conv_arr3_mat2x2_f32(val : array<mat2x2_f32, 3u>) -> array<mat2x2<f32>, 3u> {
-  var arr : array<mat2x2<f32>, 3u>;
-  for(var i : u32; (i < 3u); i = (i + 1)) {
-    arr[i] = conv_mat2x2_f32(val[i]);
-  }
-  return arr;
-}
-
-fn f() {
-  let l = conv_arr3_mat2x2_f32(a);
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, ArrayMatUniform_LoadMatrix_ConstArrayIndex_Mat2x2F32) {
-    auto* src = R"(
-@group(0) @binding(0) var<uniform> a : array<mat2x2<f32>, 3>;
-
-fn f() {
-  let l = a[2];
-}
-)";
-
-    auto* expect = R"(
-struct mat2x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<mat2x2_f32, 3u>;
-
-fn conv_mat2x2_f32(val : mat2x2_f32) -> mat2x2<f32> {
-  return mat2x2<f32>(val.col0, val.col1);
-}
-
-fn f() {
-  let l = conv_mat2x2_f32(a[2u]);
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, ArrayMatUniform_LoadMatrix_VariableArrayIndex_Mat2x2F32) {
-    auto* src = R"(
-@group(0) @binding(0) var<uniform> a : array<mat2x2<f32>, 3>;
-
-fn f() {
-  let I = 1;
-  let l = a[I];
-}
-)";
-
-    auto* expect = R"(
-struct mat2x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<mat2x2_f32, 3u>;
-
-fn conv_mat2x2_f32(val : mat2x2_f32) -> mat2x2<f32> {
-  return mat2x2<f32>(val.col0, val.col1);
-}
-
-fn f() {
-  let I = 1;
-  let l = conv_mat2x2_f32(a[I]);
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, ArrayMatUniform_LoadColumn_ConstArrayIndex_ConstColumnIndex_Mat2x2F32) {
-    auto* src = R"(
-@group(0) @binding(0) var<uniform> a : array<mat2x2<f32>, 3>;
-
-fn f() {
-  let l = a[2][1];
-}
-)";
-
-    auto* expect = R"(
-struct mat2x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<mat2x2_f32, 3u>;
-
-fn f() {
-  let l = a[2u].col1;
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, ArrayMatUniform_LoadColumn_VariableArrayIndex_ConstColumnIndex_Mat2x2F32) {
-    auto* src = R"(
-@group(0) @binding(0) var<uniform> a : array<mat2x2<f32>, 3>;
-
-fn f() {
-  let I = 1;
-  let l = a[I][1];
-}
-)";
-
-    auto* expect = R"(
-struct mat2x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<mat2x2_f32, 3u>;
-
-fn f() {
-  let I = 1;
-  let l = a[I].col1;
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, ArrayMatUniform_LoadColumn_ConstArrayIndex_VariableColumnIndex_Mat2x2F32) {
-    auto* src = R"(
-@group(0) @binding(0) var<uniform> a : array<mat2x2<f32>, 3>;
-
-fn f() {
-  let I = 1;
-  let l = a[2][I];
-}
-)";
-
-    auto* expect = R"(
-struct mat2x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<mat2x2_f32, 3u>;
-
-fn load_a_2_p0(p0 : u32) -> vec2<f32> {
-  switch(p0) {
-    case 0u: {
-      return a[2u].col0;
-    }
-    case 1u: {
-      return a[2u].col1;
-    }
-    default: {
-      return vec2<f32>();
-    }
-  }
-}
-
-fn f() {
-  let I = 1;
-  let l = load_a_2_p0(u32(I));
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32,
-       ArrayMatUniform_LoadColumn_VariableArrayIndex_VariableColumnIndex_Mat2x2F32) {
-    auto* src = R"(
-@group(0) @binding(0) var<uniform> a : array<mat2x2<f32>, 3>;
-
-fn f() {
-  let I = 1;
-  let l = a[I][I];
-}
-)";
-
-    auto* expect = R"(
-struct mat2x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<mat2x2_f32, 3u>;
-
-fn load_a_p0_p1(p0 : u32, p1 : u32) -> vec2<f32> {
-  switch(p1) {
-    case 0u: {
-      return a[p0].col0;
-    }
-    case 1u: {
-      return a[p0].col1;
-    }
-    default: {
-      return vec2<f32>();
-    }
-  }
-}
-
-fn f() {
-  let I = 1;
-  let l = load_a_p0_p1(u32(I), u32(I));
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, StructArrayMatUniform_LoadStruct_Mat2x2F32) {
-    auto* src = R"(
-struct S {
-  a : array<mat2x2<f32>, 3>,
-}
-
-@group(0) @binding(0) var<uniform> s : S;
-
-fn f() {
-  let l = s;
-}
-)";
-
-    auto* expect = R"(
-struct mat2x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-}
-
-struct S {
-  a : array<mat2x2<f32>, 3>,
-}
-
-struct S_std140 {
-  a : array<mat2x2_f32, 3u>,
-}
-
-@group(0) @binding(0) var<uniform> s : S_std140;
-
-fn conv_mat2x2_f32(val : mat2x2_f32) -> mat2x2<f32> {
-  return mat2x2<f32>(val.col0, val.col1);
-}
-
-fn conv_arr3_mat2x2_f32(val : array<mat2x2_f32, 3u>) -> array<mat2x2<f32>, 3u> {
-  var arr : array<mat2x2<f32>, 3u>;
-  for(var i : u32; (i < 3u); i = (i + 1)) {
-    arr[i] = conv_mat2x2_f32(val[i]);
-  }
-  return arr;
-}
-
-fn conv_S(val : S_std140) -> S {
-  return S(conv_arr3_mat2x2_f32(val.a));
-}
-
-fn f() {
-  let l = conv_S(s);
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, StructArrayMatUniform_LoadArray_Mat2x2F32) {
-    auto* src = R"(
-struct S {
-  a : array<mat2x2<f32>, 3>,
-}
-
-@group(0) @binding(0) var<uniform> s : S;
-
-fn f() {
-  let l = s.a;
-}
-)";
-
-    auto* expect = R"(
-struct mat2x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-}
-
-struct S {
-  a : array<mat2x2<f32>, 3>,
-}
-
-struct S_std140 {
-  a : array<mat2x2_f32, 3u>,
-}
-
-@group(0) @binding(0) var<uniform> s : S_std140;
-
-fn conv_mat2x2_f32(val : mat2x2_f32) -> mat2x2<f32> {
-  return mat2x2<f32>(val.col0, val.col1);
-}
-
-fn conv_arr3_mat2x2_f32(val : array<mat2x2_f32, 3u>) -> array<mat2x2<f32>, 3u> {
-  var arr : array<mat2x2<f32>, 3u>;
-  for(var i : u32; (i < 3u); i = (i + 1)) {
-    arr[i] = conv_mat2x2_f32(val[i]);
-  }
-  return arr;
-}
-
-fn f() {
-  let l = conv_arr3_mat2x2_f32(s.a);
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, StructArrayMatUniform_LoadMatrix_ConstArrayIndex_Mat2x2F32) {
-    auto* src = R"(
-struct S {
-  a : array<mat2x2<f32>, 3>,
-}
-
-@group(0) @binding(0) var<uniform> s : S;
-
-fn f() {
-  let l = s.a[2];
-}
-)";
-
-    auto* expect = R"(
-struct mat2x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-}
-
-struct S {
-  a : array<mat2x2<f32>, 3>,
-}
-
-struct S_std140 {
-  a : array<mat2x2_f32, 3u>,
-}
-
-@group(0) @binding(0) var<uniform> s : S_std140;
-
-fn conv_mat2x2_f32(val : mat2x2_f32) -> mat2x2<f32> {
-  return mat2x2<f32>(val.col0, val.col1);
-}
-
-fn f() {
-  let l = conv_mat2x2_f32(s.a[2u]);
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, StructArrayMatUniform_LoadMatrix_VariableArrayIndex_Mat2x2F32) {
-    auto* src = R"(
-struct S {
-  a : array<mat2x2<f32>, 3>,
-}
-
-@group(0) @binding(0) var<uniform> s : S;
-
-fn f() {
-  let I = 1;
-  let l = s.a[I];
-}
-)";
-
-    auto* expect = R"(
-struct mat2x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-}
-
-struct S {
-  a : array<mat2x2<f32>, 3>,
-}
-
-struct S_std140 {
-  a : array<mat2x2_f32, 3u>,
-}
-
-@group(0) @binding(0) var<uniform> s : S_std140;
-
-fn conv_mat2x2_f32(val : mat2x2_f32) -> mat2x2<f32> {
-  return mat2x2<f32>(val.col0, val.col1);
-}
-
-fn f() {
-  let I = 1;
-  let l = conv_mat2x2_f32(s.a[I]);
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32,
-       StructArrayMatUniform_LoadColumn_ConstArrayIndex_ConstColumnIndex_Mat2x2F32) {
-    auto* src = R"(
-struct S {
-  a : array<mat2x2<f32>, 3>,
-}
-
-@group(0) @binding(0) var<uniform> s : S;
-
-fn f() {
-  let l = s.a[2][1];
-}
-)";
-
-    auto* expect = R"(
-struct mat2x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-}
-
-struct S {
-  a : array<mat2x2<f32>, 3>,
-}
-
-struct S_std140 {
-  a : array<mat2x2_f32, 3u>,
-}
-
-@group(0) @binding(0) var<uniform> s : S_std140;
-
-fn f() {
-  let l = s.a[2u].col1;
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32,
-       StructArrayMatUniform_LoadColumn_VariableArrayIndex_ConstColumnIndex_Mat2x2F32) {
-    auto* src = R"(
-struct S {
-  a : array<mat2x2<f32>, 3>,
-}
-
-@group(0) @binding(0) var<uniform> s : S;
-
-fn f() {
-  let I = 1;
-  let l = s.a[I][1];
-}
-)";
-
-    auto* expect = R"(
-struct mat2x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-}
-
-struct S {
-  a : array<mat2x2<f32>, 3>,
-}
-
-struct S_std140 {
-  a : array<mat2x2_f32, 3u>,
-}
-
-@group(0) @binding(0) var<uniform> s : S_std140;
-
-fn f() {
-  let I = 1;
-  let l = s.a[I].col1;
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32,
-       StructArrayMatUniform_LoadColumn_ConstArrayIndex_VariableColumnIndex_Mat2x2F32) {
-    auto* src = R"(
-struct S {
-  a : array<mat2x2<f32>, 3>,
-}
-
-@group(0) @binding(0) var<uniform> s : S;
-
-fn f() {
-  let I = 1;
-  let l = s.a[2][I];
-}
-)";
-
-    auto* expect = R"(
-struct mat2x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-}
-
-struct S {
-  a : array<mat2x2<f32>, 3>,
-}
-
-struct S_std140 {
-  a : array<mat2x2_f32, 3u>,
-}
-
-@group(0) @binding(0) var<uniform> s : S_std140;
-
-fn load_s_a_2_p0(p0 : u32) -> vec2<f32> {
-  switch(p0) {
-    case 0u: {
-      return s.a[2u].col0;
-    }
-    case 1u: {
-      return s.a[2u].col1;
-    }
-    default: {
-      return vec2<f32>();
-    }
-  }
-}
-
-fn f() {
-  let I = 1;
-  let l = load_s_a_2_p0(u32(I));
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32,
-       StructArrayMatUniform_LoadColumn_VariableArrayIndex_VariableColumnIndex_Mat2x2F32) {
-    auto* src = R"(
-struct S {
-  a : array<mat2x2<f32>, 3>,
-}
-
-@group(0) @binding(0) var<uniform> s : S;
-
-fn f() {
-  let I = 1;
-  let l = s.a[I][I];
-}
-)";
-
-    auto* expect = R"(
-struct mat2x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-}
-
-struct S {
-  a : array<mat2x2<f32>, 3>,
-}
-
-struct S_std140 {
-  a : array<mat2x2_f32, 3u>,
-}
-
-@group(0) @binding(0) var<uniform> s : S_std140;
-
-fn load_s_a_p0_p1(p0 : u32, p1 : u32) -> vec2<f32> {
-  switch(p1) {
-    case 0u: {
-      return s.a[p0].col0;
-    }
-    case 1u: {
-      return s.a[p0].col1;
-    }
-    default: {
-      return vec2<f32>();
-    }
-  }
-}
-
-fn f() {
-  let I = 1;
-  let l = load_s_a_p0_p1(u32(I), u32(I));
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, ArrayArrayMatUniform_LoadArrays_Mat2x2F32) {
-    auto* src = R"(
-@group(0) @binding(0) var<uniform> a : array<array<mat2x2<f32>, 3>, 4>;
-
-fn f() {
-  let l = a;
-}
-)";
-
-    auto* expect = R"(
-struct mat2x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<array<mat2x2_f32, 3u>, 4u>;
-
-fn conv_mat2x2_f32(val : mat2x2_f32) -> mat2x2<f32> {
-  return mat2x2<f32>(val.col0, val.col1);
-}
-
-fn conv_arr3_mat2x2_f32(val : array<mat2x2_f32, 3u>) -> array<mat2x2<f32>, 3u> {
-  var arr : array<mat2x2<f32>, 3u>;
-  for(var i : u32; (i < 3u); i = (i + 1)) {
-    arr[i] = conv_mat2x2_f32(val[i]);
-  }
-  return arr;
-}
-
-fn conv_arr4_arr3_mat2x2_f32(val : array<array<mat2x2_f32, 3u>, 4u>) -> array<array<mat2x2<f32>, 3u>, 4u> {
-  var arr : array<array<mat2x2<f32>, 3u>, 4u>;
-  for(var i : u32; (i < 4u); i = (i + 1)) {
-    arr[i] = conv_arr3_mat2x2_f32(val[i]);
-  }
-  return arr;
-}
-
-fn f() {
-  let l = conv_arr4_arr3_mat2x2_f32(a);
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, ArrayArrayMatUniform_LoadArray_ConstOuterArrayIndex_Mat2x2F32) {
-    auto* src = R"(
-@group(0) @binding(0) var<uniform> a : array<array<mat2x2<f32>, 3>, 4>;
-
-fn f() {
-  let l = a[3];
-}
-)";
-
-    auto* expect = R"(
-struct mat2x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<array<mat2x2_f32, 3u>, 4u>;
-
-fn conv_mat2x2_f32(val : mat2x2_f32) -> mat2x2<f32> {
-  return mat2x2<f32>(val.col0, val.col1);
-}
-
-fn conv_arr3_mat2x2_f32(val : array<mat2x2_f32, 3u>) -> array<mat2x2<f32>, 3u> {
-  var arr : array<mat2x2<f32>, 3u>;
-  for(var i : u32; (i < 3u); i = (i + 1)) {
-    arr[i] = conv_mat2x2_f32(val[i]);
-  }
-  return arr;
-}
-
-fn f() {
-  let l = conv_arr3_mat2x2_f32(a[3u]);
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, ArrayArrayMatUniform_LoadArray_VariableOuterArrayIndex_Mat2x2F32) {
-    auto* src = R"(
-@group(0) @binding(0) var<uniform> a : array<array<mat2x2<f32>, 3>, 4>;
-
-fn f() {
-  let I = 1;
-  let l = a[I];
-}
-)";
-
-    auto* expect = R"(
-struct mat2x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<array<mat2x2_f32, 3u>, 4u>;
-
-fn conv_mat2x2_f32(val : mat2x2_f32) -> mat2x2<f32> {
-  return mat2x2<f32>(val.col0, val.col1);
-}
-
-fn conv_arr3_mat2x2_f32(val : array<mat2x2_f32, 3u>) -> array<mat2x2<f32>, 3u> {
-  var arr : array<mat2x2<f32>, 3u>;
-  for(var i : u32; (i < 3u); i = (i + 1)) {
-    arr[i] = conv_mat2x2_f32(val[i]);
-  }
-  return arr;
-}
-
-fn f() {
-  let I = 1;
-  let l = conv_arr3_mat2x2_f32(a[I]);
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32,
-       ArrayArrayMatUniform_LoadMatrix_ConstOuterArrayIndex_ConstInnerArrayIndex_Mat2x2F32) {
-    auto* src = R"(
-@group(0) @binding(0) var<uniform> a : array<array<mat2x2<f32>, 3>, 4>;
-
-fn f() {
-  let l = a[3][2];
-}
-)";
-
-    auto* expect = R"(
-struct mat2x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<array<mat2x2_f32, 3u>, 4u>;
-
-fn conv_mat2x2_f32(val : mat2x2_f32) -> mat2x2<f32> {
-  return mat2x2<f32>(val.col0, val.col1);
-}
-
-fn f() {
-  let l = conv_mat2x2_f32(a[3u][2u]);
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32,
-       ArrayArrayMatUniform_LoadMatrix_ConstOuterArrayIndex_VariableInnerArrayIndex_Mat2x2F32) {
-    auto* src = R"(
-@group(0) @binding(0) var<uniform> a : array<array<mat2x2<f32>, 3>, 4>;
-
-fn f() {
-  let I = 1;
-  let l = a[3][I];
-}
-)";
-
-    auto* expect = R"(
-struct mat2x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<array<mat2x2_f32, 3u>, 4u>;
-
-fn conv_mat2x2_f32(val : mat2x2_f32) -> mat2x2<f32> {
-  return mat2x2<f32>(val.col0, val.col1);
-}
-
-fn f() {
-  let I = 1;
-  let l = conv_mat2x2_f32(a[3u][I]);
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32,
-       ArrayArrayMatUniform_LoadMatrix_VariableOuterArrayIndex_ConstInnerArrayIndex_Mat2x2F32) {
-    auto* src = R"(
-@group(0) @binding(0) var<uniform> a : array<array<mat2x2<f32>, 3>, 4>;
-
-fn f() {
-  let I = 1;
-  let l = a[I][2];
-}
-)";
-
-    auto* expect = R"(
-struct mat2x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<array<mat2x2_f32, 3u>, 4u>;
-
-fn conv_mat2x2_f32(val : mat2x2_f32) -> mat2x2<f32> {
-  return mat2x2<f32>(val.col0, val.col1);
-}
-
-fn f() {
-  let I = 1;
-  let l = conv_mat2x2_f32(a[I][2u]);
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32,
-       ArrayArrayMatUniform_LoadMatrix_VariableOuterArrayIndex_VariableInnerArrayIndex_Mat2x2F32) {
-    auto* src = R"(
-@group(0) @binding(0) var<uniform> a : array<array<mat2x2<f32>, 3>, 4>;
-
-fn f() {
-  let I = 1;
-  let l = a[I][I];
-}
-)";
-
-    auto* expect = R"(
-struct mat2x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<array<mat2x2_f32, 3u>, 4u>;
-
-fn conv_mat2x2_f32(val : mat2x2_f32) -> mat2x2<f32> {
-  return mat2x2<f32>(val.col0, val.col1);
-}
-
-fn f() {
-  let I = 1;
-  let l = conv_mat2x2_f32(a[I][I]);
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(
-    Std140Test_F32,
-    ArrayArrayMatUniform_LoadColumn_ConstOuterArrayIndex_ConstInnerArrayIndex_ConstColumnIndex_Mat2x2F32) {
-    auto* src = R"(
-@group(0) @binding(0) var<uniform> a : array<array<mat2x2<f32>, 3>, 4>;
-
-fn f() {
-  let l = a[3][2][1];
-}
-)";
-
-    auto* expect = R"(
-struct mat2x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<array<mat2x2_f32, 3u>, 4u>;
-
-fn f() {
-  let l = a[3u][2u].col1;
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(
-    Std140Test_F32,
-    ArrayArrayMatUniform_LoadColumn_ConstOuterArrayIndex_ConstInnerArrayIndex_VariableColumnIndex_Mat2x2F32) {
-    auto* src = R"(
-@group(0) @binding(0) var<uniform> a : array<array<mat2x2<f32>, 3>, 4>;
-
-fn f() {
-  let I = 1;
-  let l = a[3][2][I];
-}
-)";
-
-    auto* expect = R"(
-struct mat2x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<array<mat2x2_f32, 3u>, 4u>;
-
-fn load_a_3_2_p0(p0 : u32) -> vec2<f32> {
-  switch(p0) {
-    case 0u: {
-      return a[3u][2u].col0;
-    }
-    case 1u: {
-      return a[3u][2u].col1;
-    }
-    default: {
-      return vec2<f32>();
-    }
-  }
-}
-
-fn f() {
-  let I = 1;
-  let l = load_a_3_2_p0(u32(I));
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(
-    Std140Test_F32,
-    ArrayArrayMatUniform_LoadColumn_ConstOuterArrayIndex_VariableInnerArrayIndex_ConstColumnIndex_Mat2x2F32) {
-    auto* src = R"(
-@group(0) @binding(0) var<uniform> a : array<array<mat2x2<f32>, 3>, 4>;
-
-fn f() {
-  let I = 1;
-  let l = a[3][I][1];
-}
-)";
-
-    auto* expect = R"(
-struct mat2x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<array<mat2x2_f32, 3u>, 4u>;
-
-fn f() {
-  let I = 1;
-  let l = a[3u][I].col1;
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(
-    Std140Test_F32,
-    ArrayArrayMatUniform_LoadColumn_ConstOuterArrayIndex_VariableInnerArrayIndex_VariableColumnIndex_Mat2x2F32) {
-    auto* src = R"(
-@group(0) @binding(0) var<uniform> a : array<array<mat2x2<f32>, 3>, 4>;
-
-fn f() {
-  let I = 1;
-  let J = 2;
-  let l = a[3][I][J];
-}
-)";
-
-    auto* expect = R"(
-struct mat2x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<array<mat2x2_f32, 3u>, 4u>;
-
-fn load_a_3_p0_p1(p0 : u32, p1 : u32) -> vec2<f32> {
-  switch(p1) {
-    case 0u: {
-      return a[3u][p0].col0;
-    }
-    case 1u: {
-      return a[3u][p0].col1;
-    }
-    default: {
-      return vec2<f32>();
-    }
-  }
-}
-
-fn f() {
-  let I = 1;
-  let J = 2;
-  let l = load_a_3_p0_p1(u32(I), u32(J));
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(
-    Std140Test_F32,
-    ArrayArrayMatUniform_LoadColumn_VariableOuterArrayIndex_ConstInnerArrayIndex_ConstColumnIndex_Mat2x2F32) {
-    auto* src = R"(
-@group(0) @binding(0) var<uniform> a : array<array<mat2x2<f32>, 3>, 4>;
-
-fn f() {
-  let I = 1;
-  let l = a[I][2][1];
-}
-)";
-
-    auto* expect = R"(
-struct mat2x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<array<mat2x2_f32, 3u>, 4u>;
-
-fn f() {
-  let I = 1;
-  let l = a[I][2u].col1;
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(
-    Std140Test_F32,
-    ArrayArrayMatUniform_LoadColumn_VariableOuterArrayIndex_ConstInnerArrayIndex_VariableColumnIndex_Mat2x2F32) {
-    auto* src = R"(
-@group(0) @binding(0) var<uniform> a : array<array<mat2x2<f32>, 3>, 4>;
-
-fn f() {
-  let I = 1;
-  let J = 2;
-  let l = a[I][2][J];
-}
-)";
-
-    auto* expect = R"(
-struct mat2x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<array<mat2x2_f32, 3u>, 4u>;
-
-fn load_a_p0_2_p1(p0 : u32, p1 : u32) -> vec2<f32> {
-  switch(p1) {
-    case 0u: {
-      return a[p0][2u].col0;
-    }
-    case 1u: {
-      return a[p0][2u].col1;
-    }
-    default: {
-      return vec2<f32>();
-    }
-  }
-}
-
-fn f() {
-  let I = 1;
-  let J = 2;
-  let l = load_a_p0_2_p1(u32(I), u32(J));
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(
-    Std140Test_F32,
-    ArrayArrayMatUniform_LoadColumn_VariableOuterArrayIndex_VariableInnerArrayIndex_ConstColumnIndex_Mat2x2F32) {
-    auto* src = R"(
-@group(0) @binding(0) var<uniform> a : array<array<mat2x2<f32>, 3>, 4>;
-
-fn f() {
-  let I = 1;
-  let J = 2;
-  let l = a[I][J][1];
-}
-)";
-
-    auto* expect = R"(
-struct mat2x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<array<mat2x2_f32, 3u>, 4u>;
-
-fn f() {
-  let I = 1;
-  let J = 2;
-  let l = a[I][J].col1;
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(
-    Std140Test_F32,
-    ArrayArrayMatUniform_LoadColumn_VariableOuterArrayIndex_VariableInnerArrayIndex_VariableColumnIndex_Mat2x2F32) {
-    auto* src = R"(
-@group(0) @binding(0) var<uniform> a : array<array<mat2x2<f32>, 3>, 4>;
-
-fn f() {
-  let I = 0;
-  let J = 1;
-  let K = 2;
-  let l = a[I][J][K];
-}
-)";
-
-    auto* expect = R"(
-struct mat2x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<array<mat2x2_f32, 3u>, 4u>;
-
-fn load_a_p0_p1_p2(p0 : u32, p1 : u32, p2 : u32) -> vec2<f32> {
-  switch(p2) {
-    case 0u: {
-      return a[p0][p1].col0;
-    }
-    case 1u: {
-      return a[p0][p1].col1;
-    }
-    default: {
-      return vec2<f32>();
-    }
-  }
-}
-
-fn f() {
-  let I = 0;
-  let J = 1;
-  let K = 2;
-  let l = load_a_p0_p1_p2(u32(I), u32(J), u32(K));
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
 }  // namespace
 }  // namespace tint::transform
diff --git a/src/tint/writer/hlsl/generator_impl.cc b/src/tint/writer/hlsl/generator_impl.cc
index dcad1b5..1e786da 100644
--- a/src/tint/writer/hlsl/generator_impl.cc
+++ b/src/tint/writer/hlsl/generator_impl.cc
@@ -1097,31 +1097,62 @@
     const auto& args = expr->args;
     auto* offset_arg = builder_.Sem().Get(args[1]);
 
-    uint32_t scalar_offset_value = 0;
-    std::string scalar_offset_expr;
+    // offset in bytes
+    uint32_t scalar_offset_bytes = 0;
+    // offset in uint (4 bytes)
+    uint32_t scalar_offset_index = 0;
+    // expression to calculate offset in bytes
+    std::string scalar_offset_bytes_expr;
+    // expression to calculate offset in uint, by dividing scalar_offset_bytes_expr by 4
+    std::string scalar_offset_index_expr;
+    // expression to calculate offset in uint directly, used when the byte offset is not needed
+    std::string scalar_offset_index_unified_expr;
 
-    // If true, use scalar_offset_value, otherwise use scalar_offset_expr
+    // If true, use scalar_offset_index, otherwise use scalar_offset_index_expr
     bool scalar_offset_constant = false;
 
     if (auto* val = offset_arg->ConstantValue()) {
         TINT_ASSERT(Writer, val->Type()->Is<sem::U32>());
-        scalar_offset_value = static_cast<uint32_t>(std::get<AInt>(val->Value()));
-        scalar_offset_value /= 4;  // bytes -> scalar index
+        scalar_offset_bytes = static_cast<uint32_t>(std::get<AInt>(val->Value()));
+        scalar_offset_index = scalar_offset_bytes / 4;  // bytes -> scalar index
         scalar_offset_constant = true;
     }
 
+    // If true, scalar_offset_bytes or scalar_offset_bytes_expr should be used, otherwise only use
+    // scalar_offset_index or scalar_offset_index_unified_expr. Currently only loading an f16
+    // scalar requires the offset in bytes.
+    const bool need_offset_in_bytes =
+        intrinsic->type == transform::DecomposeMemoryAccess::Intrinsic::DataType::kF16;
+
     if (!scalar_offset_constant) {
         // UBO offset not compile-time known.
         // Calculate the scalar offset into a temporary.
-        scalar_offset_expr = UniqueIdentifier("scalar_offset");
-        auto pre = line();
-        pre << "const uint " << scalar_offset_expr << " = (";
-        if (!EmitExpression(pre, args[1])) {  // offset
-            return false;
+        if (need_offset_in_bytes) {
+            scalar_offset_bytes_expr = UniqueIdentifier("scalar_offset_bytes");
+            scalar_offset_index_expr = UniqueIdentifier("scalar_offset_index");
+            {
+                auto pre = line();
+                pre << "const uint " << scalar_offset_bytes_expr << " = (";
+                if (!EmitExpression(pre, args[1])) {  // offset
+                    return false;
+                }
+                pre << ");";
+            }
+            line() << "const uint " << scalar_offset_index_expr << " = " << scalar_offset_bytes_expr
+                   << " / 4;";
+        } else {
+            scalar_offset_index_unified_expr = UniqueIdentifier("scalar_offset");
+            auto pre = line();
+            pre << "const uint " << scalar_offset_index_unified_expr << " = (";
+            if (!EmitExpression(pre, args[1])) {  // offset
+                return false;
+            }
+            pre << ") / 4;";
         }
-        pre << ") / 4;";
     }
 
+    constexpr const char swizzle[] = {'x', 'y', 'z', 'w'};
+
     using Op = transform::DecomposeMemoryAccess::Intrinsic::Op;
     using DataType = transform::DecomposeMemoryAccess::Intrinsic::DataType;
     switch (intrinsic->op) {
@@ -1132,27 +1163,28 @@
                 out << ")";
                 return result;
             };
-            auto load_scalar = [&]() {
-                if (!EmitExpression(out, args[0])) {  // buffer
+            auto load_u32_to = [&](std::ostream& target) {
+                if (!EmitExpression(target, args[0])) {  // buffer
                     return false;
                 }
                 if (scalar_offset_constant) {
-                    char swizzle[] = {'x', 'y', 'z', 'w'};
-                    out << "[" << (scalar_offset_value / 4) << "]."
-                        << swizzle[scalar_offset_value & 3];
+                    target << "[" << (scalar_offset_index / 4) << "]."
+                           << swizzle[scalar_offset_index & 3];
                 } else {
-                    out << "[" << scalar_offset_expr << " / 4][" << scalar_offset_expr << " % 4]";
+                    target << "[" << scalar_offset_index_unified_expr << " / 4]["
+                           << scalar_offset_index_unified_expr << " % 4]";
                 }
                 return true;
             };
+            auto load_u32 = [&] { return load_u32_to(out); };
             // Has a minimum alignment of 8 bytes, so is either .xy or .zw
-            auto load_vec2 = [&] {
+            auto load_vec2_u32_to = [&](std::ostream& target) {
                 if (scalar_offset_constant) {
-                    if (!EmitExpression(out, args[0])) {  // buffer
+                    if (!EmitExpression(target, args[0])) {  // buffer
                         return false;
                     }
-                    out << "[" << (scalar_offset_value / 4) << "]";
-                    out << ((scalar_offset_value & 2) == 0 ? ".xy" : ".zw");
+                    target << "[" << (scalar_offset_index / 4) << "]";
+                    target << ((scalar_offset_index & 2) == 0 ? ".xy" : ".zw");
                 } else {
                     std::string ubo_load = UniqueIdentifier("ubo_load");
                     {
@@ -1161,58 +1193,190 @@
                         if (!EmitExpression(pre, args[0])) {  // buffer
                             return false;
                         }
-                        pre << "[" << scalar_offset_expr << " / 4];";
+                        pre << "[" << scalar_offset_index_unified_expr << " / 4];";
                     }
-                    out << "((" << scalar_offset_expr << " & 2) ? " << ubo_load
-                        << ".zw : " << ubo_load << ".xy)";
+                    target << "((" << scalar_offset_index_unified_expr << " & 2) ? " << ubo_load
+                           << ".zw : " << ubo_load << ".xy)";
                 }
                 return true;
             };
+            auto load_vec2_u32 = [&] { return load_vec2_u32_to(out); };
             // vec4 has a minimum alignment of 16 bytes, easiest case
-            auto load_vec4 = [&] {
+            auto load_vec4_u32 = [&] {
                 if (!EmitExpression(out, args[0])) {  // buffer
                     return false;
                 }
                 if (scalar_offset_constant) {
-                    out << "[" << (scalar_offset_value / 4) << "]";
+                    out << "[" << (scalar_offset_index / 4) << "]";
                 } else {
-                    out << "[" << scalar_offset_expr << " / 4]";
+                    out << "[" << scalar_offset_index_unified_expr << " / 4]";
                 }
                 return true;
             };
             // vec3 has a minimum alignment of 16 bytes, so is just a .xyz swizzle
-            auto load_vec3 = [&] {
-                if (!load_vec4()) {
+            auto load_vec3_u32 = [&] {
+                if (!load_vec4_u32()) {
                     return false;
                 }
                 out << ".xyz";
                 return true;
             };
+            auto load_scalar_f16 = [&] {
+                // offset bytes = 4k,   ((buffer[index].x) & 0xFFFF)
+                // offset bytes = 4k+2, ((buffer[index].x >> 16) & 0xFFFF)
+                out << "float16_t(f16tof32(((";
+                if (!EmitExpression(out, args[0])) {  // buffer
+                    return false;
+                }
+                if (scalar_offset_constant) {
+                    out << "[" << (scalar_offset_index / 4) << "]."
+                        << swizzle[scalar_offset_index & 3];
+                    // The WGSL spec ensures a little-endian memory layout.
+                    if (scalar_offset_bytes % 4 == 0) {
+                        out << ") & 0xFFFF)";
+                    } else {
+                        out << " >> 16) & 0xFFFF)";
+                    }
+                } else {
+                    out << "[" << scalar_offset_index_expr << " / 4][" << scalar_offset_index_expr
+                        << " % 4] >> (" << scalar_offset_bytes_expr
+                        << " % 4 == 0 ? 0 : 16)) & 0xFFFF)";
+                }
+                out << "))";
+                return true;
+            };
+            auto load_vec2_f16 = [&] {
+                // vec2<f16> is aligned to 4 bytes
+                // Prelude code loads the vec2<f16> data as a uint:
+                //     uint ubo_load = buffer[id0][id1];
+                // Loading code converts it to vec2<f16>:
+                //     vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)),
+                //     float16_t(f16tof32(ubo_load >> 16)))
+                std::string ubo_load = UniqueIdentifier("ubo_load");
+                {
+                    auto pre = line();
+                    // Load the 4 bytes f16 vector as an uint
+                    pre << "uint " << ubo_load << " = ";
+                    if (!load_u32_to(pre)) {
+                        return false;
+                    }
+                    pre << ";";
+                }
+                out << "vector<float16_t, 2>(float16_t(f16tof32(" << ubo_load
+                    << " & 0xFFFF)), float16_t(f16tof32(" << ubo_load << " >> 16)))";
+                return true;
+            };
+            auto load_vec3_f16 = [&] {
+                // vec3<f16> is aligned to 8 bytes
+                // Prelude code loads the vec3<f16> data as a uint2 and converts its elements to
+                // float16_t:
+                //     uint2 ubo_load = buffer[id0].xy;
+                //     /* The low 16 bits of both uints are the x and z elements of vec3<f16> */
+                //     vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load &
+                //     0xFFFF));
+                //     /* The high 16 bits of the first uint are the y element of vec3<f16> */
+                //     float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+                // Loading code converts it to vec3<f16>:
+                //     vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1])
+                std::string ubo_load = UniqueIdentifier("ubo_load");
+                std::string ubo_load_xz = UniqueIdentifier(ubo_load + "_xz");
+                std::string ubo_load_y = UniqueIdentifier(ubo_load + "_y");
+                {
+                    auto pre = line();
+                    // Load the 8 bytes uint2 with the f16 vector at lower 6 bytes
+                    pre << "uint2 " << ubo_load << " = ";
+                    if (!load_vec2_u32_to(pre)) {
+                        return false;
+                    }
+                    pre << ";";
+                }
+                {
+                    auto pre = line();
+                    pre << "vector<float16_t, 2> " << ubo_load_xz
+                        << " = vector<float16_t, 2>(f16tof32(" << ubo_load << " & 0xFFFF));";
+                }
+                {
+                    auto pre = line();
+                    pre << "float16_t " << ubo_load_y << " = f16tof32(" << ubo_load
+                        << "[0] >> 16);";
+                }
+                out << "vector<float16_t, 3>(" << ubo_load_xz << "[0], " << ubo_load_y << ", "
+                    << ubo_load_xz << "[1])";
+                return true;
+            };
+            auto load_vec4_f16 = [&] {
+                // vec4<f16> is aligned to 8 bytes
+                // Prelude code loads the vec4<f16> data as a uint2 and converts its elements to
+                // float16_t:
+                //     uint2 ubo_load = buffer[id0].xy;
+                //     /* The low 16 bits of both uints are the x and z elements of vec4<f16> */
+                //     vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load &
+                //     0xFFFF));
+                //     /* The high 16 bits of both uints are the y and w elements of vec4<f16> */
+                //     vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >>
+                //     16));
+                // Loading code converts it to vec4<f16>:
+                //     vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1],
+                //     ubo_load_yw[1])
+                std::string ubo_load = UniqueIdentifier("ubo_load");
+                std::string ubo_load_xz = UniqueIdentifier(ubo_load + "_xz");
+                std::string ubo_load_yw = UniqueIdentifier(ubo_load + "_yw");
+                {
+                    auto pre = line();
+                    // Load the 8 bytes f16 vector as an uint2
+                    pre << "uint2 " << ubo_load << " = ";
+                    if (!load_vec2_u32_to(pre)) {
+                        return false;
+                    }
+                    pre << ";";
+                }
+                {
+                    auto pre = line();
+                    pre << "vector<float16_t, 2> " << ubo_load_xz
+                        << " = vector<float16_t, 2>(f16tof32(" << ubo_load << " & 0xFFFF));";
+                }
+                {
+                    auto pre = line();
+                    pre << "vector<float16_t, 2> " << ubo_load_yw
+                        << " = vector<float16_t, 2>(f16tof32(" << ubo_load << " >> 16));";
+                }
+                out << "vector<float16_t, 4>(" << ubo_load_xz << "[0], " << ubo_load_yw << "[0], "
+                    << ubo_load_xz << "[1], " << ubo_load_yw << "[1])";
+                return true;
+            };
             switch (intrinsic->type) {
                 case DataType::kU32:
-                    return load_scalar();
+                    return load_u32();
                 case DataType::kF32:
-                    return cast("asfloat", load_scalar);
+                    return cast("asfloat", load_u32);
                 case DataType::kI32:
-                    return cast("asint", load_scalar);
+                    return cast("asint", load_u32);
+                case DataType::kF16:
+                    return load_scalar_f16();
                 case DataType::kVec2U32:
-                    return load_vec2();
+                    return load_vec2_u32();
                 case DataType::kVec2F32:
-                    return cast("asfloat", load_vec2);
+                    return cast("asfloat", load_vec2_u32);
                 case DataType::kVec2I32:
-                    return cast("asint", load_vec2);
+                    return cast("asint", load_vec2_u32);
+                case DataType::kVec2F16:
+                    return load_vec2_f16();
                 case DataType::kVec3U32:
-                    return load_vec3();
+                    return load_vec3_u32();
                 case DataType::kVec3F32:
-                    return cast("asfloat", load_vec3);
+                    return cast("asfloat", load_vec3_u32);
                 case DataType::kVec3I32:
-                    return cast("asint", load_vec3);
+                    return cast("asint", load_vec3_u32);
+                case DataType::kVec3F16:
+                    return load_vec3_f16();
                 case DataType::kVec4U32:
-                    return load_vec4();
+                    return load_vec4_u32();
                 case DataType::kVec4F32:
-                    return cast("asfloat", load_vec4);
+                    return cast("asfloat", load_vec4_u32);
                 case DataType::kVec4I32:
-                    return cast("asint", load_vec4);
+                    return cast("asint", load_vec4_u32);
+                case DataType::kVec4F16:
+                    return load_vec4_f16();
             }
             TINT_UNREACHABLE(Writer, diagnostics_)
                 << "unsupported DecomposeMemoryAccess::Intrinsic::DataType: "
@@ -1257,6 +1421,20 @@
                 }
                 return true;
             };
+            // Templated load used for f16 types; requires SM6.2 or higher and DXC.
+            // For example, for the f16 type, set the type parameter to "float16_t"
+            // to emit `buffer.Load<float16_t>(offset)`.
+            auto templated_load = [&](const char* type) {
+                if (!EmitExpression(out, args[0])) {  // buffer
+                    return false;
+                }
+                out << ".Load<" << type << ">";  // templated load
+                ScopedParen sp(out);
+                if (!EmitExpression(out, args[1])) {  // offset
+                    return false;
+                }
+                return true;
+            };
             switch (intrinsic->type) {
                 case DataType::kU32:
                     return load(nullptr, 1);
@@ -1264,24 +1442,32 @@
                     return load("asfloat", 1);
                 case DataType::kI32:
                     return load("asint", 1);
+                case DataType::kF16:
+                    return templated_load("float16_t");
                 case DataType::kVec2U32:
                     return load(nullptr, 2);
                 case DataType::kVec2F32:
                     return load("asfloat", 2);
                 case DataType::kVec2I32:
                     return load("asint", 2);
+                case DataType::kVec2F16:
+                    return templated_load("vector<float16_t, 2> ");
                 case DataType::kVec3U32:
                     return load(nullptr, 3);
                 case DataType::kVec3F32:
                     return load("asfloat", 3);
                 case DataType::kVec3I32:
                     return load("asint", 3);
+                case DataType::kVec3F16:
+                    return templated_load("vector<float16_t, 3> ");
                 case DataType::kVec4U32:
                     return load(nullptr, 4);
                 case DataType::kVec4F32:
                     return load("asfloat", 4);
                 case DataType::kVec4I32:
                     return load("asint", 4);
+                case DataType::kVec4F16:
+                    return templated_load("vector<float16_t, 4> ");
             }
             TINT_UNREACHABLE(Writer, diagnostics_)
                 << "unsupported DecomposeMemoryAccess::Intrinsic::DataType: "
@@ -1309,6 +1495,24 @@
                 }
                 return true;
             };
+            // Templated store used for f16 types; requires SM6.2 or higher and DXC.
+            // For example, for the f16 type, set the type parameter to "float16_t"
+            // to emit `buffer.Store<float16_t>(offset, value)`.
+            auto templated_store = [&](const char* type) {
+                if (!EmitExpression(out, args[0])) {  // buffer
+                    return false;
+                }
+                out << ".Store<" << type << ">";  // templated store
+                ScopedParen sp1(out);
+                if (!EmitExpression(out, args[1])) {  // offset
+                    return false;
+                }
+                out << ", ";
+                if (!EmitExpression(out, args[2])) {  // value
+                    return false;
+                }
+                return true;
+            };
             switch (intrinsic->type) {
                 case DataType::kU32:
                     return store(1);
@@ -1316,24 +1520,32 @@
                     return store(1);
                 case DataType::kI32:
                     return store(1);
+                case DataType::kF16:
+                    return templated_store("float16_t");
                 case DataType::kVec2U32:
                     return store(2);
                 case DataType::kVec2F32:
                     return store(2);
                 case DataType::kVec2I32:
                     return store(2);
+                case DataType::kVec2F16:
+                    return templated_store("vector<float16_t, 2> ");
                 case DataType::kVec3U32:
                     return store(3);
                 case DataType::kVec3F32:
                     return store(3);
                 case DataType::kVec3I32:
                     return store(3);
+                case DataType::kVec3F16:
+                    return templated_store("vector<float16_t, 3> ");
                 case DataType::kVec4U32:
                     return store(4);
                 case DataType::kVec4F32:
                     return store(4);
                 case DataType::kVec4I32:
                     return store(4);
+                case DataType::kVec4F16:
+                    return templated_store("vector<float16_t, 4> ");
             }
             TINT_UNREACHABLE(Writer, diagnostics_)
                 << "unsupported DecomposeMemoryAccess::Intrinsic::DataType: "
diff --git a/src/tint/writer/hlsl/generator_impl_member_accessor_test.cc b/src/tint/writer/hlsl/generator_impl_member_accessor_test.cc
index 1fbef9b..b3440b8 100644
--- a/src/tint/writer/hlsl/generator_impl_member_accessor_test.cc
+++ b/src/tint/writer/hlsl/generator_impl_member_accessor_test.cc
@@ -34,6 +34,9 @@
 inline const ast::Type* ty_f32(const ProgramBuilder::TypesBuilder& ty) {
     return ty.f32();
 }
+inline const ast::Type* ty_f16(const ProgramBuilder::TypesBuilder& ty) {
+    return ty.f16();
+}
 template <typename T>
 inline const ast::Type* ty_vec2(const ProgramBuilder::TypesBuilder& ty) {
     return ty.vec2<T>();
@@ -94,6 +97,14 @@
                     b.Group(1_a), b.Binding(0_a));
     }
 
+    void SetupUniformBuffer(utils::VectorRef<const ast::StructMember*> members) {
+        ProgramBuilder& b = *this;
+        auto* s = b.Structure("Data", members);
+
+        b.GlobalVar("data", b.ty.Of(s), ast::AddressSpace::kUniform, ast::Access::kUndefined,
+                    b.Group(1_a), b.Binding(1_a));
+    }
+
     void SetupFunction(utils::VectorRef<const ast::Statement*> statements) {
         ProgramBuilder& b = *this;
         utils::Vector attrs{
@@ -144,18 +155,21 @@
     return out;
 }
 
-using HlslGeneratorImplTest_MemberAccessor_StorageBufferLoad =
+using HlslGeneratorImplTest_MemberAccessor_StorageBufferLoad_ConstantOffset =
     HlslGeneratorImplTest_MemberAccessorWithParam<TypeCase>;
-TEST_P(HlslGeneratorImplTest_MemberAccessor_StorageBufferLoad, Test) {
+
+TEST_P(HlslGeneratorImplTest_MemberAccessor_StorageBufferLoad_ConstantOffset, Test) {
     // struct Data {
-    //   a : i32;
-    //   b : <type>;
+    //   a : i32,
+    //   b : <type>,
     // };
     // var<storage> data : Data;
     // data.b;
 
     auto p = GetParam();
 
+    Enable(ast::Extension::kF16);
+
     SetupStorageBuffer(utils::Vector{
         Member("a", ty.i32()),
         Member("b", p.member_type(ty)),
@@ -173,60 +187,813 @@
 
 INSTANTIATE_TEST_SUITE_P(
     HlslGeneratorImplTest_MemberAccessor,
-    HlslGeneratorImplTest_MemberAccessor_StorageBufferLoad,
+    HlslGeneratorImplTest_MemberAccessor_StorageBufferLoad_ConstantOffset,
+    testing::Values(TypeCase{ty_u32, "data.Load(4u)"},
+                    TypeCase{ty_f32, "asfloat(data.Load(4u))"},
+                    TypeCase{ty_i32, "asint(data.Load(4u))"},
+                    TypeCase{ty_f16, "data.Load<float16_t>(4u)"},
+                    TypeCase{ty_vec2<u32>, "data.Load2(8u)"},
+                    TypeCase{ty_vec2<f32>, "asfloat(data.Load2(8u))"},
+                    TypeCase{ty_vec2<i32>, "asint(data.Load2(8u))"},
+                    TypeCase{ty_vec2<f16>, "data.Load<vector<float16_t, 2> >(4u)"},
+                    TypeCase{ty_vec3<u32>, "data.Load3(16u)"},
+                    TypeCase{ty_vec3<f32>, "asfloat(data.Load3(16u))"},
+                    TypeCase{ty_vec3<i32>, "asint(data.Load3(16u))"},
+                    TypeCase{ty_vec3<f16>, "data.Load<vector<float16_t, 3> >(8u)"},
+                    TypeCase{ty_vec4<u32>, "data.Load4(16u)"},
+                    TypeCase{ty_vec4<f32>, "asfloat(data.Load4(16u))"},
+                    TypeCase{ty_vec4<i32>, "asint(data.Load4(16u))"},
+                    TypeCase{ty_vec4<f16>, "data.Load<vector<float16_t, 4> >(8u)"},
+                    TypeCase{ty_mat2x2<f32>,
+                             "return float2x2(asfloat(buffer.Load2((offset + 0u))), "
+                             "asfloat(buffer.Load2((offset + 8u))));"},
+                    TypeCase{ty_mat2x3<f32>,
+                             "return float2x3(asfloat(buffer.Load3((offset + 0u))), "
+                             "asfloat(buffer.Load3((offset + 16u))));"},
+                    TypeCase{ty_mat2x4<f32>,
+                             "return float2x4(asfloat(buffer.Load4((offset + 0u))), "
+                             "asfloat(buffer.Load4((offset + 16u))));"},
+                    TypeCase{ty_mat3x2<f32>,
+                             "return float3x2(asfloat(buffer.Load2((offset + 0u))), "
+                             "asfloat(buffer.Load2((offset + 8u))), "
+                             "asfloat(buffer.Load2((offset + 16u))));"},
+                    TypeCase{ty_mat3x3<f32>,
+                             "return float3x3(asfloat(buffer.Load3((offset + 0u))), "
+                             "asfloat(buffer.Load3((offset + 16u))), "
+                             "asfloat(buffer.Load3((offset + 32u))));"},
+                    TypeCase{ty_mat3x4<f32>,
+                             "return float3x4(asfloat(buffer.Load4((offset + 0u))), "
+                             "asfloat(buffer.Load4((offset + 16u))), "
+                             "asfloat(buffer.Load4((offset + 32u))));"},
+                    TypeCase{ty_mat4x2<f32>,
+                             "return float4x2(asfloat(buffer.Load2((offset + 0u))), "
+                             "asfloat(buffer.Load2((offset + 8u))), "
+                             "asfloat(buffer.Load2((offset + 16u))), "
+                             "asfloat(buffer.Load2((offset + 24u))));"},
+                    TypeCase{ty_mat4x3<f32>,
+                             "return float4x3(asfloat(buffer.Load3((offset + 0u))), "
+                             "asfloat(buffer.Load3((offset + 16u))), "
+                             "asfloat(buffer.Load3((offset + 32u))), "
+                             "asfloat(buffer.Load3((offset + 48u))));"},
+                    TypeCase{ty_mat4x4<f32>,
+                             "return float4x4(asfloat(buffer.Load4((offset + 0u))), "
+                             "asfloat(buffer.Load4((offset + 16u))), "
+                             "asfloat(buffer.Load4((offset + 32u))), "
+                             "asfloat(buffer.Load4((offset + 48u))));"},
+                    TypeCase{ty_mat2x2<f16>,
+                             "return matrix<float16_t, 2, 2>("
+                             "buffer.Load<vector<float16_t, 2> >((offset + 0u)), "
+                             "buffer.Load<vector<float16_t, 2> >((offset + 4u)));"},
+                    TypeCase{ty_mat2x3<f16>,
+                             "return matrix<float16_t, 2, 3>("
+                             "buffer.Load<vector<float16_t, 3> >((offset + 0u)), "
+                             "buffer.Load<vector<float16_t, 3> >((offset + 8u)));"},
+                    TypeCase{ty_mat2x4<f16>,
+                             "return matrix<float16_t, 2, 4>("
+                             "buffer.Load<vector<float16_t, 4> >((offset + 0u)), "
+                             "buffer.Load<vector<float16_t, 4> >((offset + 8u)));"},
+                    TypeCase{ty_mat3x2<f16>,
+                             "return matrix<float16_t, 3, 2>("
+                             "buffer.Load<vector<float16_t, 2> >((offset + 0u)), "
+                             "buffer.Load<vector<float16_t, 2> >((offset + 4u)), "
+                             "buffer.Load<vector<float16_t, 2> >((offset + 8u)));"},
+                    TypeCase{ty_mat3x3<f16>,
+                             "return matrix<float16_t, 3, 3>("
+                             "buffer.Load<vector<float16_t, 3> >((offset + 0u)), "
+                             "buffer.Load<vector<float16_t, 3> >((offset + 8u)), "
+                             "buffer.Load<vector<float16_t, 3> >((offset + 16u)));"},
+                    TypeCase{ty_mat3x4<f16>,
+                             "return matrix<float16_t, 3, 4>("
+                             "buffer.Load<vector<float16_t, 4> >((offset + 0u)), "
+                             "buffer.Load<vector<float16_t, 4> >((offset + 8u)), "
+                             "buffer.Load<vector<float16_t, 4> >((offset + 16u)));"},
+                    TypeCase{ty_mat4x2<f16>,
+                             "return matrix<float16_t, 4, 2>("
+                             "buffer.Load<vector<float16_t, 2> >((offset + 0u)), "
+                             "buffer.Load<vector<float16_t, 2> >((offset + 4u)), "
+                             "buffer.Load<vector<float16_t, 2> >((offset + 8u)), "
+                             "buffer.Load<vector<float16_t, 2> >((offset + 12u)));"},
+                    TypeCase{ty_mat4x3<f16>,
+                             "return matrix<float16_t, 4, 3>("
+                             "buffer.Load<vector<float16_t, 3> >((offset + 0u)), "
+                             "buffer.Load<vector<float16_t, 3> >((offset + 8u)), "
+                             "buffer.Load<vector<float16_t, 3> >((offset + 16u)), "
+                             "buffer.Load<vector<float16_t, 3> >((offset + 24u)));"},
+                    TypeCase{ty_mat4x4<f16>,
+                             "return matrix<float16_t, 4, 4>("
+                             "buffer.Load<vector<float16_t, 4> >((offset + 0u)), "
+                             "buffer.Load<vector<float16_t, 4> >((offset + 8u)), "
+                             "buffer.Load<vector<float16_t, 4> >((offset + 16u)), "
+                             "buffer.Load<vector<float16_t, 4> >((offset + 24u)));"}));
+
+using HlslGeneratorImplTest_MemberAccessor_StorageBufferLoad_DynamicOffset =
+    HlslGeneratorImplTest_MemberAccessorWithParam<TypeCase>;
+
+TEST_P(HlslGeneratorImplTest_MemberAccessor_StorageBufferLoad_DynamicOffset, Test) {
+    // struct Inner {
+    //   a : i32,
+    //   b : <type>,
+    //   c : vec4<i32>,
+    // };
+    // struct Data {
+    //  arr : array<Inner, 4i>,
+    // }
+    // var<storage> data : Data;
+    // data.arr[i].b;
+
+    auto p = GetParam();
+
+    Enable(ast::Extension::kF16);
+
+    auto* inner = Structure("Inner", utils::Vector{
+                                         Member("a", ty.i32()),
+                                         Member("b", p.member_type(ty)),
+                                         Member("c", ty.vec4(ty.i32())),
+                                     });
+
+    SetupStorageBuffer(utils::Vector{
+        Member("arr", ty.array(ty.Of(inner), 4_i)),
+    });
+
+    auto* i = Var("i", Expr(2_i));
+
+    SetupFunction(utils::Vector{
+        Decl(i),
+        Decl(Var("x", MemberAccessor(IndexAccessor(MemberAccessor("data", "arr"), i), "b"))),
+    });
+
+    GeneratorImpl& gen = SanitizeAndBuild();
+
+    ASSERT_TRUE(gen.Generate()) << gen.error();
+    EXPECT_THAT(gen.result(), HasSubstr(p.expected));
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    HlslGeneratorImplTest_MemberAccessor,
+    HlslGeneratorImplTest_MemberAccessor_StorageBufferLoad_DynamicOffset,
     testing::Values(
-        TypeCase{ty_u32, "data.Load(4u)"},
-        TypeCase{ty_f32, "asfloat(data.Load(4u))"},
-        TypeCase{ty_i32, "asint(data.Load(4u))"},
-        TypeCase{ty_vec2<u32>, "data.Load2(8u)"},
-        TypeCase{ty_vec2<f32>, "asfloat(data.Load2(8u))"},
-        TypeCase{ty_vec2<i32>, "asint(data.Load2(8u))"},
-        TypeCase{ty_vec3<u32>, "data.Load3(16u)"},
-        TypeCase{ty_vec3<f32>, "asfloat(data.Load3(16u))"},
-        TypeCase{ty_vec3<i32>, "asint(data.Load3(16u))"},
-        TypeCase{ty_vec4<u32>, "data.Load4(16u)"},
-        TypeCase{ty_vec4<f32>, "asfloat(data.Load4(16u))"},
-        TypeCase{ty_vec4<i32>, "asint(data.Load4(16u))"},
-        TypeCase{
-            ty_mat2x2<f32>,
-            R"(return float2x2(asfloat(buffer.Load2((offset + 0u))), asfloat(buffer.Load2((offset + 8u))));)"},
-        TypeCase{
-            ty_mat2x3<f32>,
-            R"(return float2x3(asfloat(buffer.Load3((offset + 0u))), asfloat(buffer.Load3((offset + 16u))));)"},
-        TypeCase{
-            ty_mat2x4<f32>,
-            R"(return float2x4(asfloat(buffer.Load4((offset + 0u))), asfloat(buffer.Load4((offset + 16u))));)"},
-        TypeCase{
-            ty_mat3x2<f32>,
-            R"(return float3x2(asfloat(buffer.Load2((offset + 0u))), asfloat(buffer.Load2((offset + 8u))), asfloat(buffer.Load2((offset + 16u))));)"},
-        TypeCase{
-            ty_mat3x3<f32>,
-            R"(return float3x3(asfloat(buffer.Load3((offset + 0u))), asfloat(buffer.Load3((offset + 16u))), asfloat(buffer.Load3((offset + 32u))));)"},
-        TypeCase{
-            ty_mat3x4<f32>,
-            R"(return float3x4(asfloat(buffer.Load4((offset + 0u))), asfloat(buffer.Load4((offset + 16u))), asfloat(buffer.Load4((offset + 32u))));)"},
-        TypeCase{
-            ty_mat4x2<f32>,
-            R"(return float4x2(asfloat(buffer.Load2((offset + 0u))), asfloat(buffer.Load2((offset + 8u))), asfloat(buffer.Load2((offset + 16u))), asfloat(buffer.Load2((offset + 24u))));)"},
-        TypeCase{
-            ty_mat4x3<f32>,
-            R"(return float4x3(asfloat(buffer.Load3((offset + 0u))), asfloat(buffer.Load3((offset + 16u))), asfloat(buffer.Load3((offset + 32u))), asfloat(buffer.Load3((offset + 48u))));)"},
-        TypeCase{
-            ty_mat4x4<f32>,
-            R"(return float4x4(asfloat(buffer.Load4((offset + 0u))), asfloat(buffer.Load4((offset + 16u))), asfloat(buffer.Load4((offset + 32u))), asfloat(buffer.Load4((offset + 48u))));)"}));
+        TypeCase{ty_u32, "data.Load(((32u * uint(i)) + 4u))"},
+        TypeCase{ty_f32, "asfloat(data.Load(((32u * uint(i)) + 4u)))"},
+        TypeCase{ty_i32, "asint(data.Load(((32u * uint(i)) + 4u)))"},
+        TypeCase{ty_f16, "data.Load<float16_t>(((32u * uint(i)) + 4u))"},
+        TypeCase{ty_vec2<u32>, "data.Load2(((32u * uint(i)) + 8u))"},
+        TypeCase{ty_vec2<f32>, "asfloat(data.Load2(((32u * uint(i)) + 8u)))"},
+        TypeCase{ty_vec2<i32>, "asint(data.Load2(((32u * uint(i)) + 8u)))"},
+        TypeCase{ty_vec2<f16>, "data.Load<vector<float16_t, 2> >(((32u * uint(i)) + 4u))"},
+        TypeCase{ty_vec3<u32>, "data.Load3(((48u * uint(i)) + 16u))"},
+        TypeCase{ty_vec3<f32>, "asfloat(data.Load3(((48u * uint(i)) + 16u)))"},
+        TypeCase{ty_vec3<i32>, "asint(data.Load3(((48u * uint(i)) + 16u)))"},
+        TypeCase{ty_vec3<f16>, "data.Load<vector<float16_t, 3> >(((32u * uint(i)) + 8u))"},
+        TypeCase{ty_vec4<u32>, "data.Load4(((48u * uint(i)) + 16u))"},
+        TypeCase{ty_vec4<f32>, "asfloat(data.Load4(((48u * uint(i)) + 16u)))"},
+        TypeCase{ty_vec4<i32>, "asint(data.Load4(((48u * uint(i)) + 16u)))"},
+        TypeCase{ty_vec4<f16>, "data.Load<vector<float16_t, 4> >(((32u * uint(i)) + 8u))"},
+        TypeCase{ty_mat2x2<f32>,
+                 "return float2x2(asfloat(buffer.Load2((offset + 0u))), "
+                 "asfloat(buffer.Load2((offset + 8u))));"},
+        TypeCase{ty_mat2x3<f32>,
+                 "return float2x3(asfloat(buffer.Load3((offset + 0u))), "
+                 "asfloat(buffer.Load3((offset + 16u))));"},
+        TypeCase{ty_mat2x4<f32>,
+                 "return float2x4(asfloat(buffer.Load4((offset + 0u))), "
+                 "asfloat(buffer.Load4((offset + 16u))));"},
+        TypeCase{ty_mat3x2<f32>,
+                 "return float3x2(asfloat(buffer.Load2((offset + 0u))), "
+                 "asfloat(buffer.Load2((offset + 8u))), "
+                 "asfloat(buffer.Load2((offset + 16u))));"},
+        TypeCase{ty_mat3x3<f32>,
+                 "return float3x3(asfloat(buffer.Load3((offset + 0u))), "
+                 "asfloat(buffer.Load3((offset + 16u))), "
+                 "asfloat(buffer.Load3((offset + 32u))));"},
+        TypeCase{ty_mat3x4<f32>,
+                 "return float3x4(asfloat(buffer.Load4((offset + 0u))), "
+                 "asfloat(buffer.Load4((offset + 16u))), "
+                 "asfloat(buffer.Load4((offset + 32u))));"},
+        TypeCase{ty_mat4x2<f32>,
+                 "return float4x2(asfloat(buffer.Load2((offset + 0u))), "
+                 "asfloat(buffer.Load2((offset + 8u))), "
+                 "asfloat(buffer.Load2((offset + 16u))), "
+                 "asfloat(buffer.Load2((offset + 24u))));"},
+        TypeCase{ty_mat4x3<f32>,
+                 "return float4x3(asfloat(buffer.Load3((offset + 0u))), "
+                 "asfloat(buffer.Load3((offset + 16u))), "
+                 "asfloat(buffer.Load3((offset + 32u))), "
+                 "asfloat(buffer.Load3((offset + 48u))));"},
+        TypeCase{ty_mat4x4<f32>,
+                 "return float4x4(asfloat(buffer.Load4((offset + 0u))), "
+                 "asfloat(buffer.Load4((offset + 16u))), "
+                 "asfloat(buffer.Load4((offset + 32u))), "
+                 "asfloat(buffer.Load4((offset + 48u))));"},
+        TypeCase{ty_mat2x2<f16>,
+                 "return matrix<float16_t, 2, 2>("
+                 "buffer.Load<vector<float16_t, 2> >((offset + 0u)), "
+                 "buffer.Load<vector<float16_t, 2> >((offset + 4u)));"},
+        TypeCase{ty_mat2x3<f16>,
+                 "return matrix<float16_t, 2, 3>("
+                 "buffer.Load<vector<float16_t, 3> >((offset + 0u)), "
+                 "buffer.Load<vector<float16_t, 3> >((offset + 8u)));"},
+        TypeCase{ty_mat2x4<f16>,
+                 "return matrix<float16_t, 2, 4>("
+                 "buffer.Load<vector<float16_t, 4> >((offset + 0u)), "
+                 "buffer.Load<vector<float16_t, 4> >((offset + 8u)));"},
+        TypeCase{ty_mat3x2<f16>,
+                 "return matrix<float16_t, 3, 2>("
+                 "buffer.Load<vector<float16_t, 2> >((offset + 0u)), "
+                 "buffer.Load<vector<float16_t, 2> >((offset + 4u)), "
+                 "buffer.Load<vector<float16_t, 2> >((offset + 8u)));"},
+        TypeCase{ty_mat3x3<f16>,
+                 "return matrix<float16_t, 3, 3>("
+                 "buffer.Load<vector<float16_t, 3> >((offset + 0u)), "
+                 "buffer.Load<vector<float16_t, 3> >((offset + 8u)), "
+                 "buffer.Load<vector<float16_t, 3> >((offset + 16u)));"},
+        TypeCase{ty_mat3x4<f16>,
+                 "return matrix<float16_t, 3, 4>("
+                 "buffer.Load<vector<float16_t, 4> >((offset + 0u)), "
+                 "buffer.Load<vector<float16_t, 4> >((offset + 8u)), "
+                 "buffer.Load<vector<float16_t, 4> >((offset + 16u)));"},
+        TypeCase{ty_mat4x2<f16>,
+                 "return matrix<float16_t, 4, 2>("
+                 "buffer.Load<vector<float16_t, 2> >((offset + 0u)), "
+                 "buffer.Load<vector<float16_t, 2> >((offset + 4u)), "
+                 "buffer.Load<vector<float16_t, 2> >((offset + 8u)), "
+                 "buffer.Load<vector<float16_t, 2> >((offset + 12u)));"},
+        TypeCase{ty_mat4x3<f16>,
+                 "return matrix<float16_t, 4, 3>("
+                 "buffer.Load<vector<float16_t, 3> >((offset + 0u)), "
+                 "buffer.Load<vector<float16_t, 3> >((offset + 8u)), "
+                 "buffer.Load<vector<float16_t, 3> >((offset + 16u)), "
+                 "buffer.Load<vector<float16_t, 3> >((offset + 24u)));"},
+        TypeCase{ty_mat4x4<f16>,
+                 "return matrix<float16_t, 4, 4>("
+                 "buffer.Load<vector<float16_t, 4> >((offset + 0u)), "
+                 "buffer.Load<vector<float16_t, 4> >((offset + 8u)), "
+                 "buffer.Load<vector<float16_t, 4> >((offset + 16u)), "
+                 "buffer.Load<vector<float16_t, 4> >((offset + 24u)));"}));
+
+// Fixture alias for the constant-offset uniform-buffer load cases instantiated below.
+using HlslGeneratorImplTest_MemberAccessor_UniformBufferLoad_ConstantOffset =
+    HlslGeneratorImplTest_MemberAccessorWithParam<TypeCase>;
+TEST_P(HlslGeneratorImplTest_MemberAccessor_UniformBufferLoad_ConstantOffset, Test) {
+    // WGSL being modelled:
+    //
+    //   struct Data {
+    //     a : i32,
+    //     b : <type>,
+    //   };
+    //   var<uniform> data : Data;
+    //   data.b;
+
+    // Enable the f16 extension for every case (the parameter list includes f16 types).
+    Enable(ast::Extension::kF16);
+
+    auto test_case = GetParam();
+    const auto& expected_hlsl = test_case.expected;
+
+    // Uniform struct: a leading i32 followed by the member type under test.
+    SetupUniformBuffer(utils::Vector{
+        Member("a", ty.i32()),
+        Member("b", test_case.member_type(ty)),
+    });
+
+    // Function body: `var x = data.b;`
+    SetupFunction(utils::Vector{
+        Decl(Var("x", MemberAccessor("data", "b"))),
+    });
+
+    GeneratorImpl& gen = SanitizeAndBuild();
+
+    // Generation must succeed, and the emitted HLSL must contain the expected snippet.
+    ASSERT_TRUE(gen.Generate()) << gen.error();
+    EXPECT_THAT(gen.result(), HasSubstr(expected_hlsl));
+}
+
+// Expected HLSL for loading `data.b` at a compile-time-known offset, one TypeCase per member type.
+// Scalar and vector cases match the declaration of `x` directly; matrix cases match the generated
+// `tint_symbol` load helper in full, from its signature through its closing brace.
+INSTANTIATE_TEST_SUITE_P(
+    HlslGeneratorImplTest_MemberAccessor,
+    HlslGeneratorImplTest_MemberAccessor_UniformBufferLoad_ConstantOffset,
+    testing::Values(TypeCase{ty_u32, "uint x = data[0].y;"},
+                    TypeCase{ty_f32, "float x = asfloat(data[0].y);"},
+                    TypeCase{ty_i32, "int x = asint(data[0].y);"},
+                    TypeCase{ty_f16, "float16_t x = float16_t(f16tof32(((data[0].y) & 0xFFFF)));"},
+                    TypeCase{ty_vec2<u32>, "uint2 x = data[0].zw;"},
+                    TypeCase{ty_vec2<f32>, "float2 x = asfloat(data[0].zw);"},
+                    TypeCase{ty_vec2<i32>, "int2 x = asint(data[0].zw);"},
+                    TypeCase{ty_vec2<f16>, R"(uint ubo_load = data[0].y;
+  vector<float16_t, 2> x = vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16)));)"},
+                    TypeCase{ty_vec3<u32>, "uint3 x = data[1].xyz;"},
+                    TypeCase{ty_vec3<f32>, "float3 x = asfloat(data[1].xyz);"},
+                    TypeCase{ty_vec3<i32>, "int3 x = asint(data[1].xyz);"},
+                    TypeCase{ty_vec3<f16>, R"(uint2 ubo_load = data[0].zw;
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  vector<float16_t, 3> x = vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]);)"},
+                    TypeCase{ty_vec4<u32>, "uint4 x = data[1];"},
+                    TypeCase{ty_vec4<f32>, "float4 x = asfloat(data[1]);"},
+                    TypeCase{ty_vec4<i32>, "int4 x = asint(data[1]);"},
+                    TypeCase{ty_vec4<f16>,
+                             R"(uint2 ubo_load = data[0].zw;
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  vector<float16_t, 4> x = vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]);)"},
+                    TypeCase{ty_mat2x2<f32>, R"(float2x2 tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  return float2x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)));
+})"},
+                    TypeCase{ty_mat2x3<f32>, R"(float2x3 tint_symbol(uint4 buffer[3], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz));
+})"},
+                    TypeCase{ty_mat2x4<f32>, R"(float2x4 tint_symbol(uint4 buffer[3], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]));
+})"},
+                    TypeCase{ty_mat3x2<f32>, R"(float3x2 tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
+  return float3x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)));
+})"},
+                    TypeCase{ty_mat3x3<f32>, R"(float3x3 tint_symbol(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz));
+})"},
+                    TypeCase{ty_mat3x4<f32>, R"(float3x4 tint_symbol(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]));
+})"},
+                    TypeCase{ty_mat4x2<f32>, R"(float4x2 tint_symbol(uint4 buffer[3], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_3 / 4];
+  return float4x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)), asfloat(((scalar_offset_3 & 2) ? ubo_load_3.zw : ubo_load_3.xy)));
+})"},
+                    TypeCase{ty_mat4x3<f32>, R"(float4x3 tint_symbol(uint4 buffer[5], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
+})"},
+                    TypeCase{ty_mat4x4<f32>, R"(float4x4 tint_symbol(uint4 buffer[5], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]), asfloat(buffer[scalar_offset_3 / 4]));
+})"},
+                    TypeCase{ty_mat2x2<f16>,
+                             R"(matrix<float16_t, 2, 2> tint_symbol(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  return matrix<float16_t, 2, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))));
+})"},
+                    TypeCase{ty_mat2x3<f16>,
+                             R"(matrix<float16_t, 2, 3> tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  return matrix<float16_t, 2, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]));
+})"},
+                    TypeCase{ty_mat2x4<f16>,
+                             R"(matrix<float16_t, 2, 4> tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  return matrix<float16_t, 2, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]));
+})"},
+                    TypeCase{ty_mat3x2<f16>,
+                             R"(matrix<float16_t, 3, 2> tint_symbol(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  return matrix<float16_t, 3, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))));
+})"},
+                    TypeCase{ty_mat3x3<f16>,
+                             R"(matrix<float16_t, 3, 3> tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  return matrix<float16_t, 3, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]));
+})"},
+                    TypeCase{ty_mat3x4<f16>,
+                             R"(matrix<float16_t, 3, 4> tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  return matrix<float16_t, 3, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]));
+})"},
+                    TypeCase{ty_mat4x2<f16>,
+                             R"(matrix<float16_t, 4, 2> tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  const uint scalar_offset_3 = ((offset + 12u)) / 4;
+  uint ubo_load_3 = buffer[scalar_offset_3 / 4][scalar_offset_3 % 4];
+  return matrix<float16_t, 4, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))));
+})"},
+                    TypeCase{ty_mat4x3<f16>,
+                             R"(matrix<float16_t, 4, 3> tint_symbol(uint4 buffer[3], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  return matrix<float16_t, 4, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]), vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));
+})"},
+                    TypeCase{ty_mat4x4<f16>,
+                             R"(matrix<float16_t, 4, 4> tint_symbol(uint4 buffer[3], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  return matrix<float16_t, 4, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]), vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]));
+})"}));
+
+// Fixture alias for the dynamic-offset uniform-buffer load cases instantiated below.
+using HlslGeneratorImplTest_MemberAccessor_UniformBufferLoad_DynamicOffset =
+    HlslGeneratorImplTest_MemberAccessorWithParam<TypeCase>;
+
+TEST_P(HlslGeneratorImplTest_MemberAccessor_UniformBufferLoad_DynamicOffset, Test) {
+    // WGSL being modelled:
+    //
+    //   struct Inner {
+    //     a : i32,
+    //     b : <type>,
+    //     c : vec4<i32>,
+    //   };
+    //   struct Data {
+    //     arr : array<Inner, 4i>,
+    //   }
+    //   var<uniform> data : Data;
+    //   data.arr[i].b;
+
+    // Enable the f16 extension for every case (the parameter list includes f16 types).
+    Enable(ast::Extension::kF16);
+
+    auto test_case = GetParam();
+    const auto& expected_hlsl = test_case.expected;
+
+    // Array element type: the member under test sits between an i32 and a vec4<i32>.
+    auto* inner_struct = Structure("Inner", utils::Vector{
+                                                Member("a", ty.i32()),
+                                                Member("b", test_case.member_type(ty)),
+                                                Member("c", ty.vec4(ty.i32())),
+                                            });
+
+    SetupUniformBuffer(utils::Vector{
+        Member("arr", ty.array(ty.Of(inner_struct), 4_i)),
+    });
+
+    // The array index is a function-scope `var` named "i", initialised to 2.
+    auto* index_var = Var("i", Expr(2_i));
+
+    // Function body: `var i = 2i; var x = data.arr[i].b;`
+    SetupFunction(utils::Vector{
+        Decl(index_var),
+        Decl(Var("x",
+                 MemberAccessor(IndexAccessor(MemberAccessor("data", "arr"), index_var), "b"))),
+    });
+
+    GeneratorImpl& gen = SanitizeAndBuild();
+
+    // Generation must succeed, and the emitted HLSL must contain the expected snippet.
+    ASSERT_TRUE(gen.Generate()) << gen.error();
+    EXPECT_THAT(gen.result(), HasSubstr(expected_hlsl));
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    HlslGeneratorImplTest_MemberAccessor,
+    HlslGeneratorImplTest_MemberAccessor_UniformBufferLoad_DynamicOffset,
+    testing::Values(
+        TypeCase{ty_u32, "x = data[scalar_offset / 4][scalar_offset % 4]"},
+        TypeCase{ty_f32, "x = asfloat(data[scalar_offset / 4][scalar_offset % 4])"},
+        TypeCase{ty_i32, "x = asint(data[scalar_offset / 4][scalar_offset % 4])"},
+        TypeCase{ty_f16, R"(const uint scalar_offset_bytes = (((32u * uint(i)) + 4u));
+  const uint scalar_offset_index = scalar_offset_bytes / 4;
+  float16_t x = float16_t(f16tof32(((data[scalar_offset_index / 4][scalar_offset_index % 4] >> (scalar_offset_bytes % 4 == 0 ? 0 : 16)) & 0xFFFF)));)"},
+        TypeCase{ty_vec2<u32>, R"(uint4 ubo_load = data[scalar_offset / 4];
+  uint2 x = ((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy);)"},
+        TypeCase{ty_vec2<f32>, R"(uint4 ubo_load = data[scalar_offset / 4];
+  float2 x = asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy));)"},
+        TypeCase{ty_vec2<i32>, R"(uint4 ubo_load = data[scalar_offset / 4];
+  int2 x = asint(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy));)"},
+        TypeCase{ty_vec2<f16>, R"(const uint scalar_offset = (((32u * uint(i)) + 4u)) / 4;
+  uint ubo_load = data[scalar_offset / 4][scalar_offset % 4];
+  vector<float16_t, 2> x = vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16)));)"},
+        TypeCase{ty_vec3<u32>, "x = data[scalar_offset / 4].xyz"},
+        TypeCase{ty_vec3<f32>, "x = asfloat(data[scalar_offset / 4].xyz)"},
+        TypeCase{ty_vec3<i32>, "x = asint(data[scalar_offset / 4].xyz)"},
+        TypeCase{ty_vec3<f16>, R"(const uint scalar_offset = (((32u * uint(i)) + 8u)) / 4;
+  uint4 ubo_load_1 = data[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  vector<float16_t, 3> x = vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]);)"},
+        TypeCase{ty_vec4<u32>, "x = data[scalar_offset / 4]"},
+        TypeCase{ty_vec4<f32>, "x = asfloat(data[scalar_offset / 4])"},
+        TypeCase{ty_vec4<i32>, "x = asint(data[scalar_offset / 4])"},
+        TypeCase{ty_vec4<f16>, R"(const uint scalar_offset = (((32u * uint(i)) + 8u)) / 4;
+  uint4 ubo_load_1 = data[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  vector<float16_t, 4> x = vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]);)"},
+        TypeCase{ty_mat2x2<f32>, R"(float2x2 tint_symbol(uint4 buffer[12], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  return float2x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)));
+})"},
+        TypeCase{ty_mat2x3<f32>, R"(float2x3 tint_symbol(uint4 buffer[16], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz));
+})"},
+        TypeCase{ty_mat2x4<f32>, R"(float2x4 tint_symbol(uint4 buffer[16], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]));
+})"},
+        TypeCase{ty_mat3x2<f32>, R"(float3x2 tint_symbol(uint4 buffer[12], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
+  return float3x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)));
+})"},
+        TypeCase{ty_mat3x3<f32>, R"(float3x3 tint_symbol(uint4 buffer[20], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz));
+})"},
+        TypeCase{ty_mat3x4<f32>, R"(float3x4 tint_symbol(uint4 buffer[20], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]));
+})"},
+        TypeCase{ty_mat4x2<f32>, R"(float4x2 tint_symbol(uint4 buffer[16], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_3 / 4];
+  return float4x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)), asfloat(((scalar_offset_3 & 2) ? ubo_load_3.zw : ubo_load_3.xy)));
+})"},
+        TypeCase{ty_mat4x3<f32>, R"(float4x3 tint_symbol(uint4 buffer[24], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
+})"},
+        TypeCase{ty_mat4x4<f32>, R"(float4x4 tint_symbol(uint4 buffer[24], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]), asfloat(buffer[scalar_offset_3 / 4]));
+})"},
+        TypeCase{ty_mat2x2<f16>,
+                 R"(matrix<float16_t, 2, 2> tint_symbol(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  return matrix<float16_t, 2, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))));
+})"},
+        TypeCase{ty_mat2x3<f16>,
+                 R"(matrix<float16_t, 2, 3> tint_symbol(uint4 buffer[12], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  return matrix<float16_t, 2, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]));
+})"},
+        TypeCase{ty_mat2x4<f16>,
+                 R"(matrix<float16_t, 2, 4> tint_symbol(uint4 buffer[12], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  return matrix<float16_t, 2, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]));
+})"},
+        TypeCase{ty_mat3x2<f16>,
+                 R"(matrix<float16_t, 3, 2> tint_symbol(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  return matrix<float16_t, 3, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))));
+})"},
+        TypeCase{ty_mat3x3<f16>,
+                 R"(matrix<float16_t, 3, 3> tint_symbol(uint4 buffer[12], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  return matrix<float16_t, 3, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]));
+})"},
+        TypeCase{ty_mat3x4<f16>,
+                 R"(matrix<float16_t, 3, 4> tint_symbol(uint4 buffer[12], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  return matrix<float16_t, 3, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]));
+})"},
+        TypeCase{ty_mat4x2<f16>,
+                 R"(matrix<float16_t, 4, 2> tint_symbol(uint4 buffer[12], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  const uint scalar_offset_3 = ((offset + 12u)) / 4;
+  uint ubo_load_3 = buffer[scalar_offset_3 / 4][scalar_offset_3 % 4];
+  return matrix<float16_t, 4, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))));
+})"},
+        TypeCase{ty_mat4x3<f16>,
+                 R"(matrix<float16_t, 4, 3> tint_symbol(uint4 buffer[16], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  return matrix<float16_t, 4, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]), vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));
+})"},
+        TypeCase{ty_mat4x4<f16>,
+                 R"(matrix<float16_t, 4, 4> tint_symbol(uint4 buffer[16], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  return matrix<float16_t, 4, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]), vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]));
+})"}));
 
 using HlslGeneratorImplTest_MemberAccessor_StorageBufferStore =
     HlslGeneratorImplTest_MemberAccessorWithParam<TypeCase>;
 TEST_P(HlslGeneratorImplTest_MemberAccessor_StorageBufferStore, Test) {
     // struct Data {
-    //   a : i32;
-    //   b : <type>;
+    //   a : i32,
+    //   b : <type>,
     // };
     // var<storage> data : Data;
     // data.b = <type>();
 
     auto p = GetParam();
 
+    Enable(ast::Extension::kF16);
+
     SetupStorageBuffer(utils::Vector{
         Member("a", ty.i32()),
         Member("b", p.member_type(ty)),
@@ -243,73 +1010,123 @@
     EXPECT_THAT(gen.result(), HasSubstr(p.expected));
 }
 
-INSTANTIATE_TEST_SUITE_P(HlslGeneratorImplTest_MemberAccessor,
-                         HlslGeneratorImplTest_MemberAccessor_StorageBufferStore,
-                         testing::Values(TypeCase{ty_u32, "data.Store(4u, asuint(value))"},
-                                         TypeCase{ty_f32, "data.Store(4u, asuint(value))"},
-                                         TypeCase{ty_i32, "data.Store(4u, asuint(value))"},
-                                         TypeCase{ty_vec2<u32>, "data.Store2(8u, asuint(value))"},
-                                         TypeCase{ty_vec2<f32>, "data.Store2(8u, asuint(value))"},
-                                         TypeCase{ty_vec2<i32>, "data.Store2(8u, asuint(value))"},
-                                         TypeCase{ty_vec3<u32>, "data.Store3(16u, asuint(value))"},
-                                         TypeCase{ty_vec3<f32>, "data.Store3(16u, asuint(value))"},
-                                         TypeCase{ty_vec3<i32>, "data.Store3(16u, asuint(value))"},
-                                         TypeCase{ty_vec4<u32>, "data.Store4(16u, asuint(value))"},
-                                         TypeCase{ty_vec4<f32>, "data.Store4(16u, asuint(value))"},
-                                         TypeCase{ty_vec4<i32>, "data.Store4(16u, asuint(value))"},
-                                         TypeCase{ty_mat2x2<f32>, R"({
+INSTANTIATE_TEST_SUITE_P(
+    HlslGeneratorImplTest_MemberAccessor,
+    HlslGeneratorImplTest_MemberAccessor_StorageBufferStore,
+    testing::Values(TypeCase{ty_u32, "data.Store(4u, asuint(value))"},
+                    TypeCase{ty_f32, "data.Store(4u, asuint(value))"},
+                    TypeCase{ty_i32, "data.Store(4u, asuint(value))"},
+                    TypeCase{ty_f16, "data.Store<float16_t>(4u, value)"},
+                    TypeCase{ty_vec2<u32>, "data.Store2(8u, asuint(value))"},
+                    TypeCase{ty_vec2<f32>, "data.Store2(8u, asuint(value))"},
+                    TypeCase{ty_vec2<i32>, "data.Store2(8u, asuint(value))"},
+                    TypeCase{ty_vec2<f16>, "data.Store<vector<float16_t, 2> >(4u, value)"},
+                    TypeCase{ty_vec3<u32>, "data.Store3(16u, asuint(value))"},
+                    TypeCase{ty_vec3<f32>, "data.Store3(16u, asuint(value))"},
+                    TypeCase{ty_vec3<i32>, "data.Store3(16u, asuint(value))"},
+                    TypeCase{ty_vec3<f16>, "data.Store<vector<float16_t, 3> >(8u, value)"},
+                    TypeCase{ty_vec4<u32>, "data.Store4(16u, asuint(value))"},
+                    TypeCase{ty_vec4<f32>, "data.Store4(16u, asuint(value))"},
+                    TypeCase{ty_vec4<i32>, "data.Store4(16u, asuint(value))"},
+                    TypeCase{ty_vec4<f16>, "data.Store<vector<float16_t, 4> >(8u, value)"},
+                    TypeCase{ty_mat2x2<f32>, R"({
   buffer.Store2((offset + 0u), asuint(value[0u]));
   buffer.Store2((offset + 8u), asuint(value[1u]));
 })"},
-                                         TypeCase{ty_mat2x3<f32>, R"({
+                    TypeCase{ty_mat2x3<f32>, R"({
   buffer.Store3((offset + 0u), asuint(value[0u]));
   buffer.Store3((offset + 16u), asuint(value[1u]));
 })"},
-                                         TypeCase{ty_mat2x4<f32>, R"({
+                    TypeCase{ty_mat2x4<f32>, R"({
   buffer.Store4((offset + 0u), asuint(value[0u]));
   buffer.Store4((offset + 16u), asuint(value[1u]));
 })"},
-                                         TypeCase{ty_mat3x2<f32>, R"({
+                    TypeCase{ty_mat3x2<f32>, R"({
   buffer.Store2((offset + 0u), asuint(value[0u]));
   buffer.Store2((offset + 8u), asuint(value[1u]));
   buffer.Store2((offset + 16u), asuint(value[2u]));
 })"},
-                                         TypeCase{ty_mat3x3<f32>, R"({
+                    TypeCase{ty_mat3x3<f32>, R"({
   buffer.Store3((offset + 0u), asuint(value[0u]));
   buffer.Store3((offset + 16u), asuint(value[1u]));
   buffer.Store3((offset + 32u), asuint(value[2u]));
 })"},
-                                         TypeCase{ty_mat3x4<f32>, R"({
+                    TypeCase{ty_mat3x4<f32>, R"({
   buffer.Store4((offset + 0u), asuint(value[0u]));
   buffer.Store4((offset + 16u), asuint(value[1u]));
   buffer.Store4((offset + 32u), asuint(value[2u]));
 })"},
-                                         TypeCase{ty_mat4x2<f32>, R"({
+                    TypeCase{ty_mat4x2<f32>, R"({
   buffer.Store2((offset + 0u), asuint(value[0u]));
   buffer.Store2((offset + 8u), asuint(value[1u]));
   buffer.Store2((offset + 16u), asuint(value[2u]));
   buffer.Store2((offset + 24u), asuint(value[3u]));
 })"},
-                                         TypeCase{ty_mat4x3<f32>, R"({
+                    TypeCase{ty_mat4x3<f32>, R"({
   buffer.Store3((offset + 0u), asuint(value[0u]));
   buffer.Store3((offset + 16u), asuint(value[1u]));
   buffer.Store3((offset + 32u), asuint(value[2u]));
   buffer.Store3((offset + 48u), asuint(value[3u]));
 })"},
-                                         TypeCase{ty_mat4x4<f32>, R"({
+                    TypeCase{ty_mat4x4<f32>, R"({
   buffer.Store4((offset + 0u), asuint(value[0u]));
   buffer.Store4((offset + 16u), asuint(value[1u]));
   buffer.Store4((offset + 32u), asuint(value[2u]));
   buffer.Store4((offset + 48u), asuint(value[3u]));
+})"},
+                    TypeCase{ty_mat2x2<f16>, R"({
+  buffer.Store<vector<float16_t, 2> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 4u), value[1u]);
+})"},
+                    TypeCase{ty_mat2x3<f16>, R"({
+  buffer.Store<vector<float16_t, 3> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 8u), value[1u]);
+})"},
+                    TypeCase{ty_mat2x4<f16>, R"({
+  buffer.Store<vector<float16_t, 4> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 8u), value[1u]);
+})"},
+                    TypeCase{ty_mat3x2<f16>, R"({
+  buffer.Store<vector<float16_t, 2> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 4u), value[1u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 8u), value[2u]);
+})"},
+                    TypeCase{ty_mat3x3<f16>, R"({
+  buffer.Store<vector<float16_t, 3> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 8u), value[1u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 16u), value[2u]);
+})"},
+                    TypeCase{ty_mat3x4<f16>, R"({
+  buffer.Store<vector<float16_t, 4> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 8u), value[1u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 16u), value[2u]);
+})"},
+                    TypeCase{ty_mat4x2<f16>, R"({
+  buffer.Store<vector<float16_t, 2> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 4u), value[1u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 8u), value[2u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 12u), value[3u]);
+})"},
+                    TypeCase{ty_mat4x3<f16>, R"({
+  buffer.Store<vector<float16_t, 3> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 8u), value[1u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 16u), value[2u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 24u), value[3u]);
+})"},
+                    TypeCase{ty_mat4x4<f16>, R"({
+  buffer.Store<vector<float16_t, 4> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 8u), value[1u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 16u), value[2u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 24u), value[3u]);
 })"}));
 
 TEST_F(HlslGeneratorImplTest_MemberAccessor, StorageBuffer_Store_Matrix_Empty) {
     // struct Data {
-    //   z : f32;
-    //   a : mat2x3<f32>;
+    //   a : i32,
+    //   b : mat2x3<f32>,
     // };
     // var<storage> data : Data;
-    // data.a = mat2x3<f32>();
+    // data.b = mat2x3<f32>();
 
     SetupStorageBuffer(utils::Vector{
         Member("a", ty.i32()),
@@ -339,10 +1156,10 @@
     EXPECT_EQ(gen.result(), expected);
 }
 
-TEST_F(HlslGeneratorImplTest_MemberAccessor, StorageBuffer_Load_Matrix_Single_Element) {
+TEST_F(HlslGeneratorImplTest_MemberAccessor, StorageBuffer_Load_Matrix_F32_Single_Element) {
     // struct Data {
-    //   z : f32;
-    //   a : mat4x3<f32>;
+    //   z : f32,
+    //   a : mat4x3<f32>,
     // };
     // var<storage> data : Data;
     // data.a[2i][1i];
@@ -370,17 +1187,119 @@
     EXPECT_EQ(gen.result(), expected);
 }
 
-TEST_F(HlslGeneratorImplTest_MemberAccessor,
-       EmitExpression_IndexAccessor_StorageBuffer_Load_Int_FromArray) {
+TEST_F(HlslGeneratorImplTest_MemberAccessor, StorageBuffer_Load_Matrix_F16_Single_Element) {
     // struct Data {
-    //   a : array<i32, 5>;
+    //   z : f16,
+    //   a : mat4x3<f16>,
+    // };
+    // var<storage> data : Data;
+    // data.a[2i][1i];
+
+    Enable(ast::Extension::kF16);
+
+    SetupStorageBuffer(utils::Vector{
+        Member("z", ty.f16()),
+        Member("a", ty.mat4x3<f16>()),
+    });
+
+    SetupFunction(utils::Vector{
+        Decl(Var("x", IndexAccessor(IndexAccessor(MemberAccessor("data", "a"), 2_i), 1_i))),
+    });
+
+    GeneratorImpl& gen = SanitizeAndBuild();
+
+    ASSERT_TRUE(gen.Generate()) << gen.error();
+    auto* expected =
+        R"(RWByteAddressBuffer data : register(u0, space1);
+
+void main() {
+  float16_t x = data.Load<float16_t>(26u);
+  return;
+}
+)";
+    EXPECT_EQ(gen.result(), expected);
+}
+
+TEST_F(HlslGeneratorImplTest_MemberAccessor, UniformBuffer_Load_Matrix_F32_Single_Element) {
+    // struct Data {
+    //   z : f32,
+    //   a : mat4x3<f32>,
+    // };
+    // var<uniform> data : Data;
+    // data.a[2i][1i];
+
+    SetupUniformBuffer(utils::Vector{
+        Member("z", ty.f32()),
+        Member("a", ty.mat4x3<f32>()),
+    });
+
+    SetupFunction(utils::Vector{
+        Decl(Var("x", IndexAccessor(IndexAccessor(MemberAccessor("data", "a"), 2_i), 1_i))),
+    });
+
+    GeneratorImpl& gen = SanitizeAndBuild();
+
+    ASSERT_TRUE(gen.Generate()) << gen.error();
+    auto* expected =
+        R"(cbuffer cbuffer_data : register(b1, space1) {
+  uint4 data[5];
+};
+
+void main() {
+  float x = asfloat(data[3].y);
+  return;
+}
+)";
+    EXPECT_EQ(gen.result(), expected);
+}
+
+TEST_F(HlslGeneratorImplTest_MemberAccessor, UniformBuffer_Load_Matrix_F16_Single_Element) {
+    // struct Data {
+    //   z : f16,
+    //   a : mat4x3<f16>,
+    // };
+    // var<uniform> data : Data;
+    // data.a[2i][1i];
+
+    Enable(ast::Extension::kF16);
+
+    SetupUniformBuffer(utils::Vector{
+        Member("z", ty.f16()),
+        Member("a", ty.mat4x3<f16>()),
+    });
+
+    SetupFunction(utils::Vector{
+        Decl(Var("x", IndexAccessor(IndexAccessor(MemberAccessor("data", "a"), 2_i), 1_i))),
+    });
+
+    GeneratorImpl& gen = SanitizeAndBuild();
+
+    ASSERT_TRUE(gen.Generate()) << gen.error();
+    auto* expected =
+        R"(cbuffer cbuffer_data : register(b1, space1) {
+  uint4 data[3];
+};
+
+void main() {
+  float16_t x = float16_t(f16tof32(((data[1].z >> 16) & 0xFFFF)));
+  return;
+}
+)";
+    EXPECT_EQ(gen.result(), expected);
+}
+
+TEST_F(HlslGeneratorImplTest_MemberAccessor,
+       EmitExpression_IndexAccessor_StorageBuffer_Load_I32_FromArray) {
+    // struct Data {
+    //   z : f32,
+    //   a : array<i32, 5i>,
     // };
     // var<storage> data : Data;
     // data.a[2];
 
     SetupStorageBuffer(utils::Vector{
         Member("z", ty.f32()),
-        Member("a", ty.array<i32, 5>(4)),
+        Member("a", ty.array(ty.i32(), 5_i)),
     });
 
     SetupFunction(utils::Vector{
@@ -402,16 +1321,154 @@
 }
 
 TEST_F(HlslGeneratorImplTest_MemberAccessor,
-       EmitExpression_IndexAccessor_StorageBuffer_Load_Int_FromArray_ExprIdx) {
+       EmitExpression_IndexAccessor_UniformBuffer_Load_Vec4_I32_FromArray) {
     // struct Data {
-    //   a : array<i32, 5>;
+    //   z : f32,
+    //   a : array<vec4<i32>, 5i>,
+    // };
+    // var<uniform> data : Data;
+    // data.a[2i];
+
+    SetupUniformBuffer(utils::Vector{
+        Member("z", ty.f32()),
+        Member("a", ty.array(ty.vec4(ty.i32()), 5_i)),
+    });
+
+    SetupFunction(utils::Vector{
+        Decl(Var("x", IndexAccessor(MemberAccessor("data", "a"), 2_i))),
+    });
+
+    GeneratorImpl& gen = SanitizeAndBuild();
+
+    ASSERT_TRUE(gen.Generate()) << gen.error();
+    auto* expected =
+        R"(cbuffer cbuffer_data : register(b1, space1) {
+  uint4 data[6];
+};
+
+void main() {
+  int4 x = asint(data[3]);
+  return;
+}
+)";
+    EXPECT_EQ(gen.result(), expected);
+}
+
+TEST_F(HlslGeneratorImplTest_MemberAccessor,
+       EmitExpression_IndexAccessor_StorageBuffer_Load_Struct_FromArray) {
+    // struct Inner {
+    //   @size(16i) @align(16i)
+    //   v : i32,
+    // };
+    // struct Data {
+    //   z : f32,
+    //   a : array<Inner, 5i>,
+    // };
+    // var<storage> data : Data;
+    // data.a[2i];
+
+    auto* elem_type = Structure(
+        "Inner", utils::Vector{
+                     Member("v", ty.i32(), utils::Vector{MemberSize(16_i), MemberAlign(16_i)}),
+                 });
+
+    SetupStorageBuffer(utils::Vector{
+        Member("z", ty.f32()),
+        Member("a", ty.array(ty.Of(elem_type), 5_i)),
+    });
+
+    SetupFunction(utils::Vector{
+        Decl(Var("x", IndexAccessor(MemberAccessor("data", "a"), 2_i))),
+    });
+
+    GeneratorImpl& gen = SanitizeAndBuild();
+
+    ASSERT_TRUE(gen.Generate()) << gen.error();
+    auto* expected =
+        R"(struct Inner {
+  int v;
+};
+
+RWByteAddressBuffer data : register(u0, space1);
+
+Inner tint_symbol(RWByteAddressBuffer buffer, uint offset) {
+  const Inner tint_symbol_2 = {asint(buffer.Load((offset + 0u)))};
+  return tint_symbol_2;
+}
+
+void main() {
+  Inner x = tint_symbol(data, 48u);
+  return;
+}
+)";
+    EXPECT_EQ(gen.result(), expected);
+}
+
+TEST_F(HlslGeneratorImplTest_MemberAccessor,
+       EmitExpression_IndexAccessor_UniformBuffer_Load_Struct_FromArray) {
+    // struct Inner {
+    //   @size(16i) @align(16i)
+    //   v : i32,
+    // };
+    // struct Data {
+    //   z : f32,
+    //   a : array<Inner, 5i>,
+    // };
+    // var<uniform> data : Data;
+    // data.a[2i];
+
+    auto* elem_type = Structure(
+        "Inner", utils::Vector{
+                     Member("v", ty.i32(), utils::Vector{MemberSize(16_i), MemberAlign(16_i)}),
+                 });
+
+    SetupUniformBuffer(utils::Vector{
+        Member("z", ty.f32()),
+        Member("a", ty.array(ty.Of(elem_type), 5_i)),
+    });
+
+    SetupFunction(utils::Vector{
+        Decl(Var("x", IndexAccessor(MemberAccessor("data", "a"), 2_i))),
+    });
+
+    GeneratorImpl& gen = SanitizeAndBuild();
+
+    ASSERT_TRUE(gen.Generate()) << gen.error();
+    auto* expected =
+        R"(struct Inner {
+  int v;
+};
+
+cbuffer cbuffer_data : register(b1, space1) {
+  uint4 data[6];
+};
+
+Inner tint_symbol(uint4 buffer[6], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const Inner tint_symbol_2 = {asint(buffer[scalar_offset / 4][scalar_offset % 4])};
+  return tint_symbol_2;
+}
+
+void main() {
+  Inner x = tint_symbol(data, 48u);
+  return;
+}
+)";
+    EXPECT_EQ(gen.result(), expected);
+}
+
+TEST_F(HlslGeneratorImplTest_MemberAccessor,
+       EmitExpression_IndexAccessor_StorageBuffer_Load_I32_FromArray_ExprIdx) {
+    // struct Data {
+    //   z : f32,
+    //   a : array<i32, 5i>,
     // };
     // var<storage> data : Data;
     // data.a[(2i + 4i) - 3i];
 
     SetupStorageBuffer(utils::Vector{
         Member("z", ty.f32()),
-        Member("a", ty.array<i32, 5>(4)),
+        Member("a", ty.array(ty.i32(), 5_i)),
     });
 
     SetupFunction(utils::Vector{
@@ -438,16 +1495,57 @@
     EXPECT_EQ(gen.result(), expected);
 }
 
+TEST_F(HlslGeneratorImplTest_MemberAccessor,
+       EmitExpression_IndexAccessor_UniformBuffer_Load_Vec4_I32_FromArray_ExprIdx) {
+    // struct Data {
+    //   z : f32,
+    //   a : array<vec4<i32>, 5i>,
+    // };
+    // var<uniform> data : Data;
+    // data.a[(2i + 4i) - 3i];
+
+    SetupUniformBuffer(utils::Vector{
+        Member("z", ty.f32()),
+        Member("a", ty.array(ty.vec4(ty.i32()), 5_i)),
+    });
+
+    SetupFunction(utils::Vector{
+        Decl(Var("a", Expr(2_i))),
+        Decl(Var("b", Expr(4_i))),
+        Decl(Var("c", Expr(3_i))),
+        Decl(Var("x", IndexAccessor(MemberAccessor("data", "a"), Sub(Add("a", "b"), "c")))),
+    });
+
+    GeneratorImpl& gen = SanitizeAndBuild();
+
+    ASSERT_TRUE(gen.Generate()) << gen.error();
+    auto* expected =
+        R"(cbuffer cbuffer_data : register(b1, space1) {
+  uint4 data[6];
+};
+
+void main() {
+  int a = 2;
+  int b = 4;
+  int c = 3;
+  const uint scalar_offset = ((16u + (16u * uint(((a + b) - c))))) / 4;
+  int4 x = asint(data[scalar_offset / 4]);
+  return;
+}
+)";
+    EXPECT_EQ(gen.result(), expected);
+}
+
 TEST_F(HlslGeneratorImplTest_MemberAccessor, StorageBuffer_Store_ToArray) {
     // struct Data {
-    //   a : array<i32, 5>;
+    //   a : array<i32, 5i>,
     // };
     // var<storage> data : Data;
-    // data.a[2] = 2;
+    // data.a[2i] = 2i;
 
     SetupStorageBuffer(utils::Vector{
         Member("z", ty.f32()),
-        Member("a", ty.array<i32, 5>(4)),
+        Member("a", ty.array(ty.i32(), 5_i)),
     });
 
     SetupFunction(utils::Vector{
@@ -470,23 +1568,23 @@
 
 TEST_F(HlslGeneratorImplTest_MemberAccessor, StorageBuffer_Load_MultiLevel) {
     // struct Inner {
-    //   a : vec3<i32>;
-    //   b : vec3<f32>;
+    //   a : vec3<i32>,
+    //   b : vec3<f32>,
     // };
     // struct Data {
-    //   var c : array<Inner, 4u>;
+    //   c : array<Inner, 4u>,
     // };
     //
-    // var<storage> data : Pre;
-    // data.c[2].b
+    // var<storage> data : Data;
+    // data.c[2i].b
 
     auto* inner = Structure("Inner", utils::Vector{
-                                         Member("a", ty.vec3<f32>()),
+                                         Member("a", ty.vec3<i32>()),
                                          Member("b", ty.vec3<f32>()),
                                      });
 
     SetupStorageBuffer(utils::Vector{
-        Member("c", ty.array(ty.Of(inner), 4_u, 32)),
+        Member("c", ty.array(ty.Of(inner), 4_u)),
     });
 
     SetupFunction(utils::Vector{
@@ -507,31 +1605,72 @@
     EXPECT_EQ(gen.result(), expected);
 }
 
-TEST_F(HlslGeneratorImplTest_MemberAccessor, StorageBuffer_Load_MultiLevel_Swizzle) {
+TEST_F(HlslGeneratorImplTest_MemberAccessor, UniformBuffer_Load_MultiLevel) {
     // struct Inner {
-    //   a : vec3<i32>;
-    //   b : vec3<f32>;
+    //   a : vec3<i32>,
+    //   b : vec3<f32>,
     // };
     // struct Data {
-    //   var c : array<Inner, 4u>;
+    //   c : array<Inner, 4u>,
     // };
     //
-    // var<storage> data : Pre;
-    // data.c[2].b.xy
+    // var<uniform> data : Data;
+    // data.c[2i].b
 
     auto* inner = Structure("Inner", utils::Vector{
-                                         Member("a", ty.vec3<f32>()),
+                                         Member("a", ty.vec3<i32>()),
+                                         Member("b", ty.vec3<f32>()),
+                                     });
+
+    SetupUniformBuffer(utils::Vector{
+        Member("c", ty.array(ty.Of(inner), 4_u)),
+    });
+
+    SetupFunction(utils::Vector{
+        Decl(Var("x", MemberAccessor(IndexAccessor(MemberAccessor("data", "c"), 2_i), "b"))),
+    });
+
+    GeneratorImpl& gen = SanitizeAndBuild();
+
+    ASSERT_TRUE(gen.Generate()) << gen.error();
+    auto* expected =
+        R"(cbuffer cbuffer_data : register(b1, space1) {
+  uint4 data[8];
+};
+
+void main() {
+  float3 x = asfloat(data[5].xyz);
+  return;
+}
+)";
+    EXPECT_EQ(gen.result(), expected);
+}
+
+TEST_F(HlslGeneratorImplTest_MemberAccessor, StorageBuffer_Load_MultiLevel_Swizzle) {
+    // struct Inner {
+    //   a : vec3<i32>,
+    //   b : vec3<f32>,
+    // };
+    // struct Data {
+    //   c : array<Inner, 4u>,
+    // };
+    //
+    // var<storage> data : Data;
+    // data.c[2i].b.yx
+
+    auto* inner = Structure("Inner", utils::Vector{
+                                         Member("a", ty.vec3<i32>()),
                                          Member("b", ty.vec3<f32>()),
                                      });
 
     SetupStorageBuffer(utils::Vector{
-        Member("c", ty.array(ty.Of(inner), 4_u, 32)),
+        Member("c", ty.array(ty.Of(inner), 4_u)),
     });
 
     SetupFunction(utils::Vector{
         Decl(Var("x",
                  MemberAccessor(
-                     MemberAccessor(IndexAccessor(MemberAccessor("data", "c"), 2_i), "b"), "xy"))),
+                     MemberAccessor(IndexAccessor(MemberAccessor("data", "c"), 2_i), "b"), "yx"))),
     });
 
     GeneratorImpl& gen = SanitizeAndBuild();
@@ -541,7 +1680,50 @@
         R"(RWByteAddressBuffer data : register(u0, space1);
 
 void main() {
-  float2 x = asfloat(data.Load3(80u)).xy;
+  float2 x = asfloat(data.Load3(80u)).yx;
+  return;
+}
+)";
+    EXPECT_EQ(gen.result(), expected);
+}
+
+TEST_F(HlslGeneratorImplTest_MemberAccessor, UniformBuffer_Load_MultiLevel_Swizzle) {
+    // struct Inner {
+    //   a : vec3<i32>,
+    //   b : vec3<f32>,
+    // };
+    // struct Data {
+    //   c : array<Inner, 4u>,
+    // };
+    //
+    // var<uniform> data : Data;
+    // data.c[2i].b.yx
+
+    auto* inner = Structure("Inner", utils::Vector{
+                                         Member("a", ty.vec3<i32>()),
+                                         Member("b", ty.vec3<f32>()),
+                                     });
+
+    SetupUniformBuffer(utils::Vector{
+        Member("c", ty.array(ty.Of(inner), 4_u)),
+    });
+
+    SetupFunction(utils::Vector{
+        Decl(Var("x",
+                 MemberAccessor(
+                     MemberAccessor(IndexAccessor(MemberAccessor("data", "c"), 2_i), "b"), "yx"))),
+    });
+
+    GeneratorImpl& gen = SanitizeAndBuild();
+
+    ASSERT_TRUE(gen.Generate()) << gen.error();
+    auto* expected =
+        R"(cbuffer cbuffer_data : register(b1, space1) {
+  uint4 data[8];
+};
+
+void main() {
+  float2 x = asfloat(data[5].xyz).yx;
   return;
 }
 )";
@@ -551,23 +1733,23 @@
 TEST_F(HlslGeneratorImplTest_MemberAccessor,
        StorageBuffer_Load_MultiLevel_Swizzle_SingleLetter) {  // NOLINT
     // struct Inner {
-    //   a : vec3<i32>;
-    //   b : vec3<f32>;
+    //   a : vec3<i32>,
+    //   b : vec3<f32>,
     // };
     // struct Data {
-    //   var c : array<Inner, 4u>;
+    //   c : array<Inner, 4u>,
     // };
     //
-    // var<storage> data : Pre;
-    // data.c[2].b.g
+    // var<storage> data : Data;
+    // data.c[2i].b.g
 
     auto* inner = Structure("Inner", utils::Vector{
-                                         Member("a", ty.vec3<f32>()),
+                                         Member("a", ty.vec3<i32>()),
                                          Member("b", ty.vec3<f32>()),
                                      });
 
     SetupStorageBuffer(utils::Vector{
-        Member("c", ty.array(ty.Of(inner), 4_u, 32)),
+        Member("c", ty.array(ty.Of(inner), 4_u)),
     });
 
     SetupFunction(utils::Vector{
@@ -590,25 +1772,69 @@
     EXPECT_EQ(gen.result(), expected);
 }
 
-TEST_F(HlslGeneratorImplTest_MemberAccessor, StorageBuffer_Load_MultiLevel_Index) {
+TEST_F(HlslGeneratorImplTest_MemberAccessor,
+       UniformBuffer_Load_MultiLevel_Swizzle_SingleLetter) {  // NOLINT
     // struct Inner {
-    //   a : vec3<i32>;
-    //   b : vec3<f32>;
+    //   a : vec3<i32>,
+    //   b : vec3<f32>,
     // };
     // struct Data {
-    //   var c : array<Inner, 4u>;
+    //   c : array<Inner, 4u>,
     // };
     //
-    // var<storage> data : Pre;
-    // data.c[2].b[1]
+    // var<uniform> data : Data;
+    // data.c[2i].b.g
 
     auto* inner = Structure("Inner", utils::Vector{
-                                         Member("a", ty.vec3<f32>()),
+                                         Member("a", ty.vec3<i32>()),
+                                         Member("b", ty.vec3<f32>()),
+                                     });
+
+    SetupUniformBuffer(utils::Vector{
+        Member("c", ty.array(ty.Of(inner), 4_u)),
+    });
+
+    SetupFunction(utils::Vector{
+        Decl(Var("x",
+                 MemberAccessor(
+                     MemberAccessor(IndexAccessor(MemberAccessor("data", "c"), 2_i), "b"), "g"))),
+    });
+
+    GeneratorImpl& gen = SanitizeAndBuild();
+
+    ASSERT_TRUE(gen.Generate()) << gen.error();
+    auto* expected =
+        R"(cbuffer cbuffer_data : register(b1, space1) {
+  uint4 data[8];
+};
+
+void main() {
+  float x = asfloat(data[5].y);
+  return;
+}
+)";
+    EXPECT_EQ(gen.result(), expected);
+}
+
+TEST_F(HlslGeneratorImplTest_MemberAccessor, StorageBuffer_Load_MultiLevel_Index) {
+    // struct Inner {
+    //   a : vec3<i32>,
+    //   b : vec3<f32>,
+    // };
+    // struct Data {
+    //   c : array<Inner, 4u>,
+    // };
+    //
+    // var<storage> data : Data;
+    // data.c[2i].b[1i]
+
+    auto* inner = Structure("Inner", utils::Vector{
+                                         Member("a", ty.vec3<i32>()),
                                          Member("b", ty.vec3<f32>()),
                                      });
 
     SetupStorageBuffer(utils::Vector{
-        Member("c", ty.array(ty.Of(inner), 4_u, 32)),
+        Member("c", ty.array(ty.Of(inner), 4_u)),
     });
 
     SetupFunction(utils::Vector{
@@ -631,25 +1857,68 @@
     EXPECT_EQ(gen.result(), expected);
 }
 
-TEST_F(HlslGeneratorImplTest_MemberAccessor, StorageBuffer_Store_MultiLevel) {
+TEST_F(HlslGeneratorImplTest_MemberAccessor, UniformBuffer_Load_MultiLevel_Index) {
     // struct Inner {
-    //   a : vec3<i32>;
-    //   b : vec3<f32>;
+    //   a : vec3<i32>,
+    //   b : vec3<f32>,
     // };
     // struct Data {
-    //   var c : array<Inner, 4u>;
+    //   c : array<Inner, 4u>,
+    // };
+    //
+    // var<uniform> data : Data;
+    // data.c[2i].b[1i]
+
+    auto* inner = Structure("Inner", utils::Vector{
+                                         Member("a", ty.vec3<i32>()),
+                                         Member("b", ty.vec3<f32>()),
+                                     });
+
+    SetupUniformBuffer(utils::Vector{
+        Member("c", ty.array(ty.Of(inner), 4_u)),
+    });
+
+    SetupFunction(utils::Vector{
+        Decl(Var("x",
+                 IndexAccessor(MemberAccessor(IndexAccessor(MemberAccessor("data", "c"), 2_i), "b"),
+                               1_i))),
+    });
+
+    GeneratorImpl& gen = SanitizeAndBuild();
+
+    ASSERT_TRUE(gen.Generate()) << gen.error();
+    auto* expected =
+        R"(cbuffer cbuffer_data : register(b1, space1) {
+  uint4 data[8];
+};
+
+void main() {
+  float x = asfloat(data[5].y);
+  return;
+}
+)";
+    EXPECT_EQ(gen.result(), expected);
+}
+
+TEST_F(HlslGeneratorImplTest_MemberAccessor, StorageBuffer_Store_MultiLevel) {
+    // struct Inner {
+    //   a : vec3<i32>,
+    //   b : vec3<f32>,
+    // };
+    // struct Data {
+    //   c : array<Inner, 4u>,
     // };
     //
-    // var<storage> data : Pre;
-    // data.c[2].b = vec3<f32>(1_f, 2_f, 3_f);
+    // var<storage> data : Data;
+    // data.c[2i].b = vec3<f32>(1_f, 2_f, 3_f);
 
     auto* inner = Structure("Inner", utils::Vector{
-                                         Member("a", ty.vec3<f32>()),
+                                         Member("a", ty.vec3<i32>()),
                                          Member("b", ty.vec3<f32>()),
                                      });
 
     SetupStorageBuffer(utils::Vector{
-        Member("c", ty.array(ty.Of(inner), 4_u, 32)),
+        Member("c", ty.array(ty.Of(inner), 4_u)),
     });
 
     SetupFunction(utils::Vector{
@@ -673,15 +1942,15 @@
 
 TEST_F(HlslGeneratorImplTest_MemberAccessor, StorageBuffer_Store_Swizzle_SingleLetter) {
     // struct Inner {
-    //   a : vec3<i32>;
-    //   b : vec3<f32>;
+    //   a : vec3<i32>,
+    //   b : vec3<f32>,
     // };
     // struct Data {
-    //   var c : array<Inner, 4u>;
+    //   c : array<Inner, 4u>,
     // };
     //
-    // var<storage> data : Pre;
-    // data.c[2].b.y = 1.f;
+    // var<storage> data : Data;
+    // data.c[2i].b.y = 1.f;
 
     auto* inner = Structure("Inner", utils::Vector{
                                          Member("a", ty.vec3<i32>()),
@@ -689,7 +1958,7 @@
                                      });
 
     SetupStorageBuffer(utils::Vector{
-        Member("c", ty.array(ty.Of(inner), 4_u, 32)),
+        Member("c", ty.array(ty.Of(inner), 4_u)),
     });
 
     SetupFunction(utils::Vector{
diff --git a/src/tint/writer/spirv/builder.cc b/src/tint/writer/spirv/builder.cc
index 9b1e077..c5e4a38 100644
--- a/src/tint/writer/spirv/builder.cc
+++ b/src/tint/writer/spirv/builder.cc
@@ -3955,11 +3955,7 @@
     if (matrix_type) {
         push_annot(spv::Op::OpMemberDecorate,
                    {Operand(struct_id), Operand(idx), U32Operand(SpvDecorationColMajor)});
-        if (!matrix_type->type()->Is<sem::F32>()) {
-            error_ = "matrix scalar element type must be f32";
-            return 0;
-        }
-        const uint32_t scalar_elem_size = 4;
+        const uint32_t scalar_elem_size = matrix_type->type()->Size();
         const uint32_t effective_row_count = (matrix_type->rows() == 2) ? 2 : 4;
         push_annot(spv::Op::OpMemberDecorate,
                    {Operand(struct_id), Operand(idx), U32Operand(SpvDecorationMatrixStride),
diff --git a/src/tint/writer/spirv/builder_type_test.cc b/src/tint/writer/spirv/builder_type_test.cc
index a17dcb5..4377c42 100644
--- a/src/tint/writer/spirv/builder_type_test.cc
+++ b/src/tint/writer/spirv/builder_type_test.cc
@@ -317,7 +317,9 @@
 }
 
 TEST_F(BuilderTest_Type, GenerateStruct) {
-    auto* s = Structure("my_struct", utils::Vector{Member("a", ty.f32())});
+    Enable(ast::Extension::kF16);
+
+    auto* s = Structure("my_struct", utils::Vector{Member("a", ty.f32()), Member("b", ty.f16())});
 
     spirv::Builder& b = Build();
 
@@ -326,17 +328,23 @@
     EXPECT_EQ(id, 1u);
 
     EXPECT_EQ(DumpInstructions(b.types()), R"(%2 = OpTypeFloat 32
-%1 = OpTypeStruct %2
+%3 = OpTypeFloat 16
+%1 = OpTypeStruct %2 %3
 )");
     EXPECT_EQ(DumpInstructions(b.debug()), R"(OpName %1 "my_struct"
 OpMemberName %1 0 "a"
+OpMemberName %1 1 "b"
 )");
 }
 
 TEST_F(BuilderTest_Type, GenerateStruct_DecoratedMembers) {
+    Enable(ast::Extension::kF16);
+
     auto* s = Structure("S", utils::Vector{
                                  Member("a", ty.f32()),
                                  Member("b", ty.f32(), utils::Vector{MemberAlign(8_i)}),
+                                 Member("c", ty.f16(), utils::Vector{MemberAlign(8_u)}),
+                                 Member("d", ty.f16()),
                              });
 
     spirv::Builder& b = Build();
@@ -346,23 +354,34 @@
     EXPECT_EQ(id, 1u);
 
     EXPECT_EQ(DumpInstructions(b.types()), R"(%2 = OpTypeFloat 32
-%1 = OpTypeStruct %2 %2
+%3 = OpTypeFloat 16
+%1 = OpTypeStruct %2 %2 %3 %3
 )");
     EXPECT_EQ(DumpInstructions(b.debug()), R"(OpName %1 "S"
 OpMemberName %1 0 "a"
 OpMemberName %1 1 "b"
+OpMemberName %1 2 "c"
+OpMemberName %1 3 "d"
 )");
     EXPECT_EQ(DumpInstructions(b.annots()), R"(OpMemberDecorate %1 0 Offset 0
 OpMemberDecorate %1 1 Offset 8
+OpMemberDecorate %1 2 Offset 16
+OpMemberDecorate %1 3 Offset 18
 )");
 }
 
-TEST_F(BuilderTest_Type, GenerateStruct_NonLayout_Matrix) {
-    auto* s = Structure("S", utils::Vector{
-                                 Member("a", ty.mat2x2<f32>()),
-                                 Member("b", ty.mat2x3<f32>()),
-                                 Member("c", ty.mat4x4<f32>()),
-                             });
+TEST_F(BuilderTest_Type, GenerateStruct_DecoratedMembers_Matrix) {
+    Enable(ast::Extension::kF16);
+
+    auto* s =
+        Structure("S", utils::Vector{
+                           Member("mat2x2_f32", ty.mat2x2<f32>()),
+                           Member("mat2x3_f32", ty.mat2x3<f32>(), utils::Vector{MemberAlign(64_i)}),
+                           Member("mat4x4_f32", ty.mat4x4<f32>()),
+                           Member("mat2x2_f16", ty.mat2x2<f16>(), utils::Vector{MemberAlign(32_i)}),
+                           Member("mat2x3_f16", ty.mat2x3<f16>()),
+                           Member("mat4x4_f16", ty.mat4x4<f16>(), utils::Vector{MemberAlign(64_i)}),
+                       });
 
     spirv::Builder& b = Build();
 
@@ -377,78 +396,63 @@
 %5 = OpTypeMatrix %6 2
 %8 = OpTypeVector %4 4
 %7 = OpTypeMatrix %8 4
-%1 = OpTypeStruct %2 %5 %7
+%11 = OpTypeFloat 16
+%10 = OpTypeVector %11 2
+%9 = OpTypeMatrix %10 2
+%13 = OpTypeVector %11 3
+%12 = OpTypeMatrix %13 2
+%15 = OpTypeVector %11 4
+%14 = OpTypeMatrix %15 4
+%1 = OpTypeStruct %2 %5 %7 %9 %12 %14
 )");
     EXPECT_EQ(DumpInstructions(b.debug()), R"(OpName %1 "S"
-OpMemberName %1 0 "a"
-OpMemberName %1 1 "b"
-OpMemberName %1 2 "c"
+OpMemberName %1 0 "mat2x2_f32"
+OpMemberName %1 1 "mat2x3_f32"
+OpMemberName %1 2 "mat4x4_f32"
+OpMemberName %1 3 "mat2x2_f16"
+OpMemberName %1 4 "mat2x3_f16"
+OpMemberName %1 5 "mat4x4_f16"
 )");
     EXPECT_EQ(DumpInstructions(b.annots()), R"(OpMemberDecorate %1 0 Offset 0
 OpMemberDecorate %1 0 ColMajor
 OpMemberDecorate %1 0 MatrixStride 8
-OpMemberDecorate %1 1 Offset 16
+OpMemberDecorate %1 1 Offset 64
 OpMemberDecorate %1 1 ColMajor
 OpMemberDecorate %1 1 MatrixStride 16
-OpMemberDecorate %1 2 Offset 48
+OpMemberDecorate %1 2 Offset 96
 OpMemberDecorate %1 2 ColMajor
 OpMemberDecorate %1 2 MatrixStride 16
+OpMemberDecorate %1 3 Offset 160
+OpMemberDecorate %1 3 ColMajor
+OpMemberDecorate %1 3 MatrixStride 4
+OpMemberDecorate %1 4 Offset 168
+OpMemberDecorate %1 4 ColMajor
+OpMemberDecorate %1 4 MatrixStride 8
+OpMemberDecorate %1 5 Offset 192
+OpMemberDecorate %1 5 ColMajor
+OpMemberDecorate %1 5 MatrixStride 8
 )");
 }
 
-TEST_F(BuilderTest_Type, GenerateStruct_DecoratedMembers_LayoutMatrix) {
-    // We have to infer layout for matrix when it also has an offset.
-    auto* s = Structure("S", utils::Vector{
-                                 Member("a", ty.mat2x2<f32>()),
-                                 Member("b", ty.mat2x3<f32>()),
-                                 Member("c", ty.mat4x4<f32>()),
-                             });
+TEST_F(BuilderTest_Type, GenerateStruct_DecoratedMembers_ArraysOfMatrix) {
+    Enable(ast::Extension::kF16);
 
-    spirv::Builder& b = Build();
-
-    auto id = b.GenerateTypeIfNeeded(program->TypeOf(s));
-    ASSERT_FALSE(b.has_error()) << b.error();
-    EXPECT_EQ(id, 1u);
-
-    EXPECT_EQ(DumpInstructions(b.types()), R"(%4 = OpTypeFloat 32
-%3 = OpTypeVector %4 2
-%2 = OpTypeMatrix %3 2
-%6 = OpTypeVector %4 3
-%5 = OpTypeMatrix %6 2
-%8 = OpTypeVector %4 4
-%7 = OpTypeMatrix %8 4
-%1 = OpTypeStruct %2 %5 %7
-)");
-    EXPECT_EQ(DumpInstructions(b.debug()), R"(OpName %1 "S"
-OpMemberName %1 0 "a"
-OpMemberName %1 1 "b"
-OpMemberName %1 2 "c"
-)");
-    EXPECT_EQ(DumpInstructions(b.annots()), R"(OpMemberDecorate %1 0 Offset 0
-OpMemberDecorate %1 0 ColMajor
-OpMemberDecorate %1 0 MatrixStride 8
-OpMemberDecorate %1 1 Offset 16
-OpMemberDecorate %1 1 ColMajor
-OpMemberDecorate %1 1 MatrixStride 16
-OpMemberDecorate %1 2 Offset 48
-OpMemberDecorate %1 2 ColMajor
-OpMemberDecorate %1 2 MatrixStride 16
-)");
-}
-
-TEST_F(BuilderTest_Type, GenerateStruct_DecoratedMembers_LayoutArraysOfMatrix) {
-    // We have to infer layout for matrix when it also has an offset.
-    // The decoration goes on the struct member, even if the matrix is buried
-    // in levels of arrays.
-    auto* arr_mat2x2 = ty.array(ty.mat2x2<f32>(), 1_u);      // Singly nested array
-    auto* arr_arr_mat2x3 = ty.array(ty.mat2x3<f32>(), 1_u);  // Doubly nested array
+    auto* arr_mat2x2_f32 = ty.array(ty.mat2x2<f32>(), 1_u);  // Singly nested array
+    auto* arr_mat2x2_f16 = ty.array(ty.mat2x2<f16>(), 1_u);  // Singly nested array
+    auto* arr_arr_mat2x3_f32 =
+        ty.array(ty.array(ty.mat2x3<f32>(), 1_u), 2_u);  // Doubly nested array
+    auto* arr_arr_mat2x3_f16 =
+        ty.array(ty.array(ty.mat2x3<f16>(), 1_u), 2_u);      // Doubly nested array
     auto* rtarr_mat4x4 = ty.array(ty.mat4x4<f32>());         // Runtime array
 
-    auto* s = Structure("S", utils::Vector{
-                                 Member("a", arr_mat2x2),
-                                 Member("b", arr_arr_mat2x3),
-                                 Member("c", rtarr_mat4x4),
-                             });
+    auto* s = Structure(
+        "S", utils::Vector{
+                 Member("arr_mat2x2_f32", arr_mat2x2_f32),
+                 Member("arr_mat2x2_f16", arr_mat2x2_f16, utils::Vector{MemberAlign(64_i)}),
+                 Member("arr_arr_mat2x3_f32", arr_arr_mat2x3_f32, utils::Vector{MemberAlign(64_i)}),
+                 Member("arr_arr_mat2x3_f16", arr_arr_mat2x3_f16),
+                 Member("rtarr_mat4x4", rtarr_mat4x4),
+             });
 
     spirv::Builder& b = Build();
 
@@ -462,31 +466,53 @@
 %6 = OpTypeInt 32 0
 %7 = OpConstant %6 1
 %2 = OpTypeArray %3 %7
-%10 = OpTypeVector %5 3
+%11 = OpTypeFloat 16
+%10 = OpTypeVector %11 2
 %9 = OpTypeMatrix %10 2
 %8 = OpTypeArray %9 %7
-%13 = OpTypeVector %5 4
-%12 = OpTypeMatrix %13 4
-%11 = OpTypeRuntimeArray %12
-%1 = OpTypeStruct %2 %8 %11
+%15 = OpTypeVector %5 3
+%14 = OpTypeMatrix %15 2
+%13 = OpTypeArray %14 %7
+%16 = OpConstant %6 2
+%12 = OpTypeArray %13 %16
+%20 = OpTypeVector %11 3
+%19 = OpTypeMatrix %20 2
+%18 = OpTypeArray %19 %7
+%17 = OpTypeArray %18 %16
+%23 = OpTypeVector %5 4
+%22 = OpTypeMatrix %23 4
+%21 = OpTypeRuntimeArray %22
+%1 = OpTypeStruct %2 %8 %12 %17 %21
 )");
     EXPECT_EQ(DumpInstructions(b.debug()), R"(OpName %1 "S"
-OpMemberName %1 0 "a"
-OpMemberName %1 1 "b"
-OpMemberName %1 2 "c"
+OpMemberName %1 0 "arr_mat2x2_f32"
+OpMemberName %1 1 "arr_mat2x2_f16"
+OpMemberName %1 2 "arr_arr_mat2x3_f32"
+OpMemberName %1 3 "arr_arr_mat2x3_f16"
+OpMemberName %1 4 "rtarr_mat4x4"
 )");
     EXPECT_EQ(DumpInstructions(b.annots()), R"(OpMemberDecorate %1 0 Offset 0
 OpMemberDecorate %1 0 ColMajor
 OpMemberDecorate %1 0 MatrixStride 8
 OpDecorate %2 ArrayStride 16
-OpMemberDecorate %1 1 Offset 16
+OpMemberDecorate %1 1 Offset 64
 OpMemberDecorate %1 1 ColMajor
-OpMemberDecorate %1 1 MatrixStride 16
-OpDecorate %8 ArrayStride 32
-OpMemberDecorate %1 2 Offset 48
+OpMemberDecorate %1 1 MatrixStride 4
+OpDecorate %8 ArrayStride 8
+OpMemberDecorate %1 2 Offset 128
 OpMemberDecorate %1 2 ColMajor
 OpMemberDecorate %1 2 MatrixStride 16
-OpDecorate %11 ArrayStride 64
+OpDecorate %13 ArrayStride 32
+OpDecorate %12 ArrayStride 32
+OpMemberDecorate %1 3 Offset 192
+OpMemberDecorate %1 3 ColMajor
+OpMemberDecorate %1 3 MatrixStride 8
+OpDecorate %18 ArrayStride 16
+OpDecorate %17 ArrayStride 16
+OpMemberDecorate %1 4 Offset 224
+OpMemberDecorate %1 4 ColMajor
+OpMemberDecorate %1 4 MatrixStride 16
+OpDecorate %21 ArrayStride 64
 )");
 }