Tint: Implement f16 in uniform and storage address space This CL implements f16 in uniform and storage address space, allowing f16 types to be used in uniform and storage buffers on all backends. Tint unit tests and Dawn E2E tests are added to validate that the f16 types work as expected. Bug: tint:1473, tint:1502 Change-Id: I15e3de1033d3727f2ea33f4657f682c5f13c2153 Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/106320 Kokoro: Kokoro <noreply+kokoro@google.com> Commit-Queue: Zhaoming Jiang <zhaoming.jiang@intel.com> Reviewed-by: Ben Clayton <bclayton@google.com>

commit: ab9b5f3aa5996054993f01914fe5a6833a2c8e38 [log] [tgz]
author: Zhaoming Jiang <zhaoming.jiang@intel.com> Thu Nov 24 05:25:35 2022 +0000
committer: Dawn LUCI CQ <dawn-scoped@luci-project-accounts.iam.gserviceaccount.com> Thu Nov 24 05:25:35 2022 +0000
tree: 8192501908f4907c9ed4c31ac57860d5471c1191
parent: ff2b5e441cc7417281f08135c80702c74b91c9ad [diff]
diff --git a/src/dawn/tests/end2end/ComputeLayoutMemoryBufferTests.cpp b/src/dawn/tests/end2end/ComputeLayoutMemoryBufferTests.cpp
index 0ceca8d..79cacd9 100644
--- a/src/dawn/tests/end2end/ComputeLayoutMemoryBufferTests.cpp
+++ b/src/dawn/tests/end2end/ComputeLayoutMemoryBufferTests.cpp

@@ -96,7 +96,17 @@
 //   3. "Padding": Add `size` bytes of padding bytes into buffer;
 //   4. "FillingFixed": Fill all `size` given (fixed) bytes into the memory buffer.
 // Note that data bytes and padding bytes are generated seperatedly and designed to
-// be distinguishable, i.e. data bytes have MSB set to 0 while padding bytes 1.
+// be distinguishable, i.e. data bytes have the second most significant bit set to 0 while padding
+// bytes 1.
+// We don't want the test data to include NaN or Inf, because according to the WGSL spec an
+// implementation may give an indeterminate value if an expression evaluates to NaN or Inf, and in
+// Tint-generated HLSL reading an f16 NaN from a buffer does not preserve the bit pattern (i.e. a
+// NaN input may be changed to another NaN with a different bit pattern). In the bit
+// representation of both f32 and f16, the first (most significant) bit is the sign bit, followed
+// by the biased exponent bits (starting from the second most significant bit). A float value is
+// NaN or Inf if and only if all its exponent bits are 1. By setting the second most significant
+// bit of every data byte to 0, we ensure that the second most significant bit of any float in the
+// buffer is 0, and therefore avoid generating NaN or Inf float data.
 class MemoryDataBuilder {
   public:
     // Record a "Align" operation
@@ -150,15 +160,20 @@
                                  uint8_t paddingXorKey) {
         uint8_t dataByte = 0x0u;
         uint8_t paddingByte = 0x2u;
-        // Get a data byte with MSB set to 0.
+        // Padding mask, setting the second most significant bit to 1
+        constexpr uint8_t paddingMask = 0x40u;
+        // Data mask, clearing the second most significant bit to 0 so that data bytes are
+        // distinguishable from padding bytes and avoid NaN or Inf.
+        constexpr uint8_t dataMask = ~paddingMask;
+        // Get a data byte
         auto NextDataByte = [&]() {
             dataByte += 0x11u;
-            return static_cast<uint8_t>((dataByte ^ dataXorKey) & 0x7fu);
+            return static_cast<uint8_t>((dataByte ^ dataXorKey) & dataMask);
         };
-        // Get a padding byte with MSB set to 1, distinguished from data bytes.
+        // Get a padding byte
         auto NextPaddingByte = [&]() {
             paddingByte += 0x13u;
-            return static_cast<uint8_t>((paddingByte ^ paddingXorKey) | 0x80u);
+            return static_cast<uint8_t>((paddingByte ^ paddingXorKey) | paddingMask);
         };
         for (auto& operation : mOperations) {
             switch (operation.mType) {
@@ -234,10 +249,11 @@
   public:
     // Constructor with WGSL type name, natural alignment and natural size. Set mStrideDataBytes to
     // natural size and mStridePaddingBytes to 0 by default to indicate continious data part.
-    Field(std::string wgslType, size_t align, size_t size)
+    Field(std::string wgslType, size_t align, size_t size, bool requireF16Feature)
         : mWGSLType(wgslType),
           mAlign(align),
           mSize(size),
+          mRequireF16Feature(requireF16Feature),
           mStrideDataBytes(size),
           mStridePaddingBytes(0) {}
 
@@ -247,6 +263,7 @@
     size_t GetUnpaddedSize() const { return mSize; }
     // The padded size determined by @size attribute if existed, otherwise the natural size
     size_t GetPaddedSize() const { return mHasSizeAttribute ? mPaddedSize : mSize; }
+    bool IsRequireF16Feature() const { return mRequireF16Feature; }
 
     // Applies a @size attribute, sets the mPaddedSize to value.
     // Returns this Field so calls can be chained.
@@ -337,7 +354,8 @@
 
     // Helper function to build a Field describing a scalar type.
     static Field Scalar(ScalarType type) {
-        return Field(ScalarTypeName(type), ScalarTypeSize(type), ScalarTypeSize(type));
+        return Field(ScalarTypeName(type), ScalarTypeSize(type), ScalarTypeSize(type),
+                     type == ScalarType::f16);
     }
 
     // Helper function to build a Field describing a vector type.
@@ -347,7 +365,7 @@
         size_t vectorSize = n * elementSize;
         size_t vectorAlignment = (n == 3 ? 4 : n) * elementSize;
         return Field{"vec" + std::to_string(n) + "<" + ScalarTypeName(type) + ">", vectorAlignment,
-                     vectorSize};
+                     vectorSize, type == ScalarType::f16};
     }
 
     // Helper function to build a Field describing a matrix type.
@@ -360,7 +378,7 @@
         size_t colVectorAlignment = (row == 3 ? 4 : row) * elementSize;
         Field field = Field{"mat" + std::to_string(col) + "x" + std::to_string(row) + "<" +
                                 ScalarTypeName(type) + ">",
-                            colVectorAlignment, col * colVectorAlignment};
+                            colVectorAlignment, col * colVectorAlignment, type == ScalarType::f16};
         if (colVectorSize != colVectorAlignment) {
             field.Strided(colVectorSize, colVectorAlignment - colVectorSize);
         }
@@ -371,6 +389,7 @@
     const std::string mWGSLType;  // Friendly WGSL name of the type of the field
     size_t mAlign;       // Alignment of the type in bytes, can be change by @align attribute
     const size_t mSize;  // Natural size of the type in bytes
+    const bool mRequireF16Feature;
 
     bool mHasAlignAttribute = false;
     bool mHasSizeAttribute = false;
@@ -392,6 +411,25 @@
     return o;
 }
 
+std::ostream& operator<<(std::ostream& o, const std::vector<uint8_t>& byteBuffer) {
+    o << "\n";
+    uint32_t i = 0;
+    for (auto byte : byteBuffer) {
+        o << std::hex << std::setw(2) << std::setfill('0') << uint32_t(byte);
+        if (i < 31) {
+            o << " ";
+            i++;
+        } else {
+            o << "\n";
+            i = 0;
+        }
+    }
+    if (i != 0) {
+        o << "\n";
+    }
+    return o;
+}
+
 // Create a compute pipeline with all buffer in bufferList binded in order starting from slot 0, and
 // run the given shader.
 void RunComputeShaderWithBuffers(const wgpu::Device& device,
@@ -445,7 +483,40 @@
 
 class ComputeLayoutMemoryBufferTests
     : public DawnTestWithParams<ComputeLayoutMemoryBufferTestParams> {
-    void SetUp() override { DawnTestBase::SetUp(); }
+    // void SetUp() override { DawnTestBase::SetUp(); }
+
+  protected:
+    // Require f16 feature if possible
+    std::vector<wgpu::FeatureName> GetRequiredFeatures() override {
+        mIsShaderF16SupportedOnAdapter = SupportsFeatures({wgpu::FeatureName::ShaderF16});
+        if (!mIsShaderF16SupportedOnAdapter) {
+            return {};
+        }
+
+        if (!IsD3D12()) {
+            mUseDxcEnabledOrNonD3D12 = true;
+        } else {
+            for (auto* enabledToggle : GetParam().forceEnabledWorkarounds) {
+                if (strncmp(enabledToggle, "use_dxc", 7) == 0) {
+                    mUseDxcEnabledOrNonD3D12 = true;
+                    break;
+                }
+            }
+        }
+
+        if (mUseDxcEnabledOrNonD3D12) {
+            return {wgpu::FeatureName::ShaderF16};
+        }
+
+        return {};
+    }
+
+    bool IsShaderF16SupportedOnAdapter() const { return mIsShaderF16SupportedOnAdapter; }
+    bool UseDxcEnabledOrNonD3D12() const { return mUseDxcEnabledOrNonD3D12; }
+
+  private:
+    bool mIsShaderF16SupportedOnAdapter = false;
+    bool mUseDxcEnabledOrNonD3D12 = false;
 };
 
 // Align returns the WGSL decoration for an explicit structure field alignment
@@ -472,9 +543,14 @@
 
     const Field& field = GetParam().mField;
 
+    if (field.IsRequireF16Feature() && !device.HasFeature(wgpu::FeatureName::ShaderF16)) {
+        return;
+    }
+
     const bool isUniform = GetParam().mAddressSpace == AddressSpace::Uniform;
 
-    std::string shader = R"(
+    std::string shader = std::string(field.IsRequireF16Feature() ? "enable f16;" : "") +
+                         R"(
 struct Data {
     header : u32,
     @align({field_align}) @size({field_size}) field : {field_type},
@@ -553,6 +629,7 @@
         {
             inputDataBuilder.AddFixedU32(kDataHeaderCode);           // Input.data.header
             inputDataBuilder.AddSubBuilder(field.GetDataBuilder());  // Input.data.field
+            inputDataBuilder.AlignTo(4);                             // Input.data.footer alignment
             inputDataBuilder.AddFixedU32(kDataFooterCode);           // Input.data.footer
             inputDataBuilder.AlignTo(field.GetAlign());              // Input.data padding
         }
@@ -563,6 +640,7 @@
 
     MemoryDataBuilder expectedDataBuilder;  // The expected data to be copied by the shader
     expectedDataBuilder.AddSubBuilder(field.GetDataBuilder());
+    expectedDataBuilder.AlignTo(4);  // Storage buffer size must be a multiple of 4
 
     // Expectation and input buffer have identical data bytes but different padding bytes.
     // Initializes the dst buffer with data bytes different from input and expectation, and padding
@@ -603,25 +681,36 @@
     EXPECT_BUFFER_U32_EQ(kStatusOk, statusBuf, 0) << "status code error" << std::endl
                                                   << "Shader: " << shader;
 
-    // Check the data
+    // Check the data. Note that MemoryDataBuilder avoids generating NaN and Inf floating point
+    // data, whose bit pattern will not get preserved when reading from buffer (arbitrary NaNs may
+    // be silently transformed into a quiet NaN). Having NaN and Inf floating point data in input
+    // may result in bitwise mismatch.
     field.CheckData([&](uint32_t offset, uint32_t size) {
         EXPECT_BUFFER_U8_RANGE_EQ(expectedData.data() + offset, outputBuf, offset, size)
-            << "offset: " << offset;
+            << "offset: " << offset << "\n Input buffer:" << inputData << "Shader:\n"
+            << shader << "\n";
     });
 }
 
 // Test different types that used directly as buffer type
 TEST_P(ComputeLayoutMemoryBufferTests, NonStructMember) {
     auto params = GetParam();
+
     Field& field = params.mField;
+
     // @size and @align attribute only apply to struct members, skip them
     if (field.HasSizeAttribute() || field.HasAlignAttribute()) {
         return;
     }
 
+    if (field.IsRequireF16Feature() && !device.HasFeature(wgpu::FeatureName::ShaderF16)) {
+        return;
+    }
+
     const bool isUniform = GetParam().mAddressSpace == AddressSpace::Uniform;
 
-    std::string shader = R"(
+    std::string shader = std::string(field.IsRequireF16Feature() ? "enable f16;" : "") +
+                         R"(
 @group(0) @binding(0) var<{input_qualifiers}> input : {field_type};
 @group(0) @binding(1) var<storage, read_write> output : {field_type};
 
@@ -638,10 +727,11 @@
     // Build the input and expected data.
     MemoryDataBuilder dataBuilder;
     dataBuilder.AddSubBuilder(field.GetDataBuilder());
+    dataBuilder.AlignTo(4);  // Storage buffer size must be a multiple of 4
 
     // Expectation and input buffer have identical data bytes but different padding bytes.
-    // Initializes the dst buffer with data bytes different from input and expectation, and padding
-    // bytes identical to expectation but different from input.
+    // Initializes the dst buffer with data bytes different from input and expectation, and
+    // padding bytes identical to expectation but different from input.
     constexpr uint8_t dataKeyForInputAndExpectation = 0x00u;
     constexpr uint8_t dataKeyForDstInit = 0xffu;
     constexpr uint8_t paddingKeyForInput = 0x3fu;
@@ -669,10 +759,14 @@
 
     RunComputeShaderWithBuffers(device, queue, shader, {inputBuf, outputBuf});
 
-    // Check the data
+    // Check the data. Note that MemoryDataBuilder avoids generating NaN and Inf floating point
+    // data, whose bit pattern will not get preserved when reading from buffer (arbitrary NaNs may
+    // be silently transformed into a quiet NaN). Having NaN and Inf floating point data in input
+    // may result in bitwise mismatch.
     field.CheckData([&](uint32_t offset, uint32_t size) {
         EXPECT_BUFFER_U8_RANGE_EQ(expectedData.data() + offset, outputBuf, offset, size)
-            << "offset: " << offset;
+            << "offset: " << offset << "\n Input buffer:" << inputData << "Shader:\n"
+            << shader << "\n";
     });
 }
 
@@ -680,6 +774,7 @@
     auto params = MakeParamGenerator<ComputeLayoutMemoryBufferTestParams>(
         {
             D3D12Backend(),
+            D3D12Backend({"use_dxc"}),
             MetalBackend(),
             VulkanBackend(),
             OpenGLBackend(),
@@ -692,16 +787,19 @@
             Field::Scalar(ScalarType::f32),
             Field::Scalar(ScalarType::i32),
             Field::Scalar(ScalarType::u32),
+            Field::Scalar(ScalarType::f16),
 
             // Scalar types with custom alignment
             Field::Scalar(ScalarType::f32).AlignAttribute(16),
             Field::Scalar(ScalarType::i32).AlignAttribute(16),
             Field::Scalar(ScalarType::u32).AlignAttribute(16),
+            Field::Scalar(ScalarType::f16).AlignAttribute(16),
 
             // Scalar types with custom size
             Field::Scalar(ScalarType::f32).SizeAttribute(24),
             Field::Scalar(ScalarType::i32).SizeAttribute(24),
             Field::Scalar(ScalarType::u32).SizeAttribute(24),
+            Field::Scalar(ScalarType::f16).SizeAttribute(24),
 
             // Vector types with no custom alignment or size
             Field::Vector(2, ScalarType::f32),
@@ -713,6 +811,9 @@
             Field::Vector(2, ScalarType::u32),
             Field::Vector(3, ScalarType::u32),
             Field::Vector(4, ScalarType::u32),
+            Field::Vector(2, ScalarType::f16),
+            Field::Vector(3, ScalarType::f16),
+            Field::Vector(4, ScalarType::f16),
 
             // Vector types with custom alignment
             Field::Vector(2, ScalarType::f32).AlignAttribute(32),
@@ -724,6 +825,9 @@
             Field::Vector(2, ScalarType::u32).AlignAttribute(32),
             Field::Vector(3, ScalarType::u32).AlignAttribute(32),
             Field::Vector(4, ScalarType::u32).AlignAttribute(32),
+            Field::Vector(2, ScalarType::f16).AlignAttribute(32),
+            Field::Vector(3, ScalarType::f16).AlignAttribute(32),
+            Field::Vector(4, ScalarType::f16).AlignAttribute(32),
 
             // Vector types with custom size
             Field::Vector(2, ScalarType::f32).SizeAttribute(24),
@@ -735,6 +839,9 @@
             Field::Vector(2, ScalarType::u32).SizeAttribute(24),
             Field::Vector(3, ScalarType::u32).SizeAttribute(24),
             Field::Vector(4, ScalarType::u32).SizeAttribute(24),
+            Field::Vector(2, ScalarType::f16).SizeAttribute(24),
+            Field::Vector(3, ScalarType::f16).SizeAttribute(24),
+            Field::Vector(4, ScalarType::f16).SizeAttribute(24),
 
             // Matrix types with no custom alignment or size
             Field::Matrix(2, 2, ScalarType::f32),
@@ -746,6 +853,15 @@
             Field::Matrix(2, 4, ScalarType::f32),
             Field::Matrix(3, 4, ScalarType::f32),
             Field::Matrix(4, 4, ScalarType::f32),
+            Field::Matrix(2, 2, ScalarType::f16),
+            Field::Matrix(3, 2, ScalarType::f16),
+            Field::Matrix(4, 2, ScalarType::f16),
+            Field::Matrix(2, 3, ScalarType::f16),
+            Field::Matrix(3, 3, ScalarType::f16),
+            Field::Matrix(4, 3, ScalarType::f16),
+            Field::Matrix(2, 4, ScalarType::f16),
+            Field::Matrix(3, 4, ScalarType::f16),
+            Field::Matrix(4, 4, ScalarType::f16),
 
             // Matrix types with custom alignment
             Field::Matrix(2, 2, ScalarType::f32).AlignAttribute(32),
@@ -757,6 +873,15 @@
             Field::Matrix(2, 4, ScalarType::f32).AlignAttribute(32),
             Field::Matrix(3, 4, ScalarType::f32).AlignAttribute(32),
             Field::Matrix(4, 4, ScalarType::f32).AlignAttribute(32),
+            Field::Matrix(2, 2, ScalarType::f16).AlignAttribute(32),
+            Field::Matrix(3, 2, ScalarType::f16).AlignAttribute(32),
+            Field::Matrix(4, 2, ScalarType::f16).AlignAttribute(32),
+            Field::Matrix(2, 3, ScalarType::f16).AlignAttribute(32),
+            Field::Matrix(3, 3, ScalarType::f16).AlignAttribute(32),
+            Field::Matrix(4, 3, ScalarType::f16).AlignAttribute(32),
+            Field::Matrix(2, 4, ScalarType::f16).AlignAttribute(32),
+            Field::Matrix(3, 4, ScalarType::f16).AlignAttribute(32),
+            Field::Matrix(4, 4, ScalarType::f16).AlignAttribute(32),
 
             // Matrix types with custom size
             Field::Matrix(2, 2, ScalarType::f32).SizeAttribute(128),
@@ -768,85 +893,241 @@
             Field::Matrix(2, 4, ScalarType::f32).SizeAttribute(128),
             Field::Matrix(3, 4, ScalarType::f32).SizeAttribute(128),
             Field::Matrix(4, 4, ScalarType::f32).SizeAttribute(128),
+            Field::Matrix(2, 2, ScalarType::f16).SizeAttribute(128),
+            Field::Matrix(3, 2, ScalarType::f16).SizeAttribute(128),
+            Field::Matrix(4, 2, ScalarType::f16).SizeAttribute(128),
+            Field::Matrix(2, 3, ScalarType::f16).SizeAttribute(128),
+            Field::Matrix(3, 3, ScalarType::f16).SizeAttribute(128),
+            Field::Matrix(4, 3, ScalarType::f16).SizeAttribute(128),
+            Field::Matrix(2, 4, ScalarType::f16).SizeAttribute(128),
+            Field::Matrix(3, 4, ScalarType::f16).SizeAttribute(128),
+            Field::Matrix(4, 4, ScalarType::f16).SizeAttribute(128),
 
             // Array types with no custom alignment or size.
-            // Note: The use of StorageBufferOnly() is due to UBOs requiring 16 byte alignment
-            // of array elements. See https://www.w3.org/TR/WGSL/#storage-class-constraints
-            Field("array<u32, 1>", /* align */ 4, /* size */ 4).StorageBufferOnly(),
-            Field("array<u32, 2>", /* align */ 4, /* size */ 8).StorageBufferOnly(),
-            Field("array<u32, 3>", /* align */ 4, /* size */ 12).StorageBufferOnly(),
-            Field("array<u32, 4>", /* align */ 4, /* size */ 16).StorageBufferOnly(),
-            Field("array<vec2<u32>, 1>", /* align */ 8, /* size */ 8).StorageBufferOnly(),
-            Field("array<vec2<u32>, 2>", /* align */ 8, /* size */ 16).StorageBufferOnly(),
-            Field("array<vec2<u32>, 3>", /* align */ 8, /* size */ 24).StorageBufferOnly(),
-            Field("array<vec2<u32>, 4>", /* align */ 8, /* size */ 32).StorageBufferOnly(),
-            Field("array<vec3<u32>, 1>", /* align */ 16, /* size */ 16).Strided(12, 4),
-            Field("array<vec3<u32>, 2>", /* align */ 16, /* size */ 32).Strided(12, 4),
-            Field("array<vec3<u32>, 3>", /* align */ 16, /* size */ 48).Strided(12, 4),
-            Field("array<vec3<u32>, 4>", /* align */ 16, /* size */ 64).Strided(12, 4),
-            Field("array<vec4<u32>, 1>", /* align */ 16, /* size */ 16),
-            Field("array<vec4<u32>, 2>", /* align */ 16, /* size */ 32),
-            Field("array<vec4<u32>, 3>", /* align */ 16, /* size */ 48),
-            Field("array<vec4<u32>, 4>", /* align */ 16, /* size */ 64),
+            // Note: The use of StorageBufferOnly() is due to UBOs requiring 16 byte
+            // alignment of array elements. See
+            // https://www.w3.org/TR/WGSL/#storage-class-constraints
+            Field("array<u32, 1>", /* align */ 4, /* size */ 4, /* requireF16Feature */ false)
+                .StorageBufferOnly(),
+            Field("array<u32, 2>", /* align */ 4, /* size */ 8, /* requireF16Feature */ false)
+                .StorageBufferOnly(),
+            Field("array<u32, 3>", /* align */ 4, /* size */ 12, /* requireF16Feature */ false)
+                .StorageBufferOnly(),
+            Field("array<u32, 4>", /* align */ 4, /* size */ 16, /* requireF16Feature */ false)
+                .StorageBufferOnly(),
+            Field("array<vec2<u32>, 1>", /* align */ 8, /* size */ 8, /* requireF16Feature */ false)
+                .StorageBufferOnly(),
+            Field("array<vec2<u32>, 2>", /* align */ 8, /* size */ 16,
+                  /* requireF16Feature */ false)
+                .StorageBufferOnly(),
+            Field("array<vec2<u32>, 3>", /* align */ 8, /* size */ 24,
+                  /* requireF16Feature */ false)
+                .StorageBufferOnly(),
+            Field("array<vec2<u32>, 4>", /* align */ 8, /* size */ 32,
+                  /* requireF16Feature */ false)
+                .StorageBufferOnly(),
+            Field("array<vec3<u32>, 1>", /* align */ 16, /* size */ 16,
+                  /* requireF16Feature */ false)
+                .Strided(12, 4),
+            Field("array<vec3<u32>, 2>", /* align */ 16, /* size */ 32,
+                  /* requireF16Feature */ false)
+                .Strided(12, 4),
+            Field("array<vec3<u32>, 3>", /* align */ 16, /* size */ 48,
+                  /* requireF16Feature */ false)
+                .Strided(12, 4),
+            Field("array<vec3<u32>, 4>", /* align */ 16, /* size */ 64,
+                  /* requireF16Feature */ false)
+                .Strided(12, 4),
+            Field("array<vec4<u32>, 1>", /* align */ 16, /* size */ 16,
+                  /* requireF16Feature */ false),
+            Field("array<vec4<u32>, 2>", /* align */ 16, /* size */ 32,
+                  /* requireF16Feature */ false),
+            Field("array<vec4<u32>, 3>", /* align */ 16, /* size */ 48,
+                  /* requireF16Feature */ false),
+            Field("array<vec4<u32>, 4>", /* align */ 16, /* size */ 64,
+                  /* requireF16Feature */ false),
 
             // Array types with custom alignment
-            Field("array<u32, 1>", /* align */ 4, /* size */ 4)
+            Field("array<u32, 1>", /* align */ 4, /* size */ 4, /* requireF16Feature */ false)
                 .AlignAttribute(32)
                 .StorageBufferOnly(),
-            Field("array<u32, 2>", /* align */ 4, /* size */ 8)
+            Field("array<u32, 2>", /* align */ 4, /* size */ 8, /* requireF16Feature */ false)
                 .AlignAttribute(32)
                 .StorageBufferOnly(),
-            Field("array<u32, 3>", /* align */ 4, /* size */ 12)
+            Field("array<u32, 3>", /* align */ 4, /* size */ 12, /* requireF16Feature */ false)
                 .AlignAttribute(32)
                 .StorageBufferOnly(),
-            Field("array<u32, 4>", /* align */ 4, /* size */ 16)
+            Field("array<u32, 4>", /* align */ 4, /* size */ 16, /* requireF16Feature */ false)
                 .AlignAttribute(32)
                 .StorageBufferOnly(),
-            Field("array<vec2<u32>, 1>", /* align */ 8, /* size */ 8)
+            Field("array<vec2<u32>, 1>", /* align */ 8, /* size */ 8, /* requireF16Feature */ false)
                 .AlignAttribute(32)
                 .StorageBufferOnly(),
-            Field("array<vec2<u32>, 2>", /* align */ 8, /* size */ 16)
+            Field("array<vec2<u32>, 2>", /* align */ 8, /* size */ 16,
+                  /* requireF16Feature */ false)
                 .AlignAttribute(32)
                 .StorageBufferOnly(),
-            Field("array<vec2<u32>, 3>", /* align */ 8, /* size */ 24)
+            Field("array<vec2<u32>, 3>", /* align */ 8, /* size */ 24,
+                  /* requireF16Feature */ false)
                 .AlignAttribute(32)
                 .StorageBufferOnly(),
-            Field("array<vec2<u32>, 4>", /* align */ 8, /* size */ 32)
+            Field("array<vec2<u32>, 4>", /* align */ 8, /* size */ 32,
+                  /* requireF16Feature */ false)
                 .AlignAttribute(32)
                 .StorageBufferOnly(),
-            Field("array<vec3<u32>, 1>", /* align */ 16, /* size */ 16)
+            Field("array<vec3<u32>, 1>", /* align */ 16, /* size */ 16,
+                  /* requireF16Feature */ false)
                 .AlignAttribute(32)
                 .Strided(12, 4),
-            Field("array<vec3<u32>, 2>", /* align */ 16, /* size */ 32)
+            Field("array<vec3<u32>, 2>", /* align */ 16, /* size */ 32,
+                  /* requireF16Feature */ false)
                 .AlignAttribute(32)
                 .Strided(12, 4),
-            Field("array<vec3<u32>, 3>", /* align */ 16, /* size */ 48)
+            Field("array<vec3<u32>, 3>", /* align */ 16, /* size */ 48,
+                  /* requireF16Feature */ false)
                 .AlignAttribute(32)
                 .Strided(12, 4),
-            Field("array<vec3<u32>, 4>", /* align */ 16, /* size */ 64)
+            Field("array<vec3<u32>, 4>", /* align */ 16, /* size */ 64,
+                  /* requireF16Feature */ false)
                 .AlignAttribute(32)
                 .Strided(12, 4),
-            Field("array<vec4<u32>, 1>", /* align */ 16, /* size */ 16).AlignAttribute(32),
-            Field("array<vec4<u32>, 2>", /* align */ 16, /* size */ 32).AlignAttribute(32),
-            Field("array<vec4<u32>, 3>", /* align */ 16, /* size */ 48).AlignAttribute(32),
-            Field("array<vec4<u32>, 4>", /* align */ 16, /* size */ 64).AlignAttribute(32),
+            Field("array<vec4<u32>, 1>", /* align */ 16, /* size */ 16,
+                  /* requireF16Feature */ false)
+                .AlignAttribute(32),
+            Field("array<vec4<u32>, 2>", /* align */ 16, /* size */ 32,
+                  /* requireF16Feature */ false)
+                .AlignAttribute(32),
+            Field("array<vec4<u32>, 3>", /* align */ 16, /* size */ 48,
+                  /* requireF16Feature */ false)
+                .AlignAttribute(32),
+            Field("array<vec4<u32>, 4>", /* align */ 16, /* size */ 64,
+                  /* requireF16Feature */ false)
+                .AlignAttribute(32),
 
             // Array types with custom size
-            Field("array<u32, 1>", /* align */ 4, /* size */ 4)
+            Field("array<u32, 1>", /* align */ 4, /* size */ 4, /* requireF16Feature */ false)
                 .SizeAttribute(128)
                 .StorageBufferOnly(),
-            Field("array<u32, 2>", /* align */ 4, /* size */ 8)
+            Field("array<u32, 2>", /* align */ 4, /* size */ 8, /* requireF16Feature */ false)
                 .SizeAttribute(128)
                 .StorageBufferOnly(),
-            Field("array<u32, 3>", /* align */ 4, /* size */ 12)
+            Field("array<u32, 3>", /* align */ 4, /* size */ 12, /* requireF16Feature */ false)
                 .SizeAttribute(128)
                 .StorageBufferOnly(),
-            Field("array<u32, 4>", /* align */ 4, /* size */ 16)
+            Field("array<u32, 4>", /* align */ 4, /* size */ 16, /* requireF16Feature */ false)
                 .SizeAttribute(128)
                 .StorageBufferOnly(),
-            Field("array<vec3<u32>, 4>", /* align */ 16, /* size */ 64)
+            Field("array<vec3<u32>, 4>", /* align */ 16, /* size */ 64,
+                  /* requireF16Feature */ false)
                 .SizeAttribute(128)
                 .Strided(12, 4),
+
+            // Array of f32 matrix
+            Field("array<mat2x2<f32>, 3>", /* align */ 8, /* size */ 48,
+                  /* requireF16Feature */ false)
+                .StorageBufferOnly(),
+            // The uniform address space requires the array alignment to be rounded up to 16.
+            Field("array<mat2x2<f32>, 3>", /* align */ 8, /* size */ 48,
+                  /* requireF16Feature */ false)
+                .AlignAttribute(16),
+            Field("array<mat2x3<f32>, 3>", /* align */ 16, /* size */ 96,
+                  /* requireF16Feature */ false)
+                .Strided(12, 4),
+            Field("array<mat2x4<f32>, 3>", /* align */ 16, /* size */ 96,
+                  /* requireF16Feature */ false),
+            Field("array<mat3x2<f32>, 3>", /* align */ 8, /* size */ 72,
+                  /* requireF16Feature */ false)
+                .StorageBufferOnly(),
+            // `mat3x2<f32>` can not be the element type of a uniform array, because its size 24 is
+            // not a multiple of 16.
+            Field("array<mat3x2<f32>, 3>", /* align */ 8, /* size */ 72,
+                  /* requireF16Feature */ false)
+                .AlignAttribute(16)
+                .StorageBufferOnly(),
+            Field("array<mat3x3<f32>, 3>", /* align */ 16, /* size */ 144,
+                  /* requireF16Feature */ false)
+                .Strided(12, 4),
+            Field("array<mat3x4<f32>, 3>", /* align */ 16, /* size */ 144,
+                  /* requireF16Feature */ false),
+            Field("array<mat4x2<f32>, 3>", /* align */ 8, /* size */ 96,
+                  /* requireF16Feature */ false)
+                .StorageBufferOnly(),
+            Field("array<mat4x2<f32>, 3>", /* align */ 8, /* size */ 96,
+                  /* requireF16Feature */ false)
+                .AlignAttribute(16),
+            Field("array<mat4x3<f32>, 3>", /* align */ 16, /* size */ 192,
+                  /* requireF16Feature */ false)
+                .Strided(12, 4),
+            Field("array<mat4x4<f32>, 3>", /* align */ 16, /* size */ 192,
+                  /* requireF16Feature */ false),
+
+            // Array of f16 matrix
+            Field("array<mat2x2<f16>, 3>", /* align */ 4, /* size */ 24,
+                  /* requireF16Feature */ true)
+                .StorageBufferOnly(),
+            Field("array<mat2x3<f16>, 3>", /* align */ 8, /* size */ 48,
+                  /* requireF16Feature */ true)
+                .Strided(6, 2)
+                .StorageBufferOnly(),
+            Field("array<mat2x4<f16>, 3>", /* align */ 8, /* size */ 48,
+                  /* requireF16Feature */ true)
+                .StorageBufferOnly(),
+            Field("array<mat3x2<f16>, 3>", /* align */ 4, /* size */ 36,
+                  /* requireF16Feature */ true)
+                .StorageBufferOnly(),
+            Field("array<mat3x3<f16>, 3>", /* align */ 8, /* size */ 72,
+                  /* requireF16Feature */ true)
+                .Strided(6, 2)
+                .StorageBufferOnly(),
+            Field("array<mat3x4<f16>, 3>", /* align */ 8, /* size */ 72,
+                  /* requireF16Feature */ true)
+                .StorageBufferOnly(),
+            Field("array<mat4x2<f16>, 3>", /* align */ 4, /* size */ 48,
+                  /* requireF16Feature */ true)
+                .StorageBufferOnly(),
+            Field("array<mat4x3<f16>, 3>", /* align */ 8, /* size */ 96,
+                  /* requireF16Feature */ true)
+                .Strided(6, 2)
+                .StorageBufferOnly(),
+            Field("array<mat4x4<f16>, 3>", /* align */ 8, /* size */ 96,
+                  /* requireF16Feature */ true)
+                .StorageBufferOnly(),
+            // The uniform address space requires the array alignment to be rounded up to 16, and
+            // the array element size to be a multiple of 16.
+            Field("array<mat2x2<f16>, 3>", /* align */ 4, /* size */ 24,
+                  /* requireF16Feature */ true)
+                .AlignAttribute(16)
+                .StorageBufferOnly(),
+            Field("array<mat2x3<f16>, 3>", /* align */ 8, /* size */ 48,
+                  /* requireF16Feature */ true)
+                .AlignAttribute(16)
+                .Strided(6, 2),
+            Field("array<mat2x4<f16>, 3>", /* align */ 8, /* size */ 48,
+                  /* requireF16Feature */ true)
+                .AlignAttribute(16),
+            Field("array<mat3x2<f16>, 3>", /* align */ 4, /* size */ 36,
+                  /* requireF16Feature */ true)
+                .AlignAttribute(16)
+                .StorageBufferOnly(),
+            Field("array<mat3x3<f16>, 3>", /* align */ 8, /* size */ 72,
+                  /* requireF16Feature */ true)
+                .AlignAttribute(16)
+                .Strided(6, 2)
+                .StorageBufferOnly(),
+            Field("array<mat3x4<f16>, 3>", /* align */ 8, /* size */ 72,
+                  /* requireF16Feature */ true)
+                .AlignAttribute(16)
+                .StorageBufferOnly(),
+            Field("array<mat4x2<f16>, 3>", /* align */ 4, /* size */ 48,
+                  /* requireF16Feature */ true)
+                .AlignAttribute(16),
+            Field("array<mat4x3<f16>, 3>", /* align */ 8, /* size */ 96,
+                  /* requireF16Feature */ true)
+                .AlignAttribute(16)
+                .Strided(6, 2),
+            Field("array<mat4x4<f16>, 3>", /* align */ 8, /* size */ 96,
+                  /* requireF16Feature */ true)
+                .AlignAttribute(16),
         });
 
     std::vector<ComputeLayoutMemoryBufferTestParams> filtered;

diff --git a/src/tint/BUILD.gn b/src/tint/BUILD.gn
index 491fd7d..3741764 100644
--- a/src/tint/BUILD.gn
+++ b/src/tint/BUILD.gn

@@ -1246,6 +1246,8 @@
       "transform/single_entry_point_test.cc",
       "transform/spirv_atomic_test.cc",
       "transform/std140_exhaustive_test.cc",
+      "transform/std140_f16_test.cc",
+      "transform/std140_f32_test.cc",
       "transform/std140_test.cc",
       "transform/substitute_override_test.cc",
       "transform/test_helper.h",

diff --git a/src/tint/CMakeLists.txt b/src/tint/CMakeLists.txt
index dfe9433..a8644b7 100644
--- a/src/tint/CMakeLists.txt
+++ b/src/tint/CMakeLists.txt

@@ -1212,6 +1212,8 @@
       transform/single_entry_point_test.cc
       transform/spirv_atomic_test.cc
       transform/std140_exhaustive_test.cc
+      transform/std140_f16_test.cc
+      transform/std140_f32_test.cc
       transform/std140_test.cc
       transform/substitute_override_test.cc
       transform/test_helper.h

diff --git a/src/tint/resolver/address_space_layout_validation_test.cc b/src/tint/resolver/address_space_layout_validation_test.cc
index 82da573..b34b889 100644
--- a/src/tint/resolver/address_space_layout_validation_test.cc
+++ b/src/tint/resolver/address_space_layout_validation_test.cc

@@ -363,6 +363,29 @@
     ASSERT_TRUE(r()->Resolve()) << r()->error();
 }
 
+// Make sure that this doesn't fail validation because vec3<f16>'s align is 8, but
+// its size is 6. 's' should be at offset 6, which is okay here.
+TEST_F(ResolverAddressSpaceLayoutValidationTest, UniformBuffer_Vec3F16MemberOffset_NoFail) {
+    // struct ScalarPackedAtEndOfVec3 {
+    //     v : vec3<f16>;
+    //     s : f16;
+    // };
+    // @group(0) @binding(0)
+    // var<uniform> a : ScalarPackedAtEndOfVec3;
+
+    Enable(ast::Extension::kF16);
+
+    Structure("ScalarPackedAtEndOfVec3", utils::Vector{
+                                             Member("v", ty.vec3(ty.f16())),
+                                             Member("s", ty.f16()),
+                                         });
+
+    GlobalVar(Source{{78, 90}}, "a", ty.type_name("ScalarPackedAtEndOfVec3"),
+              ast::AddressSpace::kUniform, Group(0_a), Binding(0_a));
+
+    ASSERT_TRUE(r()->Resolve()) << r()->error();
+}
+
 // Detect array stride must be a multiple of 16 bytes for uniform buffers
 TEST_F(ResolverAddressSpaceLayoutValidationTest, UniformBuffer_InvalidArrayStride_Scalar) {
     // type Inner = array<f32, 10u>;

diff --git a/src/tint/resolver/address_space_validation_test.cc b/src/tint/resolver/address_space_validation_test.cc
index 3ef19a6..60e54df 100644
--- a/src/tint/resolver/address_space_validation_test.cc
+++ b/src/tint/resolver/address_space_validation_test.cc

@@ -113,98 +113,6 @@
 56:78 note: while instantiating 'var' g)");
 }
 
-// F16 types in storage and uniform buffer is not implemented yet.
-// TODO(tint:1473, tint:1502): make these testcases valid after f16 is supported.
-TEST_F(ResolverAddressSpaceValidationTest, StorageBufferF16_TemporallyBan) {
-    // var<storage> g : f16;
-    Enable(ast::Extension::kF16);
-
-    GlobalVar("g", ty.f16(Source{{56, 78}}), ast::AddressSpace::kStorage, Binding(0_a), Group(0_a));
-
-    ASSERT_FALSE(r()->Resolve());
-
-    EXPECT_EQ(r()->error(),
-              "56:78 error: using f16 types in 'storage' address space is not "
-              "implemented yet");
-}
-
-TEST_F(ResolverAddressSpaceValidationTest, StorageBufferF16Alias_TemporallyBan) {
-    // type a = f16;
-    // var<storage, read> g : a;
-    Enable(ast::Extension::kF16);
-
-    auto* a = Alias("a", ty.f16());
-    GlobalVar("g", ty.type_name(Source{{56, 78}}, a->name), ast::AddressSpace::kStorage,
-              Binding(0_a), Group(0_a));
-
-    ASSERT_FALSE(r()->Resolve());
-
-    EXPECT_EQ(r()->error(),
-              "56:78 error: using f16 types in 'storage' address space is not "
-              "implemented yet");
-}
-
-TEST_F(ResolverAddressSpaceValidationTest, StorageBufferVectorF16_TemporallyBan) {
-    // var<storage> g : vec4<f16>;
-    Enable(ast::Extension::kF16);
-    GlobalVar("g", ty.vec(Source{{56, 78}}, ty.Of<f16>(), 4u), ast::AddressSpace::kStorage,
-              Binding(0_a), Group(0_a));
-
-    ASSERT_FALSE(r()->Resolve());
-
-    EXPECT_EQ(r()->error(),
-              "56:78 error: using f16 types in 'storage' address space is not "
-              "implemented yet");
-}
-
-TEST_F(ResolverAddressSpaceValidationTest, StorageBufferArrayF16_TemporallyBan) {
-    // struct S { a : f16 };
-    // var<storage, read> g : array<S, 3u>;
-    Enable(ast::Extension::kF16);
-
-    auto* s = Structure("S", utils::Vector{Member("a", ty.f16(Source{{56, 78}}))});
-    auto* a = ty.array(ty.Of(s), 3_u);
-    GlobalVar("g", a, ast::AddressSpace::kStorage, ast::Access::kRead, Binding(0_a), Group(0_a));
-
-    ASSERT_FALSE(r()->Resolve());
-
-    EXPECT_THAT(r()->error(), HasSubstr("56:78 error: using f16 types in 'storage' address "
-                                        "space is not implemented yet"));
-}
-
-TEST_F(ResolverAddressSpaceValidationTest, StorageBufferStructF16_TemporallyBan) {
-    // struct S { x : f16 };
-    // var<storage, read> g : S;
-    Enable(ast::Extension::kF16);
-
-    auto* s = Structure("S", utils::Vector{Member("x", ty.f16(Source{{12, 34}}))});
-    GlobalVar("g", ty.Of(s), ast::AddressSpace::kStorage, ast::Access::kRead, Binding(0_a),
-              Group(0_a));
-
-    ASSERT_FALSE(r()->Resolve());
-
-    EXPECT_THAT(r()->error(), HasSubstr("12:34 error: using f16 types in 'storage' address "
-                                        "space is not implemented yet"));
-}
-
-TEST_F(ResolverAddressSpaceValidationTest, StorageBufferNoErrorStructF16Aliases_TemporallyBan) {
-    // struct S { x : f16 };
-    // type a1 = S;
-    // var<storage, read> g : a1;
-    Enable(ast::Extension::kF16);
-
-    auto* s = Structure("S", utils::Vector{Member("x", ty.f16(Source{{12, 34}}))});
-    auto* a1 = Alias("a1", ty.Of(s));
-    auto* a2 = Alias("a2", ty.Of(a1));
-    GlobalVar("g", ty.Of(a2), ast::AddressSpace::kStorage, ast::Access::kRead, Binding(0_a),
-              Group(0_a));
-
-    ASSERT_FALSE(r()->Resolve());
-
-    EXPECT_THAT(r()->error(), HasSubstr("12:34 error: using f16 types in 'storage' address "
-                                        "space is not implemented yet"));
-}
-
 TEST_F(ResolverAddressSpaceValidationTest, StorageBufferPointer) {
     // var<storage> g : ptr<private, f32>;
     GlobalVar(Source{{56, 78}}, "g", ty.pointer(ty.f32(), ast::AddressSpace::kPrivate),
@@ -226,6 +134,27 @@
     ASSERT_TRUE(r()->Resolve()) << r()->error();
 }
 
+TEST_F(ResolverAddressSpaceValidationTest, StorageBufferF16) {
+    // var<storage> g : f16;
+    Enable(ast::Extension::kF16);
+
+    GlobalVar("g", ty.f16(Source{{56, 78}}), ast::AddressSpace::kStorage, Binding(0_a), Group(0_a));
+
+    ASSERT_TRUE(r()->Resolve()) << r()->error();
+}
+
+TEST_F(ResolverAddressSpaceValidationTest, StorageBufferF16Alias) {
+    // type a = f16;
+    // var<storage, read> g : a;
+    Enable(ast::Extension::kF16);
+
+    auto* a = Alias("a", ty.f16());
+    GlobalVar("g", ty.type_name(Source{{56, 78}}, a->name), ast::AddressSpace::kStorage,
+              Binding(0_a), Group(0_a));
+
+    ASSERT_TRUE(r()->Resolve()) << r()->error();
+}
+
 TEST_F(ResolverAddressSpaceValidationTest, StorageBufferVectorF32) {
     // var<storage> g : vec4<f32>;
     GlobalVar(Source{{56, 78}}, "g", ty.vec4<f32>(), ast::AddressSpace::kStorage, Binding(0_a),
@@ -234,6 +163,15 @@
     ASSERT_TRUE(r()->Resolve()) << r()->error();
 }
 
+TEST_F(ResolverAddressSpaceValidationTest, StorageBufferVectorF16) {
+    // var<storage> g : vec4<f16>;
+    Enable(ast::Extension::kF16);
+    GlobalVar("g", ty.vec(Source{{56, 78}}, ty.Of<f16>(), 4u), ast::AddressSpace::kStorage,
+              Binding(0_a), Group(0_a));
+
+    ASSERT_TRUE(r()->Resolve()) << r()->error();
+}
+
 TEST_F(ResolverAddressSpaceValidationTest, StorageBufferArrayF32) {
     // var<storage, read> g : array<S, 3u>;
     auto* s = Structure("S", utils::Vector{Member("a", ty.f32())});
@@ -244,6 +182,68 @@
     ASSERT_TRUE(r()->Resolve()) << r()->error();
 }
 
+TEST_F(ResolverAddressSpaceValidationTest, StorageBufferArrayF16) {
+    // var<storage, read> g : array<S, 3u>;
+    Enable(ast::Extension::kF16);
+
+    auto* s = Structure("S", utils::Vector{Member("a", ty.f16())});
+    auto* a = ty.array(ty.Of(s), 3_u);
+    GlobalVar(Source{{56, 78}}, "g", a, ast::AddressSpace::kStorage, ast::Access::kRead,
+              Binding(0_a), Group(0_a));
+
+    ASSERT_TRUE(r()->Resolve()) << r()->error();
+}
+
+TEST_F(ResolverAddressSpaceValidationTest, StorageBufferStructI32) {
+    // struct S { x : i32 };
+    // var<storage, read> g : S;
+    auto* s = Structure("S", utils::Vector{Member(Source{{12, 34}}, "x", ty.i32())});
+    GlobalVar(Source{{56, 78}}, "g", ty.Of(s), ast::AddressSpace::kStorage, ast::Access::kRead,
+              Binding(0_a), Group(0_a));
+
+    ASSERT_TRUE(r()->Resolve()) << r()->error();
+}
+
+TEST_F(ResolverAddressSpaceValidationTest, StorageBufferStructI32Aliases) {
+    // struct S { x : i32 };
+    // type a1 = S; type a2 = a1;
+    // var<storage, read> g : a2;
+    auto* s = Structure("S", utils::Vector{Member(Source{{12, 34}}, "x", ty.i32())});
+    auto* a1 = Alias("a1", ty.Of(s));
+    auto* a2 = Alias("a2", ty.Of(a1));
+    GlobalVar(Source{{56, 78}}, "g", ty.Of(a2), ast::AddressSpace::kStorage, ast::Access::kRead,
+              Binding(0_a), Group(0_a));
+
+    ASSERT_TRUE(r()->Resolve()) << r()->error();
+}
+
+TEST_F(ResolverAddressSpaceValidationTest, StorageBufferStructF16) {
+    // struct S { x : f16 };
+    // var<storage, read> g : S;
+    Enable(ast::Extension::kF16);
+
+    auto* s = Structure("S", utils::Vector{Member("x", ty.f16(Source{{12, 34}}))});
+    GlobalVar("g", ty.Of(s), ast::AddressSpace::kStorage, ast::Access::kRead, Binding(0_a),
+              Group(0_a));
+
+    ASSERT_TRUE(r()->Resolve()) << r()->error();
+}
+
+TEST_F(ResolverAddressSpaceValidationTest, StorageBufferStructF16Aliases) {
+    // struct S { x : f16 };
+    // type a1 = S; type a2 = a1;
+    // var<storage, read> g : a2;
+    Enable(ast::Extension::kF16);
+
+    auto* s = Structure("S", utils::Vector{Member("x", ty.f16(Source{{12, 34}}))});
+    auto* a1 = Alias("a1", ty.Of(s));
+    auto* a2 = Alias("a2", ty.Of(a1));
+    GlobalVar("g", ty.Of(a2), ast::AddressSpace::kStorage, ast::Access::kRead, Binding(0_a),
+              Group(0_a));
+
+    ASSERT_TRUE(r()->Resolve()) << r()->error();
+}
+
 TEST_F(ResolverAddressSpaceValidationTest, NotStorage_AccessMode) {
     // var<private, read> g : a;
     GlobalVar(Source{{56, 78}}, "g", ty.i32(), ast::AddressSpace::kPrivate, ast::Access::kRead);
@@ -282,29 +282,6 @@
               R"(56:78 error: access mode 'write' is not valid for the 'storage' address space)");
 }
 
-TEST_F(ResolverAddressSpaceValidationTest, StorageBufferStructI32) {
-    // struct S { x : i32 };
-    // var<storage, read> g : S;
-    auto* s = Structure("S", utils::Vector{Member(Source{{12, 34}}, "x", ty.i32())});
-    GlobalVar(Source{{56, 78}}, "g", ty.Of(s), ast::AddressSpace::kStorage, ast::Access::kRead,
-              Binding(0_a), Group(0_a));
-
-    ASSERT_TRUE(r()->Resolve());
-}
-
-TEST_F(ResolverAddressSpaceValidationTest, StorageBufferNoErrorStructI32Aliases) {
-    // struct S { x : i32 };
-    // type a1 = S;
-    // var<storage, read> g : a1;
-    auto* s = Structure("S", utils::Vector{Member(Source{{12, 34}}, "x", ty.i32())});
-    auto* a1 = Alias("a1", ty.Of(s));
-    auto* a2 = Alias("a2", ty.Of(a1));
-    GlobalVar(Source{{56, 78}}, "g", ty.Of(a2), ast::AddressSpace::kStorage, ast::Access::kRead,
-              Binding(0_a), Group(0_a));
-
-    ASSERT_TRUE(r()->Resolve());
-}
-
 TEST_F(ResolverAddressSpaceValidationTest, UniformBuffer_Struct_Runtime) {
     // struct S { m:  array<f32>; };
     // @group(0) @binding(0) var<uniform, > svar : S;
@@ -349,97 +326,6 @@
 56:78 note: while instantiating 'var' g)");
 }
 
-// F16 types in storage and uniform buffer is not implemented yet.
-// TODO(tint:1473, tint:1502): make these testcases valid after f16 is supported.
-TEST_F(ResolverAddressSpaceValidationTest, UniformBufferF16_TemporallyBan) {
-    // var<uniform> g : f16;
-    Enable(ast::Extension::kF16);
-
-    GlobalVar("g", ty.f16(Source{{56, 78}}), ast::AddressSpace::kUniform, Binding(0_a), Group(0_a));
-
-    ASSERT_FALSE(r()->Resolve());
-
-    EXPECT_EQ(r()->error(),
-              "56:78 error: using f16 types in 'uniform' address space is not "
-              "implemented yet");
-}
-
-TEST_F(ResolverAddressSpaceValidationTest, UniformBufferF16Alias_TemporallyBan) {
-    // type a = f16;
-    // var<uniform> g : a;
-    Enable(ast::Extension::kF16);
-
-    auto* a = Alias("a", ty.f16());
-    GlobalVar("g", ty.type_name(Source{{56, 78}}, a->name), ast::AddressSpace::kUniform,
-              Binding(0_a), Group(0_a));
-
-    ASSERT_FALSE(r()->Resolve());
-
-    EXPECT_EQ(r()->error(),
-              "56:78 error: using f16 types in 'uniform' address space is not "
-              "implemented yet");
-}
-
-TEST_F(ResolverAddressSpaceValidationTest, UniformBufferVectorF16_TemporallyBan) {
-    // var<uniform> g : vec4<f16>;
-    Enable(ast::Extension::kF16);
-    GlobalVar("g", ty.vec(Source{{56, 78}}, ty.Of<f16>(), 4u), ast::AddressSpace::kUniform,
-              Binding(0_a), Group(0_a));
-
-    ASSERT_FALSE(r()->Resolve());
-
-    EXPECT_THAT(r()->error(), HasSubstr("56:78 error: using f16 types in 'uniform' address "
-                                        "space is not implemented yet"));
-}
-
-TEST_F(ResolverAddressSpaceValidationTest, UniformBufferArrayF16_TemporallyBan) {
-    // struct S {
-    //   @size(16) f : f16;
-    // }
-    // var<uniform> g : array<S, 3u>;
-    Enable(ast::Extension::kF16);
-
-    auto* s = Structure(
-        "S", utils::Vector{Member("a", ty.f16(Source{{56, 78}}), utils::Vector{MemberSize(16_a)})});
-    auto* a = ty.array(ty.Of(s), 3_u);
-    GlobalVar("g", a, ast::AddressSpace::kUniform, Binding(0_a), Group(0_a));
-
-    ASSERT_FALSE(r()->Resolve());
-
-    EXPECT_THAT(r()->error(), HasSubstr("56:78 error: using f16 types in 'uniform' address "
-                                        "space is not implemented yet"));
-}
-
-TEST_F(ResolverAddressSpaceValidationTest, UniformBufferStructF16_TemporallyBan) {
-    // struct S { x : f16 };
-    // var<uniform> g :  S;
-    Enable(ast::Extension::kF16);
-
-    auto* s = Structure("S", utils::Vector{Member("x", ty.f16(Source{{12, 34}}))});
-    GlobalVar("g", ty.Of(s), ast::AddressSpace::kUniform, Binding(0_a), Group(0_a));
-
-    ASSERT_FALSE(r()->Resolve());
-
-    EXPECT_THAT(r()->error(), HasSubstr("12:34 error: using f16 types in 'uniform' address "
-                                        "space is not implemented yet"));
-}
-
-TEST_F(ResolverAddressSpaceValidationTest, UniformBufferStructF16Aliases_TemporallyBan) {
-    // struct S { x : f16 };
-    // type a1 = S;
-    // var<uniform> g : a1;
-    Enable(ast::Extension::kF16);
-
-    auto* s = Structure("S", utils::Vector{Member("x", ty.f16(Source{{12, 34}}))});
-    auto* a1 = Alias("a1", ty.Of(s));
-    GlobalVar("g", ty.Of(a1), ast::AddressSpace::kUniform, Binding(0_a), Group(0_a));
-
-    ASSERT_FALSE(r()->Resolve());
-
-    EXPECT_THAT(r()->error(), HasSubstr("12:34 error: using f16 types in 'uniform' address "
-                                        "space is not implemented yet"));
-}
-
 TEST_F(ResolverAddressSpaceValidationTest, UniformBufferPointer) {
     // var<uniform> g : ptr<private, f32>;
     GlobalVar(Source{{56, 78}}, "g", ty.pointer(ty.f32(), ast::AddressSpace::kPrivate),
@@ -461,6 +347,16 @@
     ASSERT_TRUE(r()->Resolve()) << r()->error();
 }
 
+TEST_F(ResolverAddressSpaceValidationTest, UniformBufferF16) {
+    // var<uniform> g : f16;
+    Enable(ast::Extension::kF16);
+
+    GlobalVar(Source{{56, 78}}, "g", ty.f16(), ast::AddressSpace::kUniform, Binding(0_a),
+              Group(0_a));
+
+    ASSERT_TRUE(r()->Resolve()) << r()->error();
+}
+
 TEST_F(ResolverAddressSpaceValidationTest, UniformBufferVectorF32) {
     // var<uniform> g : vec4<f32>;
     GlobalVar(Source{{56, 78}}, "g", ty.vec4<f32>(), ast::AddressSpace::kUniform, Binding(0_a),
@@ -469,6 +365,16 @@
     ASSERT_TRUE(r()->Resolve()) << r()->error();
 }
 
+TEST_F(ResolverAddressSpaceValidationTest, UniformBufferVectorF16) {
+    // var<uniform> g : vec4<f16>;
+    Enable(ast::Extension::kF16);
+
+    GlobalVar(Source{{56, 78}}, "g", ty.vec4<f16>(), ast::AddressSpace::kUniform, Binding(0_a),
+              Group(0_a));
+
+    ASSERT_TRUE(r()->Resolve()) << r()->error();
+}
+
 TEST_F(ResolverAddressSpaceValidationTest, UniformBufferArrayF32) {
     // struct S {
     //   @size(16) f : f32;
@@ -481,6 +387,20 @@
     ASSERT_TRUE(r()->Resolve()) << r()->error();
 }
 
+TEST_F(ResolverAddressSpaceValidationTest, UniformBufferArrayF16) {
+    // struct S {
+    //   @size(16) f : f16;
+    // }
+    // var<uniform> g : array<S, 3u>;
+    Enable(ast::Extension::kF16);
+
+    auto* s = Structure("S", utils::Vector{Member("a", ty.f16(), utils::Vector{MemberSize(16_a)})});
+    auto* a = ty.array(ty.Of(s), 3_u);
+    GlobalVar(Source{{56, 78}}, "g", a, ast::AddressSpace::kUniform, Binding(0_a), Group(0_a));
+
+    ASSERT_TRUE(r()->Resolve()) << r()->error();
+}
+
 TEST_F(ResolverAddressSpaceValidationTest, UniformBufferStructI32) {
     // struct S { x : i32 };
     // var<uniform> g :  S;
@@ -503,6 +423,32 @@
     ASSERT_TRUE(r()->Resolve()) << r()->error();
 }
 
+TEST_F(ResolverAddressSpaceValidationTest, UniformBufferStructF16) {
+    // struct S { x : f16 };
+    // var<uniform> g :  S;
+    Enable(ast::Extension::kF16);
+
+    auto* s = Structure("S", utils::Vector{Member(Source{{12, 34}}, "x", ty.f16())});
+    GlobalVar(Source{{56, 78}}, "g", ty.Of(s), ast::AddressSpace::kUniform, Binding(0_a),
+              Group(0_a));
+
+    ASSERT_TRUE(r()->Resolve()) << r()->error();
+}
+
+TEST_F(ResolverAddressSpaceValidationTest, UniformBufferStructF16Aliases) {
+    // struct S { x : f16 };
+    // type a1 = S;
+    // var<uniform> g : a1;
+    Enable(ast::Extension::kF16);
+
+    auto* s = Structure("S", utils::Vector{Member(Source{{12, 34}}, "x", ty.f16())});
+    auto* a1 = Alias("a1", ty.Of(s));
+    GlobalVar(Source{{56, 78}}, "g", ty.Of(a1), ast::AddressSpace::kUniform, Binding(0_a),
+              Group(0_a));
+
+    ASSERT_TRUE(r()->Resolve()) << r()->error();
+}
+
 TEST_F(ResolverAddressSpaceValidationTest, PushConstantBool) {
     // enable chromium_experimental_push_constant;
     // var<push_constant> g : bool;

diff --git a/src/tint/resolver/validator.cc b/src/tint/resolver/validator.cc
index 2fc8f21..70d9f8e 100644
--- a/src/tint/resolver/validator.cc
+++ b/src/tint/resolver/validator.cc

@@ -395,13 +395,11 @@
         return true;
     }
 
-    // Temporally forbid using f16 types in "uniform" and "storage" address space.
-    // TODO(tint:1473, tint:1502): Remove this error after f16 is supported in "uniform" and
-    // "storage" address space but keep for "push_constant" address space.
-    if (Is<sem::F16>(sem::Type::DeepestElementOf(store_ty))) {
-        AddError("using f16 types in '" + utils::ToString(address_space) +
-                     "' address space is not implemented yet",
-                 source);
+    // Among the three host-shareable address spaces, f16 is supported in the "uniform" and
+    // "storage" address spaces, but not yet in the "push_constant" address space.
+    if (Is<sem::F16>(sem::Type::DeepestElementOf(store_ty)) &&
+        address_space == ast::AddressSpace::kPushConstant) {
+        AddError("using f16 types in 'push_constant' address space is not implemented yet", source);
         return false;
     }
 

diff --git a/src/tint/transform/decompose_memory_access.cc b/src/tint/transform/decompose_memory_access.cc
index 046583e..3be550c 100644
--- a/src/tint/transform/decompose_memory_access.cc
+++ b/src/tint/transform/decompose_memory_access.cc

@@ -153,6 +153,10 @@
         out = DecomposeMemoryAccess::Intrinsic::DataType::kF32;
         return true;
     }
+    if (ty->Is<sem::F16>()) {
+        out = DecomposeMemoryAccess::Intrinsic::DataType::kF16;
+        return true;
+    }
     if (auto* vec = ty->As<sem::Vector>()) {
         switch (vec->Width()) {
             case 2:
@@ -168,6 +172,10 @@
                     out = DecomposeMemoryAccess::Intrinsic::DataType::kVec2F32;
                     return true;
                 }
+                if (vec->type()->Is<sem::F16>()) {
+                    out = DecomposeMemoryAccess::Intrinsic::DataType::kVec2F16;
+                    return true;
+                }
                 break;
             case 3:
                 if (vec->type()->Is<sem::I32>()) {
@@ -182,6 +190,10 @@
                     out = DecomposeMemoryAccess::Intrinsic::DataType::kVec3F32;
                     return true;
                 }
+                if (vec->type()->Is<sem::F16>()) {
+                    out = DecomposeMemoryAccess::Intrinsic::DataType::kVec3F16;
+                    return true;
+                }
                 break;
             case 4:
                 if (vec->type()->Is<sem::I32>()) {
@@ -196,6 +208,10 @@
                     out = DecomposeMemoryAccess::Intrinsic::DataType::kVec4F32;
                     return true;
                 }
+                if (vec->type()->Is<sem::F16>()) {
+                    out = DecomposeMemoryAccess::Intrinsic::DataType::kVec4F16;
+                    return true;
+                }
                 break;
         }
         return false;
@@ -776,6 +792,9 @@
         case DataType::kI32:
             ss << "i32";
             break;
+        case DataType::kF16:
+            ss << "f16";
+            break;
         case DataType::kVec2U32:
             ss << "vec2_u32";
             break;
@@ -785,6 +804,9 @@
         case DataType::kVec2I32:
             ss << "vec2_i32";
             break;
+        case DataType::kVec2F16:
+            ss << "vec2_f16";
+            break;
         case DataType::kVec3U32:
             ss << "vec3_u32";
             break;
@@ -794,6 +816,9 @@
         case DataType::kVec3I32:
             ss << "vec3_i32";
             break;
+        case DataType::kVec3F16:
+            ss << "vec3_f16";
+            break;
         case DataType::kVec4U32:
             ss << "vec4_u32";
             break;
@@ -803,6 +828,9 @@
         case DataType::kVec4I32:
             ss << "vec4_i32";
             break;
+        case DataType::kVec4F16:
+            ss << "vec4_f16";
+            break;
     }
     return ss.str();
 }

diff --git a/src/tint/transform/decompose_memory_access.h b/src/tint/transform/decompose_memory_access.h
index 21c196b..3c620e0 100644
--- a/src/tint/transform/decompose_memory_access.h
+++ b/src/tint/transform/decompose_memory_access.h

@@ -60,15 +60,19 @@
             kU32,
             kF32,
             kI32,
+            kF16,
             kVec2U32,
             kVec2F32,
             kVec2I32,
+            kVec2F16,
             kVec3U32,
             kVec3F32,
             kVec3I32,
+            kVec3F16,
             kVec4U32,
             kVec4F32,
             kVec4I32,
+            kVec4F16,
         };
 
         /// Constructor

diff --git a/src/tint/transform/decompose_memory_access_test.cc b/src/tint/transform/decompose_memory_access_test.cc
index 581731e..ac798e0 100644
--- a/src/tint/transform/decompose_memory_access_test.cc
+++ b/src/tint/transform/decompose_memory_access_test.cc

@@ -51,192 +51,308 @@
 
 TEST_F(DecomposeMemoryAccessTest, SB_BasicLoad) {
     auto* src = R"(
+enable f16;
+
 struct SB {
-  a : i32,
-  b : u32,
-  c : f32,
-  d : vec2<i32>,
-  e : vec2<u32>,
-  f : vec2<f32>,
-  g : vec3<i32>,
-  h : vec3<u32>,
-  i : vec3<f32>,
-  j : vec4<i32>,
-  k : vec4<u32>,
-  l : vec4<f32>,
-  m : mat2x2<f32>,
-  n : mat2x3<f32>,
-  o : mat2x4<f32>,
-  p : mat3x2<f32>,
-  q : mat3x3<f32>,
-  r : mat3x4<f32>,
-  s : mat4x2<f32>,
-  t : mat4x3<f32>,
-  u : mat4x4<f32>,
-  v : array<vec3<f32>, 2>,
+  scalar_f32 : f32,
+  scalar_i32 : i32,
+  scalar_u32 : u32,
+  scalar_f16 : f16,
+  vec2_f32 : vec2<f32>,
+  vec2_i32 : vec2<i32>,
+  vec2_u32 : vec2<u32>,
+  vec2_f16 : vec2<f16>,
+  vec3_f32 : vec3<f32>,
+  vec3_i32 : vec3<i32>,
+  vec3_u32 : vec3<u32>,
+  vec3_f16 : vec3<f16>,
+  vec4_f32 : vec4<f32>,
+  vec4_i32 : vec4<i32>,
+  vec4_u32 : vec4<u32>,
+  vec4_f16 : vec4<f16>,
+  mat2x2_f32 : mat2x2<f32>,
+  mat2x3_f32 : mat2x3<f32>,
+  mat2x4_f32 : mat2x4<f32>,
+  mat3x2_f32 : mat3x2<f32>,
+  mat3x3_f32 : mat3x3<f32>,
+  mat3x4_f32 : mat3x4<f32>,
+  mat4x2_f32 : mat4x2<f32>,
+  mat4x3_f32 : mat4x3<f32>,
+  mat4x4_f32 : mat4x4<f32>,
+  mat2x2_f16 : mat2x2<f16>,
+  mat2x3_f16 : mat2x3<f16>,
+  mat2x4_f16 : mat2x4<f16>,
+  mat3x2_f16 : mat3x2<f16>,
+  mat3x3_f16 : mat3x3<f16>,
+  mat3x4_f16 : mat3x4<f16>,
+  mat4x2_f16 : mat4x2<f16>,
+  mat4x3_f16 : mat4x3<f16>,
+  mat4x4_f16 : mat4x4<f16>,
+  arr2_vec3_f32 : array<vec3<f32>, 2>,
+  arr2_vec3_f16 : array<vec3<f16>, 2>,
 };
 
 @group(0) @binding(0) var<storage, read_write> sb : SB;
 
 @compute @workgroup_size(1)
 fn main() {
-  var a : i32 = sb.a;
-  var b : u32 = sb.b;
-  var c : f32 = sb.c;
-  var d : vec2<i32> = sb.d;
-  var e : vec2<u32> = sb.e;
-  var f : vec2<f32> = sb.f;
-  var g : vec3<i32> = sb.g;
-  var h : vec3<u32> = sb.h;
-  var i : vec3<f32> = sb.i;
-  var j : vec4<i32> = sb.j;
-  var k : vec4<u32> = sb.k;
-  var l : vec4<f32> = sb.l;
-  var m : mat2x2<f32> = sb.m;
-  var n : mat2x3<f32> = sb.n;
-  var o : mat2x4<f32> = sb.o;
-  var p : mat3x2<f32> = sb.p;
-  var q : mat3x3<f32> = sb.q;
-  var r : mat3x4<f32> = sb.r;
-  var s : mat4x2<f32> = sb.s;
-  var t : mat4x3<f32> = sb.t;
-  var u : mat4x4<f32> = sb.u;
-  var v : array<vec3<f32>, 2> = sb.v;
+  var scalar_f32 : f32 = sb.scalar_f32;
+  var scalar_i32 : i32 = sb.scalar_i32;
+  var scalar_u32 : u32 = sb.scalar_u32;
+  var scalar_f16 : f16 = sb.scalar_f16;
+  var vec2_f32 : vec2<f32> = sb.vec2_f32;
+  var vec2_i32 : vec2<i32> = sb.vec2_i32;
+  var vec2_u32 : vec2<u32> = sb.vec2_u32;
+  var vec2_f16 : vec2<f16> = sb.vec2_f16;
+  var vec3_f32 : vec3<f32> = sb.vec3_f32;
+  var vec3_i32 : vec3<i32> = sb.vec3_i32;
+  var vec3_u32 : vec3<u32> = sb.vec3_u32;
+  var vec3_f16 : vec3<f16> = sb.vec3_f16;
+  var vec4_f32 : vec4<f32> = sb.vec4_f32;
+  var vec4_i32 : vec4<i32> = sb.vec4_i32;
+  var vec4_u32 : vec4<u32> = sb.vec4_u32;
+  var vec4_f16 : vec4<f16> = sb.vec4_f16;
+  var mat2x2_f32 : mat2x2<f32> = sb.mat2x2_f32;
+  var mat2x3_f32 : mat2x3<f32> = sb.mat2x3_f32;
+  var mat2x4_f32 : mat2x4<f32> = sb.mat2x4_f32;
+  var mat3x2_f32 : mat3x2<f32> = sb.mat3x2_f32;
+  var mat3x3_f32 : mat3x3<f32> = sb.mat3x3_f32;
+  var mat3x4_f32 : mat3x4<f32> = sb.mat3x4_f32;
+  var mat4x2_f32 : mat4x2<f32> = sb.mat4x2_f32;
+  var mat4x3_f32 : mat4x3<f32> = sb.mat4x3_f32;
+  var mat4x4_f32 : mat4x4<f32> = sb.mat4x4_f32;
+  var mat2x2_f16 : mat2x2<f16> = sb.mat2x2_f16;
+  var mat2x3_f16 : mat2x3<f16> = sb.mat2x3_f16;
+  var mat2x4_f16 : mat2x4<f16> = sb.mat2x4_f16;
+  var mat3x2_f16 : mat3x2<f16> = sb.mat3x2_f16;
+  var mat3x3_f16 : mat3x3<f16> = sb.mat3x3_f16;
+  var mat3x4_f16 : mat3x4<f16> = sb.mat3x4_f16;
+  var mat4x2_f16 : mat4x2<f16> = sb.mat4x2_f16;
+  var mat4x3_f16 : mat4x3<f16> = sb.mat4x3_f16;
+  var mat4x4_f16 : mat4x4<f16> = sb.mat4x4_f16;
+  var arr2_vec3_f32 : array<vec3<f32>, 2> = sb.arr2_vec3_f32;
+  var arr2_vec3_f16 : array<vec3<f16>, 2> = sb.arr2_vec3_f16;
 }
 )";
 
     auto* expect = R"(
+enable f16;
+
 struct SB {
-  a : i32,
-  b : u32,
-  c : f32,
-  d : vec2<i32>,
-  e : vec2<u32>,
-  f : vec2<f32>,
-  g : vec3<i32>,
-  h : vec3<u32>,
-  i : vec3<f32>,
-  j : vec4<i32>,
-  k : vec4<u32>,
-  l : vec4<f32>,
-  m : mat2x2<f32>,
-  n : mat2x3<f32>,
-  o : mat2x4<f32>,
-  p : mat3x2<f32>,
-  q : mat3x3<f32>,
-  r : mat3x4<f32>,
-  s : mat4x2<f32>,
-  t : mat4x3<f32>,
-  u : mat4x4<f32>,
-  v : array<vec3<f32>, 2>,
+  scalar_f32 : f32,
+  scalar_i32 : i32,
+  scalar_u32 : u32,
+  scalar_f16 : f16,
+  vec2_f32 : vec2<f32>,
+  vec2_i32 : vec2<i32>,
+  vec2_u32 : vec2<u32>,
+  vec2_f16 : vec2<f16>,
+  vec3_f32 : vec3<f32>,
+  vec3_i32 : vec3<i32>,
+  vec3_u32 : vec3<u32>,
+  vec3_f16 : vec3<f16>,
+  vec4_f32 : vec4<f32>,
+  vec4_i32 : vec4<i32>,
+  vec4_u32 : vec4<u32>,
+  vec4_f16 : vec4<f16>,
+  mat2x2_f32 : mat2x2<f32>,
+  mat2x3_f32 : mat2x3<f32>,
+  mat2x4_f32 : mat2x4<f32>,
+  mat3x2_f32 : mat3x2<f32>,
+  mat3x3_f32 : mat3x3<f32>,
+  mat3x4_f32 : mat3x4<f32>,
+  mat4x2_f32 : mat4x2<f32>,
+  mat4x3_f32 : mat4x3<f32>,
+  mat4x4_f32 : mat4x4<f32>,
+  mat2x2_f16 : mat2x2<f16>,
+  mat2x3_f16 : mat2x3<f16>,
+  mat2x4_f16 : mat2x4<f16>,
+  mat3x2_f16 : mat3x2<f16>,
+  mat3x3_f16 : mat3x3<f16>,
+  mat3x4_f16 : mat3x4<f16>,
+  mat4x2_f16 : mat4x2<f16>,
+  mat4x3_f16 : mat4x3<f16>,
+  mat4x4_f16 : mat4x4<f16>,
+  arr2_vec3_f32 : array<vec3<f32>, 2>,
+  arr2_vec3_f16 : array<vec3<f16>, 2>,
 }
 
 @group(0) @binding(0) var<storage, read_write> sb : SB;
 
+@internal(intrinsic_load_storage_f32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> f32
+
 @internal(intrinsic_load_storage_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> i32
+fn tint_symbol_1(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> i32
 
 @internal(intrinsic_load_storage_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_1(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> u32
+fn tint_symbol_2(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> u32
 
-@internal(intrinsic_load_storage_f32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_2(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> f32
-
-@internal(intrinsic_load_storage_vec2_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_3(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec2<i32>
-
-@internal(intrinsic_load_storage_vec2_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_4(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec2<u32>
+@internal(intrinsic_load_storage_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_3(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> f16
 
 @internal(intrinsic_load_storage_vec2_f32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_5(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec2<f32>
+fn tint_symbol_4(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec2<f32>
 
-@internal(intrinsic_load_storage_vec3_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_6(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec3<i32>
+@internal(intrinsic_load_storage_vec2_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_5(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec2<i32>
 
-@internal(intrinsic_load_storage_vec3_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_7(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec3<u32>
+@internal(intrinsic_load_storage_vec2_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_6(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec2<u32>
+
+@internal(intrinsic_load_storage_vec2_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_7(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec2<f16>
 
 @internal(intrinsic_load_storage_vec3_f32) @internal(disable_validation__function_has_no_body)
 fn tint_symbol_8(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec3<f32>
 
-@internal(intrinsic_load_storage_vec4_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_9(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec4<i32>
+@internal(intrinsic_load_storage_vec3_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_9(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec3<i32>
 
-@internal(intrinsic_load_storage_vec4_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_10(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec4<u32>
+@internal(intrinsic_load_storage_vec3_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_10(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec3<u32>
+
+@internal(intrinsic_load_storage_vec3_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_11(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec3<f16>
 
 @internal(intrinsic_load_storage_vec4_f32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_11(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec4<f32>
+fn tint_symbol_12(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec4<f32>
 
-fn tint_symbol_12(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x2<f32> {
-  return mat2x2<f32>(tint_symbol_5(buffer, (offset + 0u)), tint_symbol_5(buffer, (offset + 8u)));
+@internal(intrinsic_load_storage_vec4_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_13(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec4<i32>
+
+@internal(intrinsic_load_storage_vec4_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_14(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec4<u32>
+
+@internal(intrinsic_load_storage_vec4_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_15(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec4<f16>
+
+fn tint_symbol_16(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x2<f32> {
+  return mat2x2<f32>(tint_symbol_4(buffer, (offset + 0u)), tint_symbol_4(buffer, (offset + 8u)));
 }
 
-fn tint_symbol_13(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x3<f32> {
+fn tint_symbol_17(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x3<f32> {
   return mat2x3<f32>(tint_symbol_8(buffer, (offset + 0u)), tint_symbol_8(buffer, (offset + 16u)));
 }
 
-fn tint_symbol_14(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x4<f32> {
-  return mat2x4<f32>(tint_symbol_11(buffer, (offset + 0u)), tint_symbol_11(buffer, (offset + 16u)));
+fn tint_symbol_18(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x4<f32> {
+  return mat2x4<f32>(tint_symbol_12(buffer, (offset + 0u)), tint_symbol_12(buffer, (offset + 16u)));
 }
 
-fn tint_symbol_15(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x2<f32> {
-  return mat3x2<f32>(tint_symbol_5(buffer, (offset + 0u)), tint_symbol_5(buffer, (offset + 8u)), tint_symbol_5(buffer, (offset + 16u)));
+fn tint_symbol_19(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x2<f32> {
+  return mat3x2<f32>(tint_symbol_4(buffer, (offset + 0u)), tint_symbol_4(buffer, (offset + 8u)), tint_symbol_4(buffer, (offset + 16u)));
 }
 
-fn tint_symbol_16(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x3<f32> {
+fn tint_symbol_20(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x3<f32> {
   return mat3x3<f32>(tint_symbol_8(buffer, (offset + 0u)), tint_symbol_8(buffer, (offset + 16u)), tint_symbol_8(buffer, (offset + 32u)));
 }
 
-fn tint_symbol_17(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x4<f32> {
-  return mat3x4<f32>(tint_symbol_11(buffer, (offset + 0u)), tint_symbol_11(buffer, (offset + 16u)), tint_symbol_11(buffer, (offset + 32u)));
+fn tint_symbol_21(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x4<f32> {
+  return mat3x4<f32>(tint_symbol_12(buffer, (offset + 0u)), tint_symbol_12(buffer, (offset + 16u)), tint_symbol_12(buffer, (offset + 32u)));
 }
 
-fn tint_symbol_18(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x2<f32> {
-  return mat4x2<f32>(tint_symbol_5(buffer, (offset + 0u)), tint_symbol_5(buffer, (offset + 8u)), tint_symbol_5(buffer, (offset + 16u)), tint_symbol_5(buffer, (offset + 24u)));
+fn tint_symbol_22(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x2<f32> {
+  return mat4x2<f32>(tint_symbol_4(buffer, (offset + 0u)), tint_symbol_4(buffer, (offset + 8u)), tint_symbol_4(buffer, (offset + 16u)), tint_symbol_4(buffer, (offset + 24u)));
 }
 
-fn tint_symbol_19(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x3<f32> {
+fn tint_symbol_23(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x3<f32> {
   return mat4x3<f32>(tint_symbol_8(buffer, (offset + 0u)), tint_symbol_8(buffer, (offset + 16u)), tint_symbol_8(buffer, (offset + 32u)), tint_symbol_8(buffer, (offset + 48u)));
 }
 
-fn tint_symbol_20(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x4<f32> {
-  return mat4x4<f32>(tint_symbol_11(buffer, (offset + 0u)), tint_symbol_11(buffer, (offset + 16u)), tint_symbol_11(buffer, (offset + 32u)), tint_symbol_11(buffer, (offset + 48u)));
+fn tint_symbol_24(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x4<f32> {
+  return mat4x4<f32>(tint_symbol_12(buffer, (offset + 0u)), tint_symbol_12(buffer, (offset + 16u)), tint_symbol_12(buffer, (offset + 32u)), tint_symbol_12(buffer, (offset + 48u)));
 }
 
-fn tint_symbol_21(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> array<vec3<f32>, 2u> {
+fn tint_symbol_25(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x2<f16> {
+  return mat2x2<f16>(tint_symbol_7(buffer, (offset + 0u)), tint_symbol_7(buffer, (offset + 4u)));
+}
+
+fn tint_symbol_26(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x3<f16> {
+  return mat2x3<f16>(tint_symbol_11(buffer, (offset + 0u)), tint_symbol_11(buffer, (offset + 8u)));
+}
+
+fn tint_symbol_27(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x4<f16> {
+  return mat2x4<f16>(tint_symbol_15(buffer, (offset + 0u)), tint_symbol_15(buffer, (offset + 8u)));
+}
+
+fn tint_symbol_28(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x2<f16> {
+  return mat3x2<f16>(tint_symbol_7(buffer, (offset + 0u)), tint_symbol_7(buffer, (offset + 4u)), tint_symbol_7(buffer, (offset + 8u)));
+}
+
+fn tint_symbol_29(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x3<f16> {
+  return mat3x3<f16>(tint_symbol_11(buffer, (offset + 0u)), tint_symbol_11(buffer, (offset + 8u)), tint_symbol_11(buffer, (offset + 16u)));
+}
+
+fn tint_symbol_30(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x4<f16> {
+  return mat3x4<f16>(tint_symbol_15(buffer, (offset + 0u)), tint_symbol_15(buffer, (offset + 8u)), tint_symbol_15(buffer, (offset + 16u)));
+}
+
+fn tint_symbol_31(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x2<f16> {
+  return mat4x2<f16>(tint_symbol_7(buffer, (offset + 0u)), tint_symbol_7(buffer, (offset + 4u)), tint_symbol_7(buffer, (offset + 8u)), tint_symbol_7(buffer, (offset + 12u)));
+}
+
+fn tint_symbol_32(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x3<f16> {
+  return mat4x3<f16>(tint_symbol_11(buffer, (offset + 0u)), tint_symbol_11(buffer, (offset + 8u)), tint_symbol_11(buffer, (offset + 16u)), tint_symbol_11(buffer, (offset + 24u)));
+}
+
+fn tint_symbol_33(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x4<f16> {
+  return mat4x4<f16>(tint_symbol_15(buffer, (offset + 0u)), tint_symbol_15(buffer, (offset + 8u)), tint_symbol_15(buffer, (offset + 16u)), tint_symbol_15(buffer, (offset + 24u)));
+}
+
+fn tint_symbol_34(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> array<vec3<f32>, 2u> {
   var arr : array<vec3<f32>, 2u>;
-  for(var i_1 = 0u; (i_1 < 2u); i_1 = (i_1 + 1u)) {
-    arr[i_1] = tint_symbol_8(buffer, (offset + (i_1 * 16u)));
+  for(var i = 0u; (i < 2u); i = (i + 1u)) {
+    arr[i] = tint_symbol_8(buffer, (offset + (i * 16u)));
   }
   return arr;
 }
 
+fn tint_symbol_35(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> array<vec3<f16>, 2u> {
+  var arr_1 : array<vec3<f16>, 2u>;
+  for(var i_1 = 0u; (i_1 < 2u); i_1 = (i_1 + 1u)) {
+    arr_1[i_1] = tint_symbol_11(buffer, (offset + (i_1 * 8u)));
+  }
+  return arr_1;
+}
+
 @compute @workgroup_size(1)
 fn main() {
-  var a : i32 = tint_symbol(&(sb), 0u);
-  var b : u32 = tint_symbol_1(&(sb), 4u);
-  var c : f32 = tint_symbol_2(&(sb), 8u);
-  var d : vec2<i32> = tint_symbol_3(&(sb), 16u);
-  var e : vec2<u32> = tint_symbol_4(&(sb), 24u);
-  var f : vec2<f32> = tint_symbol_5(&(sb), 32u);
-  var g : vec3<i32> = tint_symbol_6(&(sb), 48u);
-  var h : vec3<u32> = tint_symbol_7(&(sb), 64u);
-  var i : vec3<f32> = tint_symbol_8(&(sb), 80u);
-  var j : vec4<i32> = tint_symbol_9(&(sb), 96u);
-  var k : vec4<u32> = tint_symbol_10(&(sb), 112u);
-  var l : vec4<f32> = tint_symbol_11(&(sb), 128u);
-  var m : mat2x2<f32> = tint_symbol_12(&(sb), 144u);
-  var n : mat2x3<f32> = tint_symbol_13(&(sb), 160u);
-  var o : mat2x4<f32> = tint_symbol_14(&(sb), 192u);
-  var p : mat3x2<f32> = tint_symbol_15(&(sb), 224u);
-  var q : mat3x3<f32> = tint_symbol_16(&(sb), 256u);
-  var r : mat3x4<f32> = tint_symbol_17(&(sb), 304u);
-  var s : mat4x2<f32> = tint_symbol_18(&(sb), 352u);
-  var t : mat4x3<f32> = tint_symbol_19(&(sb), 384u);
-  var u : mat4x4<f32> = tint_symbol_20(&(sb), 448u);
-  var v : array<vec3<f32>, 2> = tint_symbol_21(&(sb), 512u);
+  var scalar_f32 : f32 = tint_symbol(&(sb), 0u);
+  var scalar_i32 : i32 = tint_symbol_1(&(sb), 4u);
+  var scalar_u32 : u32 = tint_symbol_2(&(sb), 8u);
+  var scalar_f16 : f16 = tint_symbol_3(&(sb), 12u);
+  var vec2_f32 : vec2<f32> = tint_symbol_4(&(sb), 16u);
+  var vec2_i32 : vec2<i32> = tint_symbol_5(&(sb), 24u);
+  var vec2_u32 : vec2<u32> = tint_symbol_6(&(sb), 32u);
+  var vec2_f16 : vec2<f16> = tint_symbol_7(&(sb), 40u);
+  var vec3_f32 : vec3<f32> = tint_symbol_8(&(sb), 48u);
+  var vec3_i32 : vec3<i32> = tint_symbol_9(&(sb), 64u);
+  var vec3_u32 : vec3<u32> = tint_symbol_10(&(sb), 80u);
+  var vec3_f16 : vec3<f16> = tint_symbol_11(&(sb), 96u);
+  var vec4_f32 : vec4<f32> = tint_symbol_12(&(sb), 112u);
+  var vec4_i32 : vec4<i32> = tint_symbol_13(&(sb), 128u);
+  var vec4_u32 : vec4<u32> = tint_symbol_14(&(sb), 144u);
+  var vec4_f16 : vec4<f16> = tint_symbol_15(&(sb), 160u);
+  var mat2x2_f32 : mat2x2<f32> = tint_symbol_16(&(sb), 168u);
+  var mat2x3_f32 : mat2x3<f32> = tint_symbol_17(&(sb), 192u);
+  var mat2x4_f32 : mat2x4<f32> = tint_symbol_18(&(sb), 224u);
+  var mat3x2_f32 : mat3x2<f32> = tint_symbol_19(&(sb), 256u);
+  var mat3x3_f32 : mat3x3<f32> = tint_symbol_20(&(sb), 288u);
+  var mat3x4_f32 : mat3x4<f32> = tint_symbol_21(&(sb), 336u);
+  var mat4x2_f32 : mat4x2<f32> = tint_symbol_22(&(sb), 384u);
+  var mat4x3_f32 : mat4x3<f32> = tint_symbol_23(&(sb), 416u);
+  var mat4x4_f32 : mat4x4<f32> = tint_symbol_24(&(sb), 480u);
+  var mat2x2_f16 : mat2x2<f16> = tint_symbol_25(&(sb), 544u);
+  var mat2x3_f16 : mat2x3<f16> = tint_symbol_26(&(sb), 552u);
+  var mat2x4_f16 : mat2x4<f16> = tint_symbol_27(&(sb), 568u);
+  var mat3x2_f16 : mat3x2<f16> = tint_symbol_28(&(sb), 584u);
+  var mat3x3_f16 : mat3x3<f16> = tint_symbol_29(&(sb), 600u);
+  var mat3x4_f16 : mat3x4<f16> = tint_symbol_30(&(sb), 624u);
+  var mat4x2_f16 : mat4x2<f16> = tint_symbol_31(&(sb), 648u);
+  var mat4x3_f16 : mat4x3<f16> = tint_symbol_32(&(sb), 664u);
+  var mat4x4_f16 : mat4x4<f16> = tint_symbol_33(&(sb), 696u);
+  var arr2_vec3_f32 : array<vec3<f32>, 2> = tint_symbol_34(&(sb), 736u);
+  var arr2_vec3_f16 : array<vec3<f16>, 2> = tint_symbol_35(&(sb), 768u);
 }
 )";
 
@@ -247,192 +363,308 @@
 
 TEST_F(DecomposeMemoryAccessTest, SB_BasicLoad_OutOfOrder) {
     auto* src = R"(
+enable f16;
+
 @compute @workgroup_size(1)
 fn main() {
-  var a : i32 = sb.a;
-  var b : u32 = sb.b;
-  var c : f32 = sb.c;
-  var d : vec2<i32> = sb.d;
-  var e : vec2<u32> = sb.e;
-  var f : vec2<f32> = sb.f;
-  var g : vec3<i32> = sb.g;
-  var h : vec3<u32> = sb.h;
-  var i : vec3<f32> = sb.i;
-  var j : vec4<i32> = sb.j;
-  var k : vec4<u32> = sb.k;
-  var l : vec4<f32> = sb.l;
-  var m : mat2x2<f32> = sb.m;
-  var n : mat2x3<f32> = sb.n;
-  var o : mat2x4<f32> = sb.o;
-  var p : mat3x2<f32> = sb.p;
-  var q : mat3x3<f32> = sb.q;
-  var r : mat3x4<f32> = sb.r;
-  var s : mat4x2<f32> = sb.s;
-  var t : mat4x3<f32> = sb.t;
-  var u : mat4x4<f32> = sb.u;
-  var v : array<vec3<f32>, 2> = sb.v;
+  var scalar_f32 : f32 = sb.scalar_f32;
+  var scalar_i32 : i32 = sb.scalar_i32;
+  var scalar_u32 : u32 = sb.scalar_u32;
+  var scalar_f16 : f16 = sb.scalar_f16;
+  var vec2_f32 : vec2<f32> = sb.vec2_f32;
+  var vec2_i32 : vec2<i32> = sb.vec2_i32;
+  var vec2_u32 : vec2<u32> = sb.vec2_u32;
+  var vec2_f16 : vec2<f16> = sb.vec2_f16;
+  var vec3_f32 : vec3<f32> = sb.vec3_f32;
+  var vec3_i32 : vec3<i32> = sb.vec3_i32;
+  var vec3_u32 : vec3<u32> = sb.vec3_u32;
+  var vec3_f16 : vec3<f16> = sb.vec3_f16;
+  var vec4_f32 : vec4<f32> = sb.vec4_f32;
+  var vec4_i32 : vec4<i32> = sb.vec4_i32;
+  var vec4_u32 : vec4<u32> = sb.vec4_u32;
+  var vec4_f16 : vec4<f16> = sb.vec4_f16;
+  var mat2x2_f32 : mat2x2<f32> = sb.mat2x2_f32;
+  var mat2x3_f32 : mat2x3<f32> = sb.mat2x3_f32;
+  var mat2x4_f32 : mat2x4<f32> = sb.mat2x4_f32;
+  var mat3x2_f32 : mat3x2<f32> = sb.mat3x2_f32;
+  var mat3x3_f32 : mat3x3<f32> = sb.mat3x3_f32;
+  var mat3x4_f32 : mat3x4<f32> = sb.mat3x4_f32;
+  var mat4x2_f32 : mat4x2<f32> = sb.mat4x2_f32;
+  var mat4x3_f32 : mat4x3<f32> = sb.mat4x3_f32;
+  var mat4x4_f32 : mat4x4<f32> = sb.mat4x4_f32;
+  var mat2x2_f16 : mat2x2<f16> = sb.mat2x2_f16;
+  var mat2x3_f16 : mat2x3<f16> = sb.mat2x3_f16;
+  var mat2x4_f16 : mat2x4<f16> = sb.mat2x4_f16;
+  var mat3x2_f16 : mat3x2<f16> = sb.mat3x2_f16;
+  var mat3x3_f16 : mat3x3<f16> = sb.mat3x3_f16;
+  var mat3x4_f16 : mat3x4<f16> = sb.mat3x4_f16;
+  var mat4x2_f16 : mat4x2<f16> = sb.mat4x2_f16;
+  var mat4x3_f16 : mat4x3<f16> = sb.mat4x3_f16;
+  var mat4x4_f16 : mat4x4<f16> = sb.mat4x4_f16;
+  var arr2_vec3_f32 : array<vec3<f32>, 2> = sb.arr2_vec3_f32;
+  var arr2_vec3_f16 : array<vec3<f16>, 2> = sb.arr2_vec3_f16;
 }
 
 @group(0) @binding(0) var<storage, read_write> sb : SB;
 
 struct SB {
-  a : i32,
-  b : u32,
-  c : f32,
-  d : vec2<i32>,
-  e : vec2<u32>,
-  f : vec2<f32>,
-  g : vec3<i32>,
-  h : vec3<u32>,
-  i : vec3<f32>,
-  j : vec4<i32>,
-  k : vec4<u32>,
-  l : vec4<f32>,
-  m : mat2x2<f32>,
-  n : mat2x3<f32>,
-  o : mat2x4<f32>,
-  p : mat3x2<f32>,
-  q : mat3x3<f32>,
-  r : mat3x4<f32>,
-  s : mat4x2<f32>,
-  t : mat4x3<f32>,
-  u : mat4x4<f32>,
-  v : array<vec3<f32>, 2>,
+  scalar_f32 : f32,
+  scalar_i32 : i32,
+  scalar_u32 : u32,
+  scalar_f16 : f16,
+  vec2_f32 : vec2<f32>,
+  vec2_i32 : vec2<i32>,
+  vec2_u32 : vec2<u32>,
+  vec2_f16 : vec2<f16>,
+  vec3_f32 : vec3<f32>,
+  vec3_i32 : vec3<i32>,
+  vec3_u32 : vec3<u32>,
+  vec3_f16 : vec3<f16>,
+  vec4_f32 : vec4<f32>,
+  vec4_i32 : vec4<i32>,
+  vec4_u32 : vec4<u32>,
+  vec4_f16 : vec4<f16>,
+  mat2x2_f32 : mat2x2<f32>,
+  mat2x3_f32 : mat2x3<f32>,
+  mat2x4_f32 : mat2x4<f32>,
+  mat3x2_f32 : mat3x2<f32>,
+  mat3x3_f32 : mat3x3<f32>,
+  mat3x4_f32 : mat3x4<f32>,
+  mat4x2_f32 : mat4x2<f32>,
+  mat4x3_f32 : mat4x3<f32>,
+  mat4x4_f32 : mat4x4<f32>,
+  mat2x2_f16 : mat2x2<f16>,
+  mat2x3_f16 : mat2x3<f16>,
+  mat2x4_f16 : mat2x4<f16>,
+  mat3x2_f16 : mat3x2<f16>,
+  mat3x3_f16 : mat3x3<f16>,
+  mat3x4_f16 : mat3x4<f16>,
+  mat4x2_f16 : mat4x2<f16>,
+  mat4x3_f16 : mat4x3<f16>,
+  mat4x4_f16 : mat4x4<f16>,
+  arr2_vec3_f32 : array<vec3<f32>, 2>,
+  arr2_vec3_f16 : array<vec3<f16>, 2>,
 };
 )";
 
     auto* expect = R"(
-@internal(intrinsic_load_storage_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> i32
-
-@internal(intrinsic_load_storage_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_1(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> u32
+enable f16;
 
 @internal(intrinsic_load_storage_f32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_2(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> f32
+fn tint_symbol(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> f32
 
-@internal(intrinsic_load_storage_vec2_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_3(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec2<i32>
+@internal(intrinsic_load_storage_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_1(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> i32
 
-@internal(intrinsic_load_storage_vec2_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_4(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec2<u32>
+@internal(intrinsic_load_storage_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_2(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> u32
+
+@internal(intrinsic_load_storage_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_3(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> f16
 
 @internal(intrinsic_load_storage_vec2_f32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_5(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec2<f32>
+fn tint_symbol_4(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec2<f32>
 
-@internal(intrinsic_load_storage_vec3_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_6(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec3<i32>
+@internal(intrinsic_load_storage_vec2_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_5(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec2<i32>
 
-@internal(intrinsic_load_storage_vec3_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_7(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec3<u32>
+@internal(intrinsic_load_storage_vec2_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_6(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec2<u32>
+
+@internal(intrinsic_load_storage_vec2_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_7(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec2<f16>
 
 @internal(intrinsic_load_storage_vec3_f32) @internal(disable_validation__function_has_no_body)
 fn tint_symbol_8(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec3<f32>
 
-@internal(intrinsic_load_storage_vec4_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_9(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec4<i32>
+@internal(intrinsic_load_storage_vec3_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_9(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec3<i32>
 
-@internal(intrinsic_load_storage_vec4_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_10(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec4<u32>
+@internal(intrinsic_load_storage_vec3_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_10(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec3<u32>
+
+@internal(intrinsic_load_storage_vec3_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_11(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec3<f16>
 
 @internal(intrinsic_load_storage_vec4_f32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_11(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec4<f32>
+fn tint_symbol_12(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec4<f32>
 
-fn tint_symbol_12(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x2<f32> {
-  return mat2x2<f32>(tint_symbol_5(buffer, (offset + 0u)), tint_symbol_5(buffer, (offset + 8u)));
+@internal(intrinsic_load_storage_vec4_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_13(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec4<i32>
+
+@internal(intrinsic_load_storage_vec4_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_14(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec4<u32>
+
+@internal(intrinsic_load_storage_vec4_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_15(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec4<f16>
+
+fn tint_symbol_16(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x2<f32> {
+  return mat2x2<f32>(tint_symbol_4(buffer, (offset + 0u)), tint_symbol_4(buffer, (offset + 8u)));
 }
 
-fn tint_symbol_13(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x3<f32> {
+fn tint_symbol_17(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x3<f32> {
   return mat2x3<f32>(tint_symbol_8(buffer, (offset + 0u)), tint_symbol_8(buffer, (offset + 16u)));
 }
 
-fn tint_symbol_14(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x4<f32> {
-  return mat2x4<f32>(tint_symbol_11(buffer, (offset + 0u)), tint_symbol_11(buffer, (offset + 16u)));
+fn tint_symbol_18(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x4<f32> {
+  return mat2x4<f32>(tint_symbol_12(buffer, (offset + 0u)), tint_symbol_12(buffer, (offset + 16u)));
 }
 
-fn tint_symbol_15(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x2<f32> {
-  return mat3x2<f32>(tint_symbol_5(buffer, (offset + 0u)), tint_symbol_5(buffer, (offset + 8u)), tint_symbol_5(buffer, (offset + 16u)));
+fn tint_symbol_19(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x2<f32> {
+  return mat3x2<f32>(tint_symbol_4(buffer, (offset + 0u)), tint_symbol_4(buffer, (offset + 8u)), tint_symbol_4(buffer, (offset + 16u)));
 }
 
-fn tint_symbol_16(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x3<f32> {
+fn tint_symbol_20(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x3<f32> {
   return mat3x3<f32>(tint_symbol_8(buffer, (offset + 0u)), tint_symbol_8(buffer, (offset + 16u)), tint_symbol_8(buffer, (offset + 32u)));
 }
 
-fn tint_symbol_17(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x4<f32> {
-  return mat3x4<f32>(tint_symbol_11(buffer, (offset + 0u)), tint_symbol_11(buffer, (offset + 16u)), tint_symbol_11(buffer, (offset + 32u)));
+fn tint_symbol_21(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x4<f32> {
+  return mat3x4<f32>(tint_symbol_12(buffer, (offset + 0u)), tint_symbol_12(buffer, (offset + 16u)), tint_symbol_12(buffer, (offset + 32u)));
 }
 
-fn tint_symbol_18(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x2<f32> {
-  return mat4x2<f32>(tint_symbol_5(buffer, (offset + 0u)), tint_symbol_5(buffer, (offset + 8u)), tint_symbol_5(buffer, (offset + 16u)), tint_symbol_5(buffer, (offset + 24u)));
+fn tint_symbol_22(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x2<f32> {
+  return mat4x2<f32>(tint_symbol_4(buffer, (offset + 0u)), tint_symbol_4(buffer, (offset + 8u)), tint_symbol_4(buffer, (offset + 16u)), tint_symbol_4(buffer, (offset + 24u)));
 }
 
-fn tint_symbol_19(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x3<f32> {
+fn tint_symbol_23(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x3<f32> {
   return mat4x3<f32>(tint_symbol_8(buffer, (offset + 0u)), tint_symbol_8(buffer, (offset + 16u)), tint_symbol_8(buffer, (offset + 32u)), tint_symbol_8(buffer, (offset + 48u)));
 }
 
-fn tint_symbol_20(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x4<f32> {
-  return mat4x4<f32>(tint_symbol_11(buffer, (offset + 0u)), tint_symbol_11(buffer, (offset + 16u)), tint_symbol_11(buffer, (offset + 32u)), tint_symbol_11(buffer, (offset + 48u)));
+fn tint_symbol_24(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x4<f32> {
+  return mat4x4<f32>(tint_symbol_12(buffer, (offset + 0u)), tint_symbol_12(buffer, (offset + 16u)), tint_symbol_12(buffer, (offset + 32u)), tint_symbol_12(buffer, (offset + 48u)));
 }
 
-fn tint_symbol_21(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> array<vec3<f32>, 2u> {
+fn tint_symbol_25(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x2<f16> {
+  return mat2x2<f16>(tint_symbol_7(buffer, (offset + 0u)), tint_symbol_7(buffer, (offset + 4u)));
+}
+
+fn tint_symbol_26(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x3<f16> {
+  return mat2x3<f16>(tint_symbol_11(buffer, (offset + 0u)), tint_symbol_11(buffer, (offset + 8u)));
+}
+
+fn tint_symbol_27(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x4<f16> {
+  return mat2x4<f16>(tint_symbol_15(buffer, (offset + 0u)), tint_symbol_15(buffer, (offset + 8u)));
+}
+
+fn tint_symbol_28(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x2<f16> {
+  return mat3x2<f16>(tint_symbol_7(buffer, (offset + 0u)), tint_symbol_7(buffer, (offset + 4u)), tint_symbol_7(buffer, (offset + 8u)));
+}
+
+fn tint_symbol_29(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x3<f16> {
+  return mat3x3<f16>(tint_symbol_11(buffer, (offset + 0u)), tint_symbol_11(buffer, (offset + 8u)), tint_symbol_11(buffer, (offset + 16u)));
+}
+
+fn tint_symbol_30(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x4<f16> {
+  return mat3x4<f16>(tint_symbol_15(buffer, (offset + 0u)), tint_symbol_15(buffer, (offset + 8u)), tint_symbol_15(buffer, (offset + 16u)));
+}
+
+fn tint_symbol_31(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x2<f16> {
+  return mat4x2<f16>(tint_symbol_7(buffer, (offset + 0u)), tint_symbol_7(buffer, (offset + 4u)), tint_symbol_7(buffer, (offset + 8u)), tint_symbol_7(buffer, (offset + 12u)));
+}
+
+fn tint_symbol_32(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x3<f16> {
+  return mat4x3<f16>(tint_symbol_11(buffer, (offset + 0u)), tint_symbol_11(buffer, (offset + 8u)), tint_symbol_11(buffer, (offset + 16u)), tint_symbol_11(buffer, (offset + 24u)));
+}
+
+fn tint_symbol_33(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x4<f16> {
+  return mat4x4<f16>(tint_symbol_15(buffer, (offset + 0u)), tint_symbol_15(buffer, (offset + 8u)), tint_symbol_15(buffer, (offset + 16u)), tint_symbol_15(buffer, (offset + 24u)));
+}
+
+fn tint_symbol_34(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> array<vec3<f32>, 2u> {
   var arr : array<vec3<f32>, 2u>;
-  for(var i_1 = 0u; (i_1 < 2u); i_1 = (i_1 + 1u)) {
-    arr[i_1] = tint_symbol_8(buffer, (offset + (i_1 * 16u)));
+  for(var i = 0u; (i < 2u); i = (i + 1u)) {
+    arr[i] = tint_symbol_8(buffer, (offset + (i * 16u)));
   }
   return arr;
 }
 
+fn tint_symbol_35(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> array<vec3<f16>, 2u> {
+  var arr_1 : array<vec3<f16>, 2u>;
+  for(var i_1 = 0u; (i_1 < 2u); i_1 = (i_1 + 1u)) {
+    arr_1[i_1] = tint_symbol_11(buffer, (offset + (i_1 * 8u)));
+  }
+  return arr_1;
+}
+
 @compute @workgroup_size(1)
 fn main() {
-  var a : i32 = tint_symbol(&(sb), 0u);
-  var b : u32 = tint_symbol_1(&(sb), 4u);
-  var c : f32 = tint_symbol_2(&(sb), 8u);
-  var d : vec2<i32> = tint_symbol_3(&(sb), 16u);
-  var e : vec2<u32> = tint_symbol_4(&(sb), 24u);
-  var f : vec2<f32> = tint_symbol_5(&(sb), 32u);
-  var g : vec3<i32> = tint_symbol_6(&(sb), 48u);
-  var h : vec3<u32> = tint_symbol_7(&(sb), 64u);
-  var i : vec3<f32> = tint_symbol_8(&(sb), 80u);
-  var j : vec4<i32> = tint_symbol_9(&(sb), 96u);
-  var k : vec4<u32> = tint_symbol_10(&(sb), 112u);
-  var l : vec4<f32> = tint_symbol_11(&(sb), 128u);
-  var m : mat2x2<f32> = tint_symbol_12(&(sb), 144u);
-  var n : mat2x3<f32> = tint_symbol_13(&(sb), 160u);
-  var o : mat2x4<f32> = tint_symbol_14(&(sb), 192u);
-  var p : mat3x2<f32> = tint_symbol_15(&(sb), 224u);
-  var q : mat3x3<f32> = tint_symbol_16(&(sb), 256u);
-  var r : mat3x4<f32> = tint_symbol_17(&(sb), 304u);
-  var s : mat4x2<f32> = tint_symbol_18(&(sb), 352u);
-  var t : mat4x3<f32> = tint_symbol_19(&(sb), 384u);
-  var u : mat4x4<f32> = tint_symbol_20(&(sb), 448u);
-  var v : array<vec3<f32>, 2> = tint_symbol_21(&(sb), 512u);
+  var scalar_f32 : f32 = tint_symbol(&(sb), 0u);
+  var scalar_i32 : i32 = tint_symbol_1(&(sb), 4u);
+  var scalar_u32 : u32 = tint_symbol_2(&(sb), 8u);
+  var scalar_f16 : f16 = tint_symbol_3(&(sb), 12u);
+  var vec2_f32 : vec2<f32> = tint_symbol_4(&(sb), 16u);
+  var vec2_i32 : vec2<i32> = tint_symbol_5(&(sb), 24u);
+  var vec2_u32 : vec2<u32> = tint_symbol_6(&(sb), 32u);
+  var vec2_f16 : vec2<f16> = tint_symbol_7(&(sb), 40u);
+  var vec3_f32 : vec3<f32> = tint_symbol_8(&(sb), 48u);
+  var vec3_i32 : vec3<i32> = tint_symbol_9(&(sb), 64u);
+  var vec3_u32 : vec3<u32> = tint_symbol_10(&(sb), 80u);
+  var vec3_f16 : vec3<f16> = tint_symbol_11(&(sb), 96u);
+  var vec4_f32 : vec4<f32> = tint_symbol_12(&(sb), 112u);
+  var vec4_i32 : vec4<i32> = tint_symbol_13(&(sb), 128u);
+  var vec4_u32 : vec4<u32> = tint_symbol_14(&(sb), 144u);
+  var vec4_f16 : vec4<f16> = tint_symbol_15(&(sb), 160u);
+  var mat2x2_f32 : mat2x2<f32> = tint_symbol_16(&(sb), 168u);
+  var mat2x3_f32 : mat2x3<f32> = tint_symbol_17(&(sb), 192u);
+  var mat2x4_f32 : mat2x4<f32> = tint_symbol_18(&(sb), 224u);
+  var mat3x2_f32 : mat3x2<f32> = tint_symbol_19(&(sb), 256u);
+  var mat3x3_f32 : mat3x3<f32> = tint_symbol_20(&(sb), 288u);
+  var mat3x4_f32 : mat3x4<f32> = tint_symbol_21(&(sb), 336u);
+  var mat4x2_f32 : mat4x2<f32> = tint_symbol_22(&(sb), 384u);
+  var mat4x3_f32 : mat4x3<f32> = tint_symbol_23(&(sb), 416u);
+  var mat4x4_f32 : mat4x4<f32> = tint_symbol_24(&(sb), 480u);
+  var mat2x2_f16 : mat2x2<f16> = tint_symbol_25(&(sb), 544u);
+  var mat2x3_f16 : mat2x3<f16> = tint_symbol_26(&(sb), 552u);
+  var mat2x4_f16 : mat2x4<f16> = tint_symbol_27(&(sb), 568u);
+  var mat3x2_f16 : mat3x2<f16> = tint_symbol_28(&(sb), 584u);
+  var mat3x3_f16 : mat3x3<f16> = tint_symbol_29(&(sb), 600u);
+  var mat3x4_f16 : mat3x4<f16> = tint_symbol_30(&(sb), 624u);
+  var mat4x2_f16 : mat4x2<f16> = tint_symbol_31(&(sb), 648u);
+  var mat4x3_f16 : mat4x3<f16> = tint_symbol_32(&(sb), 664u);
+  var mat4x4_f16 : mat4x4<f16> = tint_symbol_33(&(sb), 696u);
+  var arr2_vec3_f32 : array<vec3<f32>, 2> = tint_symbol_34(&(sb), 736u);
+  var arr2_vec3_f16 : array<vec3<f16>, 2> = tint_symbol_35(&(sb), 768u);
 }
 
 @group(0) @binding(0) var<storage, read_write> sb : SB;
 
 struct SB {
-  a : i32,
-  b : u32,
-  c : f32,
-  d : vec2<i32>,
-  e : vec2<u32>,
-  f : vec2<f32>,
-  g : vec3<i32>,
-  h : vec3<u32>,
-  i : vec3<f32>,
-  j : vec4<i32>,
-  k : vec4<u32>,
-  l : vec4<f32>,
-  m : mat2x2<f32>,
-  n : mat2x3<f32>,
-  o : mat2x4<f32>,
-  p : mat3x2<f32>,
-  q : mat3x3<f32>,
-  r : mat3x4<f32>,
-  s : mat4x2<f32>,
-  t : mat4x3<f32>,
-  u : mat4x4<f32>,
-  v : array<vec3<f32>, 2>,
+  scalar_f32 : f32,
+  scalar_i32 : i32,
+  scalar_u32 : u32,
+  scalar_f16 : f16,
+  vec2_f32 : vec2<f32>,
+  vec2_i32 : vec2<i32>,
+  vec2_u32 : vec2<u32>,
+  vec2_f16 : vec2<f16>,
+  vec3_f32 : vec3<f32>,
+  vec3_i32 : vec3<i32>,
+  vec3_u32 : vec3<u32>,
+  vec3_f16 : vec3<f16>,
+  vec4_f32 : vec4<f32>,
+  vec4_i32 : vec4<i32>,
+  vec4_u32 : vec4<u32>,
+  vec4_f16 : vec4<f16>,
+  mat2x2_f32 : mat2x2<f32>,
+  mat2x3_f32 : mat2x3<f32>,
+  mat2x4_f32 : mat2x4<f32>,
+  mat3x2_f32 : mat3x2<f32>,
+  mat3x3_f32 : mat3x3<f32>,
+  mat3x4_f32 : mat3x4<f32>,
+  mat4x2_f32 : mat4x2<f32>,
+  mat4x3_f32 : mat4x3<f32>,
+  mat4x4_f32 : mat4x4<f32>,
+  mat2x2_f16 : mat2x2<f16>,
+  mat2x3_f16 : mat2x3<f16>,
+  mat2x4_f16 : mat2x4<f16>,
+  mat3x2_f16 : mat3x2<f16>,
+  mat3x3_f16 : mat3x3<f16>,
+  mat3x4_f16 : mat3x4<f16>,
+  mat4x2_f16 : mat4x2<f16>,
+  mat4x3_f16 : mat4x3<f16>,
+  mat4x4_f16 : mat4x4<f16>,
+  arr2_vec3_f32 : array<vec3<f32>, 2>,
+  arr2_vec3_f16 : array<vec3<f16>, 2>,
 }
 )";
 
@@ -443,192 +675,308 @@
 
 TEST_F(DecomposeMemoryAccessTest, UB_BasicLoad) {
     auto* src = R"(
+enable f16;
+
 struct UB {
-  a : i32,
-  b : u32,
-  c : f32,
-  d : vec2<i32>,
-  e : vec2<u32>,
-  f : vec2<f32>,
-  g : vec3<i32>,
-  h : vec3<u32>,
-  i : vec3<f32>,
-  j : vec4<i32>,
-  k : vec4<u32>,
-  l : vec4<f32>,
-  m : mat2x2<f32>,
-  n : mat2x3<f32>,
-  o : mat2x4<f32>,
-  p : mat3x2<f32>,
-  q : mat3x3<f32>,
-  r : mat3x4<f32>,
-  s : mat4x2<f32>,
-  t : mat4x3<f32>,
-  u : mat4x4<f32>,
-  v : array<vec3<f32>, 2>,
+  scalar_f32 : f32,
+  scalar_i32 : i32,
+  scalar_u32 : u32,
+  scalar_f16 : f16,
+  vec2_f32 : vec2<f32>,
+  vec2_i32 : vec2<i32>,
+  vec2_u32 : vec2<u32>,
+  vec2_f16 : vec2<f16>,
+  vec3_f32 : vec3<f32>,
+  vec3_i32 : vec3<i32>,
+  vec3_u32 : vec3<u32>,
+  vec3_f16 : vec3<f16>,
+  vec4_f32 : vec4<f32>,
+  vec4_i32 : vec4<i32>,
+  vec4_u32 : vec4<u32>,
+  vec4_f16 : vec4<f16>,
+  mat2x2_f32 : mat2x2<f32>,
+  mat2x3_f32 : mat2x3<f32>,
+  mat2x4_f32 : mat2x4<f32>,
+  mat3x2_f32 : mat3x2<f32>,
+  mat3x3_f32 : mat3x3<f32>,
+  mat3x4_f32 : mat3x4<f32>,
+  mat4x2_f32 : mat4x2<f32>,
+  mat4x3_f32 : mat4x3<f32>,
+  mat4x4_f32 : mat4x4<f32>,
+  mat2x2_f16 : mat2x2<f16>,
+  mat2x3_f16 : mat2x3<f16>,
+  mat2x4_f16 : mat2x4<f16>,
+  mat3x2_f16 : mat3x2<f16>,
+  mat3x3_f16 : mat3x3<f16>,
+  mat3x4_f16 : mat3x4<f16>,
+  mat4x2_f16 : mat4x2<f16>,
+  mat4x3_f16 : mat4x3<f16>,
+  mat4x4_f16 : mat4x4<f16>,
+  arr2_vec3_f32 : array<vec3<f32>, 2>,
+  arr2_mat4x2_f16 : array<mat4x2<f16>, 2>,
 };
 
 @group(0) @binding(0) var<uniform> ub : UB;
 
 @compute @workgroup_size(1)
 fn main() {
-  var a : i32 = ub.a;
-  var b : u32 = ub.b;
-  var c : f32 = ub.c;
-  var d : vec2<i32> = ub.d;
-  var e : vec2<u32> = ub.e;
-  var f : vec2<f32> = ub.f;
-  var g : vec3<i32> = ub.g;
-  var h : vec3<u32> = ub.h;
-  var i : vec3<f32> = ub.i;
-  var j : vec4<i32> = ub.j;
-  var k : vec4<u32> = ub.k;
-  var l : vec4<f32> = ub.l;
-  var m : mat2x2<f32> = ub.m;
-  var n : mat2x3<f32> = ub.n;
-  var o : mat2x4<f32> = ub.o;
-  var p : mat3x2<f32> = ub.p;
-  var q : mat3x3<f32> = ub.q;
-  var r : mat3x4<f32> = ub.r;
-  var s : mat4x2<f32> = ub.s;
-  var t : mat4x3<f32> = ub.t;
-  var u : mat4x4<f32> = ub.u;
-  var v : array<vec3<f32>, 2> = ub.v;
+  var scalar_f32 : f32 = ub.scalar_f32;
+  var scalar_i32 : i32 = ub.scalar_i32;
+  var scalar_u32 : u32 = ub.scalar_u32;
+  var scalar_f16 : f16 = ub.scalar_f16;
+  var vec2_f32 : vec2<f32> = ub.vec2_f32;
+  var vec2_i32 : vec2<i32> = ub.vec2_i32;
+  var vec2_u32 : vec2<u32> = ub.vec2_u32;
+  var vec2_f16 : vec2<f16> = ub.vec2_f16;
+  var vec3_f32 : vec3<f32> = ub.vec3_f32;
+  var vec3_i32 : vec3<i32> = ub.vec3_i32;
+  var vec3_u32 : vec3<u32> = ub.vec3_u32;
+  var vec3_f16 : vec3<f16> = ub.vec3_f16;
+  var vec4_f32 : vec4<f32> = ub.vec4_f32;
+  var vec4_i32 : vec4<i32> = ub.vec4_i32;
+  var vec4_u32 : vec4<u32> = ub.vec4_u32;
+  var vec4_f16 : vec4<f16> = ub.vec4_f16;
+  var mat2x2_f32 : mat2x2<f32> = ub.mat2x2_f32;
+  var mat2x3_f32 : mat2x3<f32> = ub.mat2x3_f32;
+  var mat2x4_f32 : mat2x4<f32> = ub.mat2x4_f32;
+  var mat3x2_f32 : mat3x2<f32> = ub.mat3x2_f32;
+  var mat3x3_f32 : mat3x3<f32> = ub.mat3x3_f32;
+  var mat3x4_f32 : mat3x4<f32> = ub.mat3x4_f32;
+  var mat4x2_f32 : mat4x2<f32> = ub.mat4x2_f32;
+  var mat4x3_f32 : mat4x3<f32> = ub.mat4x3_f32;
+  var mat4x4_f32 : mat4x4<f32> = ub.mat4x4_f32;
+  var mat2x2_f16 : mat2x2<f16> = ub.mat2x2_f16;
+  var mat2x3_f16 : mat2x3<f16> = ub.mat2x3_f16;
+  var mat2x4_f16 : mat2x4<f16> = ub.mat2x4_f16;
+  var mat3x2_f16 : mat3x2<f16> = ub.mat3x2_f16;
+  var mat3x3_f16 : mat3x3<f16> = ub.mat3x3_f16;
+  var mat3x4_f16 : mat3x4<f16> = ub.mat3x4_f16;
+  var mat4x2_f16 : mat4x2<f16> = ub.mat4x2_f16;
+  var mat4x3_f16 : mat4x3<f16> = ub.mat4x3_f16;
+  var mat4x4_f16 : mat4x4<f16> = ub.mat4x4_f16;
+  var arr2_vec3_f32 : array<vec3<f32>, 2> = ub.arr2_vec3_f32;
+  var arr2_mat4x2_f16 : array<mat4x2<f16>, 2> = ub.arr2_mat4x2_f16;
 }
 )";
 
     auto* expect = R"(
+enable f16;
+
 struct UB {
-  a : i32,
-  b : u32,
-  c : f32,
-  d : vec2<i32>,
-  e : vec2<u32>,
-  f : vec2<f32>,
-  g : vec3<i32>,
-  h : vec3<u32>,
-  i : vec3<f32>,
-  j : vec4<i32>,
-  k : vec4<u32>,
-  l : vec4<f32>,
-  m : mat2x2<f32>,
-  n : mat2x3<f32>,
-  o : mat2x4<f32>,
-  p : mat3x2<f32>,
-  q : mat3x3<f32>,
-  r : mat3x4<f32>,
-  s : mat4x2<f32>,
-  t : mat4x3<f32>,
-  u : mat4x4<f32>,
-  v : array<vec3<f32>, 2>,
+  scalar_f32 : f32,
+  scalar_i32 : i32,
+  scalar_u32 : u32,
+  scalar_f16 : f16,
+  vec2_f32 : vec2<f32>,
+  vec2_i32 : vec2<i32>,
+  vec2_u32 : vec2<u32>,
+  vec2_f16 : vec2<f16>,
+  vec3_f32 : vec3<f32>,
+  vec3_i32 : vec3<i32>,
+  vec3_u32 : vec3<u32>,
+  vec3_f16 : vec3<f16>,
+  vec4_f32 : vec4<f32>,
+  vec4_i32 : vec4<i32>,
+  vec4_u32 : vec4<u32>,
+  vec4_f16 : vec4<f16>,
+  mat2x2_f32 : mat2x2<f32>,
+  mat2x3_f32 : mat2x3<f32>,
+  mat2x4_f32 : mat2x4<f32>,
+  mat3x2_f32 : mat3x2<f32>,
+  mat3x3_f32 : mat3x3<f32>,
+  mat3x4_f32 : mat3x4<f32>,
+  mat4x2_f32 : mat4x2<f32>,
+  mat4x3_f32 : mat4x3<f32>,
+  mat4x4_f32 : mat4x4<f32>,
+  mat2x2_f16 : mat2x2<f16>,
+  mat2x3_f16 : mat2x3<f16>,
+  mat2x4_f16 : mat2x4<f16>,
+  mat3x2_f16 : mat3x2<f16>,
+  mat3x3_f16 : mat3x3<f16>,
+  mat3x4_f16 : mat3x4<f16>,
+  mat4x2_f16 : mat4x2<f16>,
+  mat4x3_f16 : mat4x3<f16>,
+  mat4x4_f16 : mat4x4<f16>,
+  arr2_vec3_f32 : array<vec3<f32>, 2>,
+  arr2_mat4x2_f16 : array<mat4x2<f16>, 2>,
 }
 
 @group(0) @binding(0) var<uniform> ub : UB;
 
+@internal(intrinsic_load_uniform_f32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> f32
+
 @internal(intrinsic_load_uniform_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> i32
+fn tint_symbol_1(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> i32
 
 @internal(intrinsic_load_uniform_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_1(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> u32
+fn tint_symbol_2(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> u32
 
-@internal(intrinsic_load_uniform_f32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_2(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> f32
-
-@internal(intrinsic_load_uniform_vec2_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_3(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec2<i32>
-
-@internal(intrinsic_load_uniform_vec2_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_4(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec2<u32>
+@internal(intrinsic_load_uniform_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_3(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> f16
 
 @internal(intrinsic_load_uniform_vec2_f32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_5(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec2<f32>
+fn tint_symbol_4(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec2<f32>
 
-@internal(intrinsic_load_uniform_vec3_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_6(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec3<i32>
+@internal(intrinsic_load_uniform_vec2_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_5(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec2<i32>
 
-@internal(intrinsic_load_uniform_vec3_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_7(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec3<u32>
+@internal(intrinsic_load_uniform_vec2_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_6(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec2<u32>
+
+@internal(intrinsic_load_uniform_vec2_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_7(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec2<f16>
 
 @internal(intrinsic_load_uniform_vec3_f32) @internal(disable_validation__function_has_no_body)
 fn tint_symbol_8(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec3<f32>
 
-@internal(intrinsic_load_uniform_vec4_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_9(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec4<i32>
+@internal(intrinsic_load_uniform_vec3_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_9(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec3<i32>
 
-@internal(intrinsic_load_uniform_vec4_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_10(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec4<u32>
+@internal(intrinsic_load_uniform_vec3_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_10(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec3<u32>
+
+@internal(intrinsic_load_uniform_vec3_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_11(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec3<f16>
 
 @internal(intrinsic_load_uniform_vec4_f32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_11(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec4<f32>
+fn tint_symbol_12(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec4<f32>
 
-fn tint_symbol_12(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat2x2<f32> {
-  return mat2x2<f32>(tint_symbol_5(buffer, (offset + 0u)), tint_symbol_5(buffer, (offset + 8u)));
+@internal(intrinsic_load_uniform_vec4_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_13(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec4<i32>
+
+@internal(intrinsic_load_uniform_vec4_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_14(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec4<u32>
+
+@internal(intrinsic_load_uniform_vec4_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_15(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec4<f16>
+
+fn tint_symbol_16(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat2x2<f32> {
+  return mat2x2<f32>(tint_symbol_4(buffer, (offset + 0u)), tint_symbol_4(buffer, (offset + 8u)));
 }
 
-fn tint_symbol_13(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat2x3<f32> {
+fn tint_symbol_17(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat2x3<f32> {
   return mat2x3<f32>(tint_symbol_8(buffer, (offset + 0u)), tint_symbol_8(buffer, (offset + 16u)));
 }
 
-fn tint_symbol_14(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat2x4<f32> {
-  return mat2x4<f32>(tint_symbol_11(buffer, (offset + 0u)), tint_symbol_11(buffer, (offset + 16u)));
+fn tint_symbol_18(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat2x4<f32> {
+  return mat2x4<f32>(tint_symbol_12(buffer, (offset + 0u)), tint_symbol_12(buffer, (offset + 16u)));
 }
 
-fn tint_symbol_15(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat3x2<f32> {
-  return mat3x2<f32>(tint_symbol_5(buffer, (offset + 0u)), tint_symbol_5(buffer, (offset + 8u)), tint_symbol_5(buffer, (offset + 16u)));
+fn tint_symbol_19(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat3x2<f32> {
+  return mat3x2<f32>(tint_symbol_4(buffer, (offset + 0u)), tint_symbol_4(buffer, (offset + 8u)), tint_symbol_4(buffer, (offset + 16u)));
 }
 
-fn tint_symbol_16(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat3x3<f32> {
+fn tint_symbol_20(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat3x3<f32> {
   return mat3x3<f32>(tint_symbol_8(buffer, (offset + 0u)), tint_symbol_8(buffer, (offset + 16u)), tint_symbol_8(buffer, (offset + 32u)));
 }
 
-fn tint_symbol_17(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat3x4<f32> {
-  return mat3x4<f32>(tint_symbol_11(buffer, (offset + 0u)), tint_symbol_11(buffer, (offset + 16u)), tint_symbol_11(buffer, (offset + 32u)));
+fn tint_symbol_21(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat3x4<f32> {
+  return mat3x4<f32>(tint_symbol_12(buffer, (offset + 0u)), tint_symbol_12(buffer, (offset + 16u)), tint_symbol_12(buffer, (offset + 32u)));
 }
 
-fn tint_symbol_18(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat4x2<f32> {
-  return mat4x2<f32>(tint_symbol_5(buffer, (offset + 0u)), tint_symbol_5(buffer, (offset + 8u)), tint_symbol_5(buffer, (offset + 16u)), tint_symbol_5(buffer, (offset + 24u)));
+fn tint_symbol_22(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat4x2<f32> {
+  return mat4x2<f32>(tint_symbol_4(buffer, (offset + 0u)), tint_symbol_4(buffer, (offset + 8u)), tint_symbol_4(buffer, (offset + 16u)), tint_symbol_4(buffer, (offset + 24u)));
 }
 
-fn tint_symbol_19(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat4x3<f32> {
+fn tint_symbol_23(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat4x3<f32> {
   return mat4x3<f32>(tint_symbol_8(buffer, (offset + 0u)), tint_symbol_8(buffer, (offset + 16u)), tint_symbol_8(buffer, (offset + 32u)), tint_symbol_8(buffer, (offset + 48u)));
 }
 
-fn tint_symbol_20(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat4x4<f32> {
-  return mat4x4<f32>(tint_symbol_11(buffer, (offset + 0u)), tint_symbol_11(buffer, (offset + 16u)), tint_symbol_11(buffer, (offset + 32u)), tint_symbol_11(buffer, (offset + 48u)));
+fn tint_symbol_24(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat4x4<f32> {
+  return mat4x4<f32>(tint_symbol_12(buffer, (offset + 0u)), tint_symbol_12(buffer, (offset + 16u)), tint_symbol_12(buffer, (offset + 32u)), tint_symbol_12(buffer, (offset + 48u)));
 }
 
-fn tint_symbol_21(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> array<vec3<f32>, 2u> {
+fn tint_symbol_25(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat2x2<f16> {
+  return mat2x2<f16>(tint_symbol_7(buffer, (offset + 0u)), tint_symbol_7(buffer, (offset + 4u)));
+}
+
+fn tint_symbol_26(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat2x3<f16> {
+  return mat2x3<f16>(tint_symbol_11(buffer, (offset + 0u)), tint_symbol_11(buffer, (offset + 8u)));
+}
+
+fn tint_symbol_27(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat2x4<f16> {
+  return mat2x4<f16>(tint_symbol_15(buffer, (offset + 0u)), tint_symbol_15(buffer, (offset + 8u)));
+}
+
+fn tint_symbol_28(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat3x2<f16> {
+  return mat3x2<f16>(tint_symbol_7(buffer, (offset + 0u)), tint_symbol_7(buffer, (offset + 4u)), tint_symbol_7(buffer, (offset + 8u)));
+}
+
+fn tint_symbol_29(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat3x3<f16> {
+  return mat3x3<f16>(tint_symbol_11(buffer, (offset + 0u)), tint_symbol_11(buffer, (offset + 8u)), tint_symbol_11(buffer, (offset + 16u)));
+}
+
+fn tint_symbol_30(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat3x4<f16> {
+  return mat3x4<f16>(tint_symbol_15(buffer, (offset + 0u)), tint_symbol_15(buffer, (offset + 8u)), tint_symbol_15(buffer, (offset + 16u)));
+}
+
+fn tint_symbol_31(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat4x2<f16> {
+  return mat4x2<f16>(tint_symbol_7(buffer, (offset + 0u)), tint_symbol_7(buffer, (offset + 4u)), tint_symbol_7(buffer, (offset + 8u)), tint_symbol_7(buffer, (offset + 12u)));
+}
+
+fn tint_symbol_32(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat4x3<f16> {
+  return mat4x3<f16>(tint_symbol_11(buffer, (offset + 0u)), tint_symbol_11(buffer, (offset + 8u)), tint_symbol_11(buffer, (offset + 16u)), tint_symbol_11(buffer, (offset + 24u)));
+}
+
+fn tint_symbol_33(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat4x4<f16> {
+  return mat4x4<f16>(tint_symbol_15(buffer, (offset + 0u)), tint_symbol_15(buffer, (offset + 8u)), tint_symbol_15(buffer, (offset + 16u)), tint_symbol_15(buffer, (offset + 24u)));
+}
+
+fn tint_symbol_34(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> array<vec3<f32>, 2u> {
   var arr : array<vec3<f32>, 2u>;
-  for(var i_1 = 0u; (i_1 < 2u); i_1 = (i_1 + 1u)) {
-    arr[i_1] = tint_symbol_8(buffer, (offset + (i_1 * 16u)));
+  for(var i = 0u; (i < 2u); i = (i + 1u)) {
+    arr[i] = tint_symbol_8(buffer, (offset + (i * 16u)));
   }
   return arr;
 }
 
+fn tint_symbol_35(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> array<mat4x2<f16>, 2u> {
+  var arr_1 : array<mat4x2<f16>, 2u>;
+  for(var i_1 = 0u; (i_1 < 2u); i_1 = (i_1 + 1u)) {
+    arr_1[i_1] = tint_symbol_31(buffer, (offset + (i_1 * 16u)));
+  }
+  return arr_1;
+}
+
 @compute @workgroup_size(1)
 fn main() {
-  var a : i32 = tint_symbol(&(ub), 0u);
-  var b : u32 = tint_symbol_1(&(ub), 4u);
-  var c : f32 = tint_symbol_2(&(ub), 8u);
-  var d : vec2<i32> = tint_symbol_3(&(ub), 16u);
-  var e : vec2<u32> = tint_symbol_4(&(ub), 24u);
-  var f : vec2<f32> = tint_symbol_5(&(ub), 32u);
-  var g : vec3<i32> = tint_symbol_6(&(ub), 48u);
-  var h : vec3<u32> = tint_symbol_7(&(ub), 64u);
-  var i : vec3<f32> = tint_symbol_8(&(ub), 80u);
-  var j : vec4<i32> = tint_symbol_9(&(ub), 96u);
-  var k : vec4<u32> = tint_symbol_10(&(ub), 112u);
-  var l : vec4<f32> = tint_symbol_11(&(ub), 128u);
-  var m : mat2x2<f32> = tint_symbol_12(&(ub), 144u);
-  var n : mat2x3<f32> = tint_symbol_13(&(ub), 160u);
-  var o : mat2x4<f32> = tint_symbol_14(&(ub), 192u);
-  var p : mat3x2<f32> = tint_symbol_15(&(ub), 224u);
-  var q : mat3x3<f32> = tint_symbol_16(&(ub), 256u);
-  var r : mat3x4<f32> = tint_symbol_17(&(ub), 304u);
-  var s : mat4x2<f32> = tint_symbol_18(&(ub), 352u);
-  var t : mat4x3<f32> = tint_symbol_19(&(ub), 384u);
-  var u : mat4x4<f32> = tint_symbol_20(&(ub), 448u);
-  var v : array<vec3<f32>, 2> = tint_symbol_21(&(ub), 512u);
+  var scalar_f32 : f32 = tint_symbol(&(ub), 0u);
+  var scalar_i32 : i32 = tint_symbol_1(&(ub), 4u);
+  var scalar_u32 : u32 = tint_symbol_2(&(ub), 8u);
+  var scalar_f16 : f16 = tint_symbol_3(&(ub), 12u);
+  var vec2_f32 : vec2<f32> = tint_symbol_4(&(ub), 16u);
+  var vec2_i32 : vec2<i32> = tint_symbol_5(&(ub), 24u);
+  var vec2_u32 : vec2<u32> = tint_symbol_6(&(ub), 32u);
+  var vec2_f16 : vec2<f16> = tint_symbol_7(&(ub), 40u);
+  var vec3_f32 : vec3<f32> = tint_symbol_8(&(ub), 48u);
+  var vec3_i32 : vec3<i32> = tint_symbol_9(&(ub), 64u);
+  var vec3_u32 : vec3<u32> = tint_symbol_10(&(ub), 80u);
+  var vec3_f16 : vec3<f16> = tint_symbol_11(&(ub), 96u);
+  var vec4_f32 : vec4<f32> = tint_symbol_12(&(ub), 112u);
+  var vec4_i32 : vec4<i32> = tint_symbol_13(&(ub), 128u);
+  var vec4_u32 : vec4<u32> = tint_symbol_14(&(ub), 144u);
+  var vec4_f16 : vec4<f16> = tint_symbol_15(&(ub), 160u);
+  var mat2x2_f32 : mat2x2<f32> = tint_symbol_16(&(ub), 168u);
+  var mat2x3_f32 : mat2x3<f32> = tint_symbol_17(&(ub), 192u);
+  var mat2x4_f32 : mat2x4<f32> = tint_symbol_18(&(ub), 224u);
+  var mat3x2_f32 : mat3x2<f32> = tint_symbol_19(&(ub), 256u);
+  var mat3x3_f32 : mat3x3<f32> = tint_symbol_20(&(ub), 288u);
+  var mat3x4_f32 : mat3x4<f32> = tint_symbol_21(&(ub), 336u);
+  var mat4x2_f32 : mat4x2<f32> = tint_symbol_22(&(ub), 384u);
+  var mat4x3_f32 : mat4x3<f32> = tint_symbol_23(&(ub), 416u);
+  var mat4x4_f32 : mat4x4<f32> = tint_symbol_24(&(ub), 480u);
+  var mat2x2_f16 : mat2x2<f16> = tint_symbol_25(&(ub), 544u);
+  var mat2x3_f16 : mat2x3<f16> = tint_symbol_26(&(ub), 552u);
+  var mat2x4_f16 : mat2x4<f16> = tint_symbol_27(&(ub), 568u);
+  var mat3x2_f16 : mat3x2<f16> = tint_symbol_28(&(ub), 584u);
+  var mat3x3_f16 : mat3x3<f16> = tint_symbol_29(&(ub), 600u);
+  var mat3x4_f16 : mat3x4<f16> = tint_symbol_30(&(ub), 624u);
+  var mat4x2_f16 : mat4x2<f16> = tint_symbol_31(&(ub), 648u);
+  var mat4x3_f16 : mat4x3<f16> = tint_symbol_32(&(ub), 664u);
+  var mat4x4_f16 : mat4x4<f16> = tint_symbol_33(&(ub), 696u);
+  var arr2_vec3_f32 : array<vec3<f32>, 2> = tint_symbol_34(&(ub), 736u);
+  var arr2_mat4x2_f16 : array<mat4x2<f16>, 2> = tint_symbol_35(&(ub), 768u);
 }
 )";
 
@@ -639,192 +987,308 @@
 
 TEST_F(DecomposeMemoryAccessTest, UB_BasicLoad_OutOfOrder) {
     auto* src = R"(
+enable f16;
+
 @compute @workgroup_size(1)
 fn main() {
-  var a : i32 = ub.a;
-  var b : u32 = ub.b;
-  var c : f32 = ub.c;
-  var d : vec2<i32> = ub.d;
-  var e : vec2<u32> = ub.e;
-  var f : vec2<f32> = ub.f;
-  var g : vec3<i32> = ub.g;
-  var h : vec3<u32> = ub.h;
-  var i : vec3<f32> = ub.i;
-  var j : vec4<i32> = ub.j;
-  var k : vec4<u32> = ub.k;
-  var l : vec4<f32> = ub.l;
-  var m : mat2x2<f32> = ub.m;
-  var n : mat2x3<f32> = ub.n;
-  var o : mat2x4<f32> = ub.o;
-  var p : mat3x2<f32> = ub.p;
-  var q : mat3x3<f32> = ub.q;
-  var r : mat3x4<f32> = ub.r;
-  var s : mat4x2<f32> = ub.s;
-  var t : mat4x3<f32> = ub.t;
-  var u : mat4x4<f32> = ub.u;
-  var v : array<vec3<f32>, 2> = ub.v;
+  var scalar_f32 : f32 = ub.scalar_f32;
+  var scalar_i32 : i32 = ub.scalar_i32;
+  var scalar_u32 : u32 = ub.scalar_u32;
+  var scalar_f16 : f16 = ub.scalar_f16;
+  var vec2_f32 : vec2<f32> = ub.vec2_f32;
+  var vec2_i32 : vec2<i32> = ub.vec2_i32;
+  var vec2_u32 : vec2<u32> = ub.vec2_u32;
+  var vec2_f16 : vec2<f16> = ub.vec2_f16;
+  var vec3_f32 : vec3<f32> = ub.vec3_f32;
+  var vec3_i32 : vec3<i32> = ub.vec3_i32;
+  var vec3_u32 : vec3<u32> = ub.vec3_u32;
+  var vec3_f16 : vec3<f16> = ub.vec3_f16;
+  var vec4_f32 : vec4<f32> = ub.vec4_f32;
+  var vec4_i32 : vec4<i32> = ub.vec4_i32;
+  var vec4_u32 : vec4<u32> = ub.vec4_u32;
+  var vec4_f16 : vec4<f16> = ub.vec4_f16;
+  var mat2x2_f32 : mat2x2<f32> = ub.mat2x2_f32;
+  var mat2x3_f32 : mat2x3<f32> = ub.mat2x3_f32;
+  var mat2x4_f32 : mat2x4<f32> = ub.mat2x4_f32;
+  var mat3x2_f32 : mat3x2<f32> = ub.mat3x2_f32;
+  var mat3x3_f32 : mat3x3<f32> = ub.mat3x3_f32;
+  var mat3x4_f32 : mat3x4<f32> = ub.mat3x4_f32;
+  var mat4x2_f32 : mat4x2<f32> = ub.mat4x2_f32;
+  var mat4x3_f32 : mat4x3<f32> = ub.mat4x3_f32;
+  var mat4x4_f32 : mat4x4<f32> = ub.mat4x4_f32;
+  var mat2x2_f16 : mat2x2<f16> = ub.mat2x2_f16;
+  var mat2x3_f16 : mat2x3<f16> = ub.mat2x3_f16;
+  var mat2x4_f16 : mat2x4<f16> = ub.mat2x4_f16;
+  var mat3x2_f16 : mat3x2<f16> = ub.mat3x2_f16;
+  var mat3x3_f16 : mat3x3<f16> = ub.mat3x3_f16;
+  var mat3x4_f16 : mat3x4<f16> = ub.mat3x4_f16;
+  var mat4x2_f16 : mat4x2<f16> = ub.mat4x2_f16;
+  var mat4x3_f16 : mat4x3<f16> = ub.mat4x3_f16;
+  var mat4x4_f16 : mat4x4<f16> = ub.mat4x4_f16;
+  var arr2_vec3_f32 : array<vec3<f32>, 2> = ub.arr2_vec3_f32;
+  var arr2_mat4x2_f16 : array<mat4x2<f16>, 2> = ub.arr2_mat4x2_f16;
 }
 
 @group(0) @binding(0) var<uniform> ub : UB;
 
 struct UB {
-  a : i32,
-  b : u32,
-  c : f32,
-  d : vec2<i32>,
-  e : vec2<u32>,
-  f : vec2<f32>,
-  g : vec3<i32>,
-  h : vec3<u32>,
-  i : vec3<f32>,
-  j : vec4<i32>,
-  k : vec4<u32>,
-  l : vec4<f32>,
-  m : mat2x2<f32>,
-  n : mat2x3<f32>,
-  o : mat2x4<f32>,
-  p : mat3x2<f32>,
-  q : mat3x3<f32>,
-  r : mat3x4<f32>,
-  s : mat4x2<f32>,
-  t : mat4x3<f32>,
-  u : mat4x4<f32>,
-  v : array<vec3<f32>, 2>,
+  scalar_f32 : f32,
+  scalar_i32 : i32,
+  scalar_u32 : u32,
+  scalar_f16 : f16,
+  vec2_f32 : vec2<f32>,
+  vec2_i32 : vec2<i32>,
+  vec2_u32 : vec2<u32>,
+  vec2_f16 : vec2<f16>,
+  vec3_f32 : vec3<f32>,
+  vec3_i32 : vec3<i32>,
+  vec3_u32 : vec3<u32>,
+  vec3_f16 : vec3<f16>,
+  vec4_f32 : vec4<f32>,
+  vec4_i32 : vec4<i32>,
+  vec4_u32 : vec4<u32>,
+  vec4_f16 : vec4<f16>,
+  mat2x2_f32 : mat2x2<f32>,
+  mat2x3_f32 : mat2x3<f32>,
+  mat2x4_f32 : mat2x4<f32>,
+  mat3x2_f32 : mat3x2<f32>,
+  mat3x3_f32 : mat3x3<f32>,
+  mat3x4_f32 : mat3x4<f32>,
+  mat4x2_f32 : mat4x2<f32>,
+  mat4x3_f32 : mat4x3<f32>,
+  mat4x4_f32 : mat4x4<f32>,
+  mat2x2_f16 : mat2x2<f16>,
+  mat2x3_f16 : mat2x3<f16>,
+  mat2x4_f16 : mat2x4<f16>,
+  mat3x2_f16 : mat3x2<f16>,
+  mat3x3_f16 : mat3x3<f16>,
+  mat3x4_f16 : mat3x4<f16>,
+  mat4x2_f16 : mat4x2<f16>,
+  mat4x3_f16 : mat4x3<f16>,
+  mat4x4_f16 : mat4x4<f16>,
+  arr2_vec3_f32 : array<vec3<f32>, 2>,
+  arr2_mat4x2_f16 : array<mat4x2<f16>, 2>,
 };
 )";
 
     auto* expect = R"(
-@internal(intrinsic_load_uniform_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> i32
-
-@internal(intrinsic_load_uniform_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_1(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> u32
+enable f16;
 
 @internal(intrinsic_load_uniform_f32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_2(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> f32
+fn tint_symbol(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> f32
 
-@internal(intrinsic_load_uniform_vec2_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_3(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec2<i32>
+@internal(intrinsic_load_uniform_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_1(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> i32
 
-@internal(intrinsic_load_uniform_vec2_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_4(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec2<u32>
+@internal(intrinsic_load_uniform_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_2(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> u32
+
+@internal(intrinsic_load_uniform_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_3(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> f16
 
 @internal(intrinsic_load_uniform_vec2_f32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_5(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec2<f32>
+fn tint_symbol_4(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec2<f32>
 
-@internal(intrinsic_load_uniform_vec3_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_6(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec3<i32>
+@internal(intrinsic_load_uniform_vec2_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_5(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec2<i32>
 
-@internal(intrinsic_load_uniform_vec3_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_7(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec3<u32>
+@internal(intrinsic_load_uniform_vec2_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_6(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec2<u32>
+
+@internal(intrinsic_load_uniform_vec2_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_7(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec2<f16>
 
 @internal(intrinsic_load_uniform_vec3_f32) @internal(disable_validation__function_has_no_body)
 fn tint_symbol_8(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec3<f32>
 
-@internal(intrinsic_load_uniform_vec4_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_9(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec4<i32>
+@internal(intrinsic_load_uniform_vec3_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_9(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec3<i32>
 
-@internal(intrinsic_load_uniform_vec4_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_10(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec4<u32>
+@internal(intrinsic_load_uniform_vec3_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_10(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec3<u32>
+
+@internal(intrinsic_load_uniform_vec3_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_11(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec3<f16>
 
 @internal(intrinsic_load_uniform_vec4_f32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_11(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec4<f32>
+fn tint_symbol_12(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec4<f32>
 
-fn tint_symbol_12(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat2x2<f32> {
-  return mat2x2<f32>(tint_symbol_5(buffer, (offset + 0u)), tint_symbol_5(buffer, (offset + 8u)));
+@internal(intrinsic_load_uniform_vec4_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_13(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec4<i32>
+
+@internal(intrinsic_load_uniform_vec4_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_14(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec4<u32>
+
+@internal(intrinsic_load_uniform_vec4_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_15(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> vec4<f16>
+
+fn tint_symbol_16(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat2x2<f32> {
+  return mat2x2<f32>(tint_symbol_4(buffer, (offset + 0u)), tint_symbol_4(buffer, (offset + 8u)));
 }
 
-fn tint_symbol_13(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat2x3<f32> {
+fn tint_symbol_17(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat2x3<f32> {
   return mat2x3<f32>(tint_symbol_8(buffer, (offset + 0u)), tint_symbol_8(buffer, (offset + 16u)));
 }
 
-fn tint_symbol_14(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat2x4<f32> {
-  return mat2x4<f32>(tint_symbol_11(buffer, (offset + 0u)), tint_symbol_11(buffer, (offset + 16u)));
+fn tint_symbol_18(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat2x4<f32> {
+  return mat2x4<f32>(tint_symbol_12(buffer, (offset + 0u)), tint_symbol_12(buffer, (offset + 16u)));
 }
 
-fn tint_symbol_15(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat3x2<f32> {
-  return mat3x2<f32>(tint_symbol_5(buffer, (offset + 0u)), tint_symbol_5(buffer, (offset + 8u)), tint_symbol_5(buffer, (offset + 16u)));
+fn tint_symbol_19(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat3x2<f32> {
+  return mat3x2<f32>(tint_symbol_4(buffer, (offset + 0u)), tint_symbol_4(buffer, (offset + 8u)), tint_symbol_4(buffer, (offset + 16u)));
 }
 
-fn tint_symbol_16(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat3x3<f32> {
+fn tint_symbol_20(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat3x3<f32> {
   return mat3x3<f32>(tint_symbol_8(buffer, (offset + 0u)), tint_symbol_8(buffer, (offset + 16u)), tint_symbol_8(buffer, (offset + 32u)));
 }
 
-fn tint_symbol_17(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat3x4<f32> {
-  return mat3x4<f32>(tint_symbol_11(buffer, (offset + 0u)), tint_symbol_11(buffer, (offset + 16u)), tint_symbol_11(buffer, (offset + 32u)));
+fn tint_symbol_21(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat3x4<f32> {
+  return mat3x4<f32>(tint_symbol_12(buffer, (offset + 0u)), tint_symbol_12(buffer, (offset + 16u)), tint_symbol_12(buffer, (offset + 32u)));
 }
 
-fn tint_symbol_18(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat4x2<f32> {
-  return mat4x2<f32>(tint_symbol_5(buffer, (offset + 0u)), tint_symbol_5(buffer, (offset + 8u)), tint_symbol_5(buffer, (offset + 16u)), tint_symbol_5(buffer, (offset + 24u)));
+fn tint_symbol_22(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat4x2<f32> {
+  return mat4x2<f32>(tint_symbol_4(buffer, (offset + 0u)), tint_symbol_4(buffer, (offset + 8u)), tint_symbol_4(buffer, (offset + 16u)), tint_symbol_4(buffer, (offset + 24u)));
 }
 
-fn tint_symbol_19(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat4x3<f32> {
+fn tint_symbol_23(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat4x3<f32> {
   return mat4x3<f32>(tint_symbol_8(buffer, (offset + 0u)), tint_symbol_8(buffer, (offset + 16u)), tint_symbol_8(buffer, (offset + 32u)), tint_symbol_8(buffer, (offset + 48u)));
 }
 
-fn tint_symbol_20(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat4x4<f32> {
-  return mat4x4<f32>(tint_symbol_11(buffer, (offset + 0u)), tint_symbol_11(buffer, (offset + 16u)), tint_symbol_11(buffer, (offset + 32u)), tint_symbol_11(buffer, (offset + 48u)));
+fn tint_symbol_24(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat4x4<f32> {
+  return mat4x4<f32>(tint_symbol_12(buffer, (offset + 0u)), tint_symbol_12(buffer, (offset + 16u)), tint_symbol_12(buffer, (offset + 32u)), tint_symbol_12(buffer, (offset + 48u)));
 }
 
-fn tint_symbol_21(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> array<vec3<f32>, 2u> {
+fn tint_symbol_25(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat2x2<f16> {
+  return mat2x2<f16>(tint_symbol_7(buffer, (offset + 0u)), tint_symbol_7(buffer, (offset + 4u)));
+}
+
+fn tint_symbol_26(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat2x3<f16> {
+  return mat2x3<f16>(tint_symbol_11(buffer, (offset + 0u)), tint_symbol_11(buffer, (offset + 8u)));
+}
+
+fn tint_symbol_27(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat2x4<f16> {
+  return mat2x4<f16>(tint_symbol_15(buffer, (offset + 0u)), tint_symbol_15(buffer, (offset + 8u)));
+}
+
+fn tint_symbol_28(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat3x2<f16> {
+  return mat3x2<f16>(tint_symbol_7(buffer, (offset + 0u)), tint_symbol_7(buffer, (offset + 4u)), tint_symbol_7(buffer, (offset + 8u)));
+}
+
+fn tint_symbol_29(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat3x3<f16> {
+  return mat3x3<f16>(tint_symbol_11(buffer, (offset + 0u)), tint_symbol_11(buffer, (offset + 8u)), tint_symbol_11(buffer, (offset + 16u)));
+}
+
+fn tint_symbol_30(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat3x4<f16> {
+  return mat3x4<f16>(tint_symbol_15(buffer, (offset + 0u)), tint_symbol_15(buffer, (offset + 8u)), tint_symbol_15(buffer, (offset + 16u)));
+}
+
+fn tint_symbol_31(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat4x2<f16> {
+  return mat4x2<f16>(tint_symbol_7(buffer, (offset + 0u)), tint_symbol_7(buffer, (offset + 4u)), tint_symbol_7(buffer, (offset + 8u)), tint_symbol_7(buffer, (offset + 12u)));
+}
+
+fn tint_symbol_32(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat4x3<f16> {
+  return mat4x3<f16>(tint_symbol_11(buffer, (offset + 0u)), tint_symbol_11(buffer, (offset + 8u)), tint_symbol_11(buffer, (offset + 16u)), tint_symbol_11(buffer, (offset + 24u)));
+}
+
+fn tint_symbol_33(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> mat4x4<f16> {
+  return mat4x4<f16>(tint_symbol_15(buffer, (offset + 0u)), tint_symbol_15(buffer, (offset + 8u)), tint_symbol_15(buffer, (offset + 16u)), tint_symbol_15(buffer, (offset + 24u)));
+}
+
+fn tint_symbol_34(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> array<vec3<f32>, 2u> {
   var arr : array<vec3<f32>, 2u>;
-  for(var i_1 = 0u; (i_1 < 2u); i_1 = (i_1 + 1u)) {
-    arr[i_1] = tint_symbol_8(buffer, (offset + (i_1 * 16u)));
+  for(var i = 0u; (i < 2u); i = (i + 1u)) {
+    arr[i] = tint_symbol_8(buffer, (offset + (i * 16u)));
   }
   return arr;
 }
 
+fn tint_symbol_35(@internal(disable_validation__function_parameter) buffer : ptr<uniform, UB, read>, offset : u32) -> array<mat4x2<f16>, 2u> {
+  var arr_1 : array<mat4x2<f16>, 2u>;
+  for(var i_1 = 0u; (i_1 < 2u); i_1 = (i_1 + 1u)) {
+    arr_1[i_1] = tint_symbol_31(buffer, (offset + (i_1 * 16u)));
+  }
+  return arr_1;
+}
+
 @compute @workgroup_size(1)
 fn main() {
-  var a : i32 = tint_symbol(&(ub), 0u);
-  var b : u32 = tint_symbol_1(&(ub), 4u);
-  var c : f32 = tint_symbol_2(&(ub), 8u);
-  var d : vec2<i32> = tint_symbol_3(&(ub), 16u);
-  var e : vec2<u32> = tint_symbol_4(&(ub), 24u);
-  var f : vec2<f32> = tint_symbol_5(&(ub), 32u);
-  var g : vec3<i32> = tint_symbol_6(&(ub), 48u);
-  var h : vec3<u32> = tint_symbol_7(&(ub), 64u);
-  var i : vec3<f32> = tint_symbol_8(&(ub), 80u);
-  var j : vec4<i32> = tint_symbol_9(&(ub), 96u);
-  var k : vec4<u32> = tint_symbol_10(&(ub), 112u);
-  var l : vec4<f32> = tint_symbol_11(&(ub), 128u);
-  var m : mat2x2<f32> = tint_symbol_12(&(ub), 144u);
-  var n : mat2x3<f32> = tint_symbol_13(&(ub), 160u);
-  var o : mat2x4<f32> = tint_symbol_14(&(ub), 192u);
-  var p : mat3x2<f32> = tint_symbol_15(&(ub), 224u);
-  var q : mat3x3<f32> = tint_symbol_16(&(ub), 256u);
-  var r : mat3x4<f32> = tint_symbol_17(&(ub), 304u);
-  var s : mat4x2<f32> = tint_symbol_18(&(ub), 352u);
-  var t : mat4x3<f32> = tint_symbol_19(&(ub), 384u);
-  var u : mat4x4<f32> = tint_symbol_20(&(ub), 448u);
-  var v : array<vec3<f32>, 2> = tint_symbol_21(&(ub), 512u);
+  var scalar_f32 : f32 = tint_symbol(&(ub), 0u);
+  var scalar_i32 : i32 = tint_symbol_1(&(ub), 4u);
+  var scalar_u32 : u32 = tint_symbol_2(&(ub), 8u);
+  var scalar_f16 : f16 = tint_symbol_3(&(ub), 12u);
+  var vec2_f32 : vec2<f32> = tint_symbol_4(&(ub), 16u);
+  var vec2_i32 : vec2<i32> = tint_symbol_5(&(ub), 24u);
+  var vec2_u32 : vec2<u32> = tint_symbol_6(&(ub), 32u);
+  var vec2_f16 : vec2<f16> = tint_symbol_7(&(ub), 40u);
+  var vec3_f32 : vec3<f32> = tint_symbol_8(&(ub), 48u);
+  var vec3_i32 : vec3<i32> = tint_symbol_9(&(ub), 64u);
+  var vec3_u32 : vec3<u32> = tint_symbol_10(&(ub), 80u);
+  var vec3_f16 : vec3<f16> = tint_symbol_11(&(ub), 96u);
+  var vec4_f32 : vec4<f32> = tint_symbol_12(&(ub), 112u);
+  var vec4_i32 : vec4<i32> = tint_symbol_13(&(ub), 128u);
+  var vec4_u32 : vec4<u32> = tint_symbol_14(&(ub), 144u);
+  var vec4_f16 : vec4<f16> = tint_symbol_15(&(ub), 160u);
+  var mat2x2_f32 : mat2x2<f32> = tint_symbol_16(&(ub), 168u);
+  var mat2x3_f32 : mat2x3<f32> = tint_symbol_17(&(ub), 192u);
+  var mat2x4_f32 : mat2x4<f32> = tint_symbol_18(&(ub), 224u);
+  var mat3x2_f32 : mat3x2<f32> = tint_symbol_19(&(ub), 256u);
+  var mat3x3_f32 : mat3x3<f32> = tint_symbol_20(&(ub), 288u);
+  var mat3x4_f32 : mat3x4<f32> = tint_symbol_21(&(ub), 336u);
+  var mat4x2_f32 : mat4x2<f32> = tint_symbol_22(&(ub), 384u);
+  var mat4x3_f32 : mat4x3<f32> = tint_symbol_23(&(ub), 416u);
+  var mat4x4_f32 : mat4x4<f32> = tint_symbol_24(&(ub), 480u);
+  var mat2x2_f16 : mat2x2<f16> = tint_symbol_25(&(ub), 544u);
+  var mat2x3_f16 : mat2x3<f16> = tint_symbol_26(&(ub), 552u);
+  var mat2x4_f16 : mat2x4<f16> = tint_symbol_27(&(ub), 568u);
+  var mat3x2_f16 : mat3x2<f16> = tint_symbol_28(&(ub), 584u);
+  var mat3x3_f16 : mat3x3<f16> = tint_symbol_29(&(ub), 600u);
+  var mat3x4_f16 : mat3x4<f16> = tint_symbol_30(&(ub), 624u);
+  var mat4x2_f16 : mat4x2<f16> = tint_symbol_31(&(ub), 648u);
+  var mat4x3_f16 : mat4x3<f16> = tint_symbol_32(&(ub), 664u);
+  var mat4x4_f16 : mat4x4<f16> = tint_symbol_33(&(ub), 696u);
+  var arr2_vec3_f32 : array<vec3<f32>, 2> = tint_symbol_34(&(ub), 736u);
+  var arr2_mat4x2_f16 : array<mat4x2<f16>, 2> = tint_symbol_35(&(ub), 768u);
 }
 
 @group(0) @binding(0) var<uniform> ub : UB;
 
 struct UB {
-  a : i32,
-  b : u32,
-  c : f32,
-  d : vec2<i32>,
-  e : vec2<u32>,
-  f : vec2<f32>,
-  g : vec3<i32>,
-  h : vec3<u32>,
-  i : vec3<f32>,
-  j : vec4<i32>,
-  k : vec4<u32>,
-  l : vec4<f32>,
-  m : mat2x2<f32>,
-  n : mat2x3<f32>,
-  o : mat2x4<f32>,
-  p : mat3x2<f32>,
-  q : mat3x3<f32>,
-  r : mat3x4<f32>,
-  s : mat4x2<f32>,
-  t : mat4x3<f32>,
-  u : mat4x4<f32>,
-  v : array<vec3<f32>, 2>,
+  scalar_f32 : f32,
+  scalar_i32 : i32,
+  scalar_u32 : u32,
+  scalar_f16 : f16,
+  vec2_f32 : vec2<f32>,
+  vec2_i32 : vec2<i32>,
+  vec2_u32 : vec2<u32>,
+  vec2_f16 : vec2<f16>,
+  vec3_f32 : vec3<f32>,
+  vec3_i32 : vec3<i32>,
+  vec3_u32 : vec3<u32>,
+  vec3_f16 : vec3<f16>,
+  vec4_f32 : vec4<f32>,
+  vec4_i32 : vec4<i32>,
+  vec4_u32 : vec4<u32>,
+  vec4_f16 : vec4<f16>,
+  mat2x2_f32 : mat2x2<f32>,
+  mat2x3_f32 : mat2x3<f32>,
+  mat2x4_f32 : mat2x4<f32>,
+  mat3x2_f32 : mat3x2<f32>,
+  mat3x3_f32 : mat3x3<f32>,
+  mat3x4_f32 : mat3x4<f32>,
+  mat4x2_f32 : mat4x2<f32>,
+  mat4x3_f32 : mat4x3<f32>,
+  mat4x4_f32 : mat4x4<f32>,
+  mat2x2_f16 : mat2x2<f16>,
+  mat2x3_f16 : mat2x3<f16>,
+  mat2x4_f16 : mat2x4<f16>,
+  mat3x2_f16 : mat3x2<f16>,
+  mat3x3_f16 : mat3x3<f16>,
+  mat3x4_f16 : mat3x4<f16>,
+  mat4x2_f16 : mat4x2<f16>,
+  mat4x3_f16 : mat4x3<f16>,
+  mat4x4_f16 : mat4x4<f16>,
+  arr2_vec3_f32 : array<vec3<f32>, 2>,
+  arr2_mat4x2_f16 : array<mat4x2<f16>, 2>,
 }
 )";
 
@@ -835,209 +1299,342 @@
 
 TEST_F(DecomposeMemoryAccessTest, SB_BasicStore) {
     auto* src = R"(
+enable f16;
+
 struct SB {
-  a : i32,
-  b : u32,
-  c : f32,
-  d : vec2<i32>,
-  e : vec2<u32>,
-  f : vec2<f32>,
-  g : vec3<i32>,
-  h : vec3<u32>,
-  i : vec3<f32>,
-  j : vec4<i32>,
-  k : vec4<u32>,
-  l : vec4<f32>,
-  m : mat2x2<f32>,
-  n : mat2x3<f32>,
-  o : mat2x4<f32>,
-  p : mat3x2<f32>,
-  q : mat3x3<f32>,
-  r : mat3x4<f32>,
-  s : mat4x2<f32>,
-  t : mat4x3<f32>,
-  u : mat4x4<f32>,
-  v : array<vec3<f32>, 2>,
+  scalar_f32 : f32,
+  scalar_i32 : i32,
+  scalar_u32 : u32,
+  scalar_f16 : f16,
+  vec2_f32 : vec2<f32>,
+  vec2_i32 : vec2<i32>,
+  vec2_u32 : vec2<u32>,
+  vec2_f16 : vec2<f16>,
+  vec3_f32 : vec3<f32>,
+  vec3_i32 : vec3<i32>,
+  vec3_u32 : vec3<u32>,
+  vec3_f16 : vec3<f16>,
+  vec4_f32 : vec4<f32>,
+  vec4_i32 : vec4<i32>,
+  vec4_u32 : vec4<u32>,
+  vec4_f16 : vec4<f16>,
+  mat2x2_f32 : mat2x2<f32>,
+  mat2x3_f32 : mat2x3<f32>,
+  mat2x4_f32 : mat2x4<f32>,
+  mat3x2_f32 : mat3x2<f32>,
+  mat3x3_f32 : mat3x3<f32>,
+  mat3x4_f32 : mat3x4<f32>,
+  mat4x2_f32 : mat4x2<f32>,
+  mat4x3_f32 : mat4x3<f32>,
+  mat4x4_f32 : mat4x4<f32>,
+  mat2x2_f16 : mat2x2<f16>,
+  mat2x3_f16 : mat2x3<f16>,
+  mat2x4_f16 : mat2x4<f16>,
+  mat3x2_f16 : mat3x2<f16>,
+  mat3x3_f16 : mat3x3<f16>,
+  mat3x4_f16 : mat3x4<f16>,
+  mat4x2_f16 : mat4x2<f16>,
+  mat4x3_f16 : mat4x3<f16>,
+  mat4x4_f16 : mat4x4<f16>,
+  arr2_vec3_f32 : array<vec3<f32>, 2>,
+  arr2_mat4x2_f16 : array<mat4x2<f16>, 2>,
 };
 
 @group(0) @binding(0) var<storage, read_write> sb : SB;
 
 @compute @workgroup_size(1)
 fn main() {
-  sb.a = i32();
-  sb.b = u32();
-  sb.c = f32();
-  sb.d = vec2<i32>();
-  sb.e = vec2<u32>();
-  sb.f = vec2<f32>();
-  sb.g = vec3<i32>();
-  sb.h = vec3<u32>();
-  sb.i = vec3<f32>();
-  sb.j = vec4<i32>();
-  sb.k = vec4<u32>();
-  sb.l = vec4<f32>();
-  sb.m = mat2x2<f32>();
-  sb.n = mat2x3<f32>();
-  sb.o = mat2x4<f32>();
-  sb.p = mat3x2<f32>();
-  sb.q = mat3x3<f32>();
-  sb.r = mat3x4<f32>();
-  sb.s = mat4x2<f32>();
-  sb.t = mat4x3<f32>();
-  sb.u = mat4x4<f32>();
-  sb.v = array<vec3<f32>, 2>();
+  sb.scalar_f32 = f32();
+  sb.scalar_i32 = i32();
+  sb.scalar_u32 = u32();
+  sb.scalar_f16 = f16();
+  sb.vec2_f32 = vec2<f32>();
+  sb.vec2_i32 = vec2<i32>();
+  sb.vec2_u32 = vec2<u32>();
+  sb.vec2_f16 = vec2<f16>();
+  sb.vec3_f32 = vec3<f32>();
+  sb.vec3_i32 = vec3<i32>();
+  sb.vec3_u32 = vec3<u32>();
+  sb.vec3_f16 = vec3<f16>();
+  sb.vec4_f32 = vec4<f32>();
+  sb.vec4_i32 = vec4<i32>();
+  sb.vec4_u32 = vec4<u32>();
+  sb.vec4_f16 = vec4<f16>();
+  sb.mat2x2_f32 = mat2x2<f32>();
+  sb.mat2x3_f32 = mat2x3<f32>();
+  sb.mat2x4_f32 = mat2x4<f32>();
+  sb.mat3x2_f32 = mat3x2<f32>();
+  sb.mat3x3_f32 = mat3x3<f32>();
+  sb.mat3x4_f32 = mat3x4<f32>();
+  sb.mat4x2_f32 = mat4x2<f32>();
+  sb.mat4x3_f32 = mat4x3<f32>();
+  sb.mat4x4_f32 = mat4x4<f32>();
+  sb.mat2x2_f16 = mat2x2<f16>();
+  sb.mat2x3_f16 = mat2x3<f16>();
+  sb.mat2x4_f16 = mat2x4<f16>();
+  sb.mat3x2_f16 = mat3x2<f16>();
+  sb.mat3x3_f16 = mat3x3<f16>();
+  sb.mat3x4_f16 = mat3x4<f16>();
+  sb.mat4x2_f16 = mat4x2<f16>();
+  sb.mat4x3_f16 = mat4x3<f16>();
+  sb.mat4x4_f16 = mat4x4<f16>();
+  sb.arr2_vec3_f32 = array<vec3<f32>, 2>();
+  sb.arr2_mat4x2_f16 = array<mat4x2<f16>, 2>();
 }
 )";
 
     auto* expect = R"(
+enable f16;
+
 struct SB {
-  a : i32,
-  b : u32,
-  c : f32,
-  d : vec2<i32>,
-  e : vec2<u32>,
-  f : vec2<f32>,
-  g : vec3<i32>,
-  h : vec3<u32>,
-  i : vec3<f32>,
-  j : vec4<i32>,
-  k : vec4<u32>,
-  l : vec4<f32>,
-  m : mat2x2<f32>,
-  n : mat2x3<f32>,
-  o : mat2x4<f32>,
-  p : mat3x2<f32>,
-  q : mat3x3<f32>,
-  r : mat3x4<f32>,
-  s : mat4x2<f32>,
-  t : mat4x3<f32>,
-  u : mat4x4<f32>,
-  v : array<vec3<f32>, 2>,
+  scalar_f32 : f32,
+  scalar_i32 : i32,
+  scalar_u32 : u32,
+  scalar_f16 : f16,
+  vec2_f32 : vec2<f32>,
+  vec2_i32 : vec2<i32>,
+  vec2_u32 : vec2<u32>,
+  vec2_f16 : vec2<f16>,
+  vec3_f32 : vec3<f32>,
+  vec3_i32 : vec3<i32>,
+  vec3_u32 : vec3<u32>,
+  vec3_f16 : vec3<f16>,
+  vec4_f32 : vec4<f32>,
+  vec4_i32 : vec4<i32>,
+  vec4_u32 : vec4<u32>,
+  vec4_f16 : vec4<f16>,
+  mat2x2_f32 : mat2x2<f32>,
+  mat2x3_f32 : mat2x3<f32>,
+  mat2x4_f32 : mat2x4<f32>,
+  mat3x2_f32 : mat3x2<f32>,
+  mat3x3_f32 : mat3x3<f32>,
+  mat3x4_f32 : mat3x4<f32>,
+  mat4x2_f32 : mat4x2<f32>,
+  mat4x3_f32 : mat4x3<f32>,
+  mat4x4_f32 : mat4x4<f32>,
+  mat2x2_f16 : mat2x2<f16>,
+  mat2x3_f16 : mat2x3<f16>,
+  mat2x4_f16 : mat2x4<f16>,
+  mat3x2_f16 : mat3x2<f16>,
+  mat3x3_f16 : mat3x3<f16>,
+  mat3x4_f16 : mat3x4<f16>,
+  mat4x2_f16 : mat4x2<f16>,
+  mat4x3_f16 : mat4x3<f16>,
+  mat4x4_f16 : mat4x4<f16>,
+  arr2_vec3_f32 : array<vec3<f32>, 2>,
+  arr2_mat4x2_f16 : array<mat4x2<f16>, 2>,
 }
 
 @group(0) @binding(0) var<storage, read_write> sb : SB;
 
+@internal(intrinsic_store_storage_f32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : f32)
+
 @internal(intrinsic_store_storage_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : i32)
+fn tint_symbol_1(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : i32)
 
 @internal(intrinsic_store_storage_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_1(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : u32)
+fn tint_symbol_2(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : u32)
 
-@internal(intrinsic_store_storage_f32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_2(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : f32)
-
-@internal(intrinsic_store_storage_vec2_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_3(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec2<i32>)
-
-@internal(intrinsic_store_storage_vec2_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_4(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec2<u32>)
+@internal(intrinsic_store_storage_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_3(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : f16)
 
 @internal(intrinsic_store_storage_vec2_f32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_5(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec2<f32>)
+fn tint_symbol_4(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec2<f32>)
 
-@internal(intrinsic_store_storage_vec3_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_6(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec3<i32>)
+@internal(intrinsic_store_storage_vec2_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_5(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec2<i32>)
 
-@internal(intrinsic_store_storage_vec3_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_7(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec3<u32>)
+@internal(intrinsic_store_storage_vec2_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_6(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec2<u32>)
+
+@internal(intrinsic_store_storage_vec2_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_7(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec2<f16>)
 
 @internal(intrinsic_store_storage_vec3_f32) @internal(disable_validation__function_has_no_body)
 fn tint_symbol_8(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec3<f32>)
 
-@internal(intrinsic_store_storage_vec4_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_9(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec4<i32>)
+@internal(intrinsic_store_storage_vec3_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_9(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec3<i32>)
 
-@internal(intrinsic_store_storage_vec4_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_10(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec4<u32>)
+@internal(intrinsic_store_storage_vec3_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_10(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec3<u32>)
+
+@internal(intrinsic_store_storage_vec3_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_11(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec3<f16>)
 
 @internal(intrinsic_store_storage_vec4_f32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_11(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec4<f32>)
+fn tint_symbol_12(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec4<f32>)
 
-fn tint_symbol_12(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x2<f32>) {
-  tint_symbol_5(buffer, (offset + 0u), value[0u]);
-  tint_symbol_5(buffer, (offset + 8u), value[1u]);
+@internal(intrinsic_store_storage_vec4_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_13(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec4<i32>)
+
+@internal(intrinsic_store_storage_vec4_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_14(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec4<u32>)
+
+@internal(intrinsic_store_storage_vec4_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_15(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec4<f16>)
+
+fn tint_symbol_16(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x2<f32>) {
+  tint_symbol_4(buffer, (offset + 0u), value[0u]);
+  tint_symbol_4(buffer, (offset + 8u), value[1u]);
 }
 
-fn tint_symbol_13(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x3<f32>) {
+fn tint_symbol_17(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x3<f32>) {
   tint_symbol_8(buffer, (offset + 0u), value[0u]);
   tint_symbol_8(buffer, (offset + 16u), value[1u]);
 }
 
-fn tint_symbol_14(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x4<f32>) {
-  tint_symbol_11(buffer, (offset + 0u), value[0u]);
-  tint_symbol_11(buffer, (offset + 16u), value[1u]);
+fn tint_symbol_18(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x4<f32>) {
+  tint_symbol_12(buffer, (offset + 0u), value[0u]);
+  tint_symbol_12(buffer, (offset + 16u), value[1u]);
 }
 
-fn tint_symbol_15(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x2<f32>) {
-  tint_symbol_5(buffer, (offset + 0u), value[0u]);
-  tint_symbol_5(buffer, (offset + 8u), value[1u]);
-  tint_symbol_5(buffer, (offset + 16u), value[2u]);
+fn tint_symbol_19(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x2<f32>) {
+  tint_symbol_4(buffer, (offset + 0u), value[0u]);
+  tint_symbol_4(buffer, (offset + 8u), value[1u]);
+  tint_symbol_4(buffer, (offset + 16u), value[2u]);
 }
 
-fn tint_symbol_16(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x3<f32>) {
+fn tint_symbol_20(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x3<f32>) {
   tint_symbol_8(buffer, (offset + 0u), value[0u]);
   tint_symbol_8(buffer, (offset + 16u), value[1u]);
   tint_symbol_8(buffer, (offset + 32u), value[2u]);
 }
 
-fn tint_symbol_17(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x4<f32>) {
-  tint_symbol_11(buffer, (offset + 0u), value[0u]);
-  tint_symbol_11(buffer, (offset + 16u), value[1u]);
-  tint_symbol_11(buffer, (offset + 32u), value[2u]);
+fn tint_symbol_21(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x4<f32>) {
+  tint_symbol_12(buffer, (offset + 0u), value[0u]);
+  tint_symbol_12(buffer, (offset + 16u), value[1u]);
+  tint_symbol_12(buffer, (offset + 32u), value[2u]);
 }
 
-fn tint_symbol_18(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x2<f32>) {
-  tint_symbol_5(buffer, (offset + 0u), value[0u]);
-  tint_symbol_5(buffer, (offset + 8u), value[1u]);
-  tint_symbol_5(buffer, (offset + 16u), value[2u]);
-  tint_symbol_5(buffer, (offset + 24u), value[3u]);
+fn tint_symbol_22(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x2<f32>) {
+  tint_symbol_4(buffer, (offset + 0u), value[0u]);
+  tint_symbol_4(buffer, (offset + 8u), value[1u]);
+  tint_symbol_4(buffer, (offset + 16u), value[2u]);
+  tint_symbol_4(buffer, (offset + 24u), value[3u]);
 }
 
-fn tint_symbol_19(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x3<f32>) {
+fn tint_symbol_23(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x3<f32>) {
   tint_symbol_8(buffer, (offset + 0u), value[0u]);
   tint_symbol_8(buffer, (offset + 16u), value[1u]);
   tint_symbol_8(buffer, (offset + 32u), value[2u]);
   tint_symbol_8(buffer, (offset + 48u), value[3u]);
 }
 
-fn tint_symbol_20(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x4<f32>) {
-  tint_symbol_11(buffer, (offset + 0u), value[0u]);
-  tint_symbol_11(buffer, (offset + 16u), value[1u]);
-  tint_symbol_11(buffer, (offset + 32u), value[2u]);
-  tint_symbol_11(buffer, (offset + 48u), value[3u]);
+fn tint_symbol_24(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x4<f32>) {
+  tint_symbol_12(buffer, (offset + 0u), value[0u]);
+  tint_symbol_12(buffer, (offset + 16u), value[1u]);
+  tint_symbol_12(buffer, (offset + 32u), value[2u]);
+  tint_symbol_12(buffer, (offset + 48u), value[3u]);
 }
 
-fn tint_symbol_21(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : array<vec3<f32>, 2u>) {
+fn tint_symbol_25(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x2<f16>) {
+  tint_symbol_7(buffer, (offset + 0u), value[0u]);
+  tint_symbol_7(buffer, (offset + 4u), value[1u]);
+}
+
+fn tint_symbol_26(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x3<f16>) {
+  tint_symbol_11(buffer, (offset + 0u), value[0u]);
+  tint_symbol_11(buffer, (offset + 8u), value[1u]);
+}
+
+fn tint_symbol_27(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x4<f16>) {
+  tint_symbol_15(buffer, (offset + 0u), value[0u]);
+  tint_symbol_15(buffer, (offset + 8u), value[1u]);
+}
+
+fn tint_symbol_28(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x2<f16>) {
+  tint_symbol_7(buffer, (offset + 0u), value[0u]);
+  tint_symbol_7(buffer, (offset + 4u), value[1u]);
+  tint_symbol_7(buffer, (offset + 8u), value[2u]);
+}
+
+fn tint_symbol_29(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x3<f16>) {
+  tint_symbol_11(buffer, (offset + 0u), value[0u]);
+  tint_symbol_11(buffer, (offset + 8u), value[1u]);
+  tint_symbol_11(buffer, (offset + 16u), value[2u]);
+}
+
+fn tint_symbol_30(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x4<f16>) {
+  tint_symbol_15(buffer, (offset + 0u), value[0u]);
+  tint_symbol_15(buffer, (offset + 8u), value[1u]);
+  tint_symbol_15(buffer, (offset + 16u), value[2u]);
+}
+
+fn tint_symbol_31(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x2<f16>) {
+  tint_symbol_7(buffer, (offset + 0u), value[0u]);
+  tint_symbol_7(buffer, (offset + 4u), value[1u]);
+  tint_symbol_7(buffer, (offset + 8u), value[2u]);
+  tint_symbol_7(buffer, (offset + 12u), value[3u]);
+}
+
+fn tint_symbol_32(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x3<f16>) {
+  tint_symbol_11(buffer, (offset + 0u), value[0u]);
+  tint_symbol_11(buffer, (offset + 8u), value[1u]);
+  tint_symbol_11(buffer, (offset + 16u), value[2u]);
+  tint_symbol_11(buffer, (offset + 24u), value[3u]);
+}
+
+fn tint_symbol_33(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x4<f16>) {
+  tint_symbol_15(buffer, (offset + 0u), value[0u]);
+  tint_symbol_15(buffer, (offset + 8u), value[1u]);
+  tint_symbol_15(buffer, (offset + 16u), value[2u]);
+  tint_symbol_15(buffer, (offset + 24u), value[3u]);
+}
+
+fn tint_symbol_34(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : array<vec3<f32>, 2u>) {
   var array = value;
+  for(var i = 0u; (i < 2u); i = (i + 1u)) {
+    tint_symbol_8(buffer, (offset + (i * 16u)), array[i]);
+  }
+}
+
+fn tint_symbol_35(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : array<mat4x2<f16>, 2u>) {
+  var array_1 = value;
   for(var i_1 = 0u; (i_1 < 2u); i_1 = (i_1 + 1u)) {
-    tint_symbol_8(buffer, (offset + (i_1 * 16u)), array[i_1]);
+    tint_symbol_31(buffer, (offset + (i_1 * 16u)), array_1[i_1]);
   }
 }
 
 @compute @workgroup_size(1)
 fn main() {
-  tint_symbol(&(sb), 0u, i32());
-  tint_symbol_1(&(sb), 4u, u32());
-  tint_symbol_2(&(sb), 8u, f32());
-  tint_symbol_3(&(sb), 16u, vec2<i32>());
-  tint_symbol_4(&(sb), 24u, vec2<u32>());
-  tint_symbol_5(&(sb), 32u, vec2<f32>());
-  tint_symbol_6(&(sb), 48u, vec3<i32>());
-  tint_symbol_7(&(sb), 64u, vec3<u32>());
-  tint_symbol_8(&(sb), 80u, vec3<f32>());
-  tint_symbol_9(&(sb), 96u, vec4<i32>());
-  tint_symbol_10(&(sb), 112u, vec4<u32>());
-  tint_symbol_11(&(sb), 128u, vec4<f32>());
-  tint_symbol_12(&(sb), 144u, mat2x2<f32>());
-  tint_symbol_13(&(sb), 160u, mat2x3<f32>());
-  tint_symbol_14(&(sb), 192u, mat2x4<f32>());
-  tint_symbol_15(&(sb), 224u, mat3x2<f32>());
-  tint_symbol_16(&(sb), 256u, mat3x3<f32>());
-  tint_symbol_17(&(sb), 304u, mat3x4<f32>());
-  tint_symbol_18(&(sb), 352u, mat4x2<f32>());
-  tint_symbol_19(&(sb), 384u, mat4x3<f32>());
-  tint_symbol_20(&(sb), 448u, mat4x4<f32>());
-  tint_symbol_21(&(sb), 512u, array<vec3<f32>, 2>());
+  tint_symbol(&(sb), 0u, f32());
+  tint_symbol_1(&(sb), 4u, i32());
+  tint_symbol_2(&(sb), 8u, u32());
+  tint_symbol_3(&(sb), 12u, f16());
+  tint_symbol_4(&(sb), 16u, vec2<f32>());
+  tint_symbol_5(&(sb), 24u, vec2<i32>());
+  tint_symbol_6(&(sb), 32u, vec2<u32>());
+  tint_symbol_7(&(sb), 40u, vec2<f16>());
+  tint_symbol_8(&(sb), 48u, vec3<f32>());
+  tint_symbol_9(&(sb), 64u, vec3<i32>());
+  tint_symbol_10(&(sb), 80u, vec3<u32>());
+  tint_symbol_11(&(sb), 96u, vec3<f16>());
+  tint_symbol_12(&(sb), 112u, vec4<f32>());
+  tint_symbol_13(&(sb), 128u, vec4<i32>());
+  tint_symbol_14(&(sb), 144u, vec4<u32>());
+  tint_symbol_15(&(sb), 160u, vec4<f16>());
+  tint_symbol_16(&(sb), 168u, mat2x2<f32>());
+  tint_symbol_17(&(sb), 192u, mat2x3<f32>());
+  tint_symbol_18(&(sb), 224u, mat2x4<f32>());
+  tint_symbol_19(&(sb), 256u, mat3x2<f32>());
+  tint_symbol_20(&(sb), 288u, mat3x3<f32>());
+  tint_symbol_21(&(sb), 336u, mat3x4<f32>());
+  tint_symbol_22(&(sb), 384u, mat4x2<f32>());
+  tint_symbol_23(&(sb), 416u, mat4x3<f32>());
+  tint_symbol_24(&(sb), 480u, mat4x4<f32>());
+  tint_symbol_25(&(sb), 544u, mat2x2<f16>());
+  tint_symbol_26(&(sb), 552u, mat2x3<f16>());
+  tint_symbol_27(&(sb), 568u, mat2x4<f16>());
+  tint_symbol_28(&(sb), 584u, mat3x2<f16>());
+  tint_symbol_29(&(sb), 600u, mat3x3<f16>());
+  tint_symbol_30(&(sb), 624u, mat3x4<f16>());
+  tint_symbol_31(&(sb), 648u, mat4x2<f16>());
+  tint_symbol_32(&(sb), 664u, mat4x3<f16>());
+  tint_symbol_33(&(sb), 696u, mat4x4<f16>());
+  tint_symbol_34(&(sb), 736u, array<vec3<f32>, 2>());
+  tint_symbol_35(&(sb), 768u, array<mat4x2<f16>, 2>());
 }
 )";
 
@@ -1048,209 +1645,342 @@
 
 TEST_F(DecomposeMemoryAccessTest, SB_BasicStore_OutOfOrder) {
     auto* src = R"(
+enable f16;
+
 @compute @workgroup_size(1)
 fn main() {
-  sb.a = i32();
-  sb.b = u32();
-  sb.c = f32();
-  sb.d = vec2<i32>();
-  sb.e = vec2<u32>();
-  sb.f = vec2<f32>();
-  sb.g = vec3<i32>();
-  sb.h = vec3<u32>();
-  sb.i = vec3<f32>();
-  sb.j = vec4<i32>();
-  sb.k = vec4<u32>();
-  sb.l = vec4<f32>();
-  sb.m = mat2x2<f32>();
-  sb.n = mat2x3<f32>();
-  sb.o = mat2x4<f32>();
-  sb.p = mat3x2<f32>();
-  sb.q = mat3x3<f32>();
-  sb.r = mat3x4<f32>();
-  sb.s = mat4x2<f32>();
-  sb.t = mat4x3<f32>();
-  sb.u = mat4x4<f32>();
-  sb.v = array<vec3<f32>, 2>();
+  sb.scalar_f32 = f32();
+  sb.scalar_i32 = i32();
+  sb.scalar_u32 = u32();
+  sb.scalar_f16 = f16();
+  sb.vec2_f32 = vec2<f32>();
+  sb.vec2_i32 = vec2<i32>();
+  sb.vec2_u32 = vec2<u32>();
+  sb.vec2_f16 = vec2<f16>();
+  sb.vec3_f32 = vec3<f32>();
+  sb.vec3_i32 = vec3<i32>();
+  sb.vec3_u32 = vec3<u32>();
+  sb.vec3_f16 = vec3<f16>();
+  sb.vec4_f32 = vec4<f32>();
+  sb.vec4_i32 = vec4<i32>();
+  sb.vec4_u32 = vec4<u32>();
+  sb.vec4_f16 = vec4<f16>();
+  sb.mat2x2_f32 = mat2x2<f32>();
+  sb.mat2x3_f32 = mat2x3<f32>();
+  sb.mat2x4_f32 = mat2x4<f32>();
+  sb.mat3x2_f32 = mat3x2<f32>();
+  sb.mat3x3_f32 = mat3x3<f32>();
+  sb.mat3x4_f32 = mat3x4<f32>();
+  sb.mat4x2_f32 = mat4x2<f32>();
+  sb.mat4x3_f32 = mat4x3<f32>();
+  sb.mat4x4_f32 = mat4x4<f32>();
+  sb.mat2x2_f16 = mat2x2<f16>();
+  sb.mat2x3_f16 = mat2x3<f16>();
+  sb.mat2x4_f16 = mat2x4<f16>();
+  sb.mat3x2_f16 = mat3x2<f16>();
+  sb.mat3x3_f16 = mat3x3<f16>();
+  sb.mat3x4_f16 = mat3x4<f16>();
+  sb.mat4x2_f16 = mat4x2<f16>();
+  sb.mat4x3_f16 = mat4x3<f16>();
+  sb.mat4x4_f16 = mat4x4<f16>();
+  sb.arr2_vec3_f32 = array<vec3<f32>, 2>();
+  sb.arr2_mat4x2_f16 = array<mat4x2<f16>, 2>();
 }
 
 @group(0) @binding(0) var<storage, read_write> sb : SB;
 
 struct SB {
-  a : i32,
-  b : u32,
-  c : f32,
-  d : vec2<i32>,
-  e : vec2<u32>,
-  f : vec2<f32>,
-  g : vec3<i32>,
-  h : vec3<u32>,
-  i : vec3<f32>,
-  j : vec4<i32>,
-  k : vec4<u32>,
-  l : vec4<f32>,
-  m : mat2x2<f32>,
-  n : mat2x3<f32>,
-  o : mat2x4<f32>,
-  p : mat3x2<f32>,
-  q : mat3x3<f32>,
-  r : mat3x4<f32>,
-  s : mat4x2<f32>,
-  t : mat4x3<f32>,
-  u : mat4x4<f32>,
-  v : array<vec3<f32>, 2>,
+  scalar_f32 : f32,
+  scalar_i32 : i32,
+  scalar_u32 : u32,
+  scalar_f16 : f16,
+  vec2_f32 : vec2<f32>,
+  vec2_i32 : vec2<i32>,
+  vec2_u32 : vec2<u32>,
+  vec2_f16 : vec2<f16>,
+  vec3_f32 : vec3<f32>,
+  vec3_i32 : vec3<i32>,
+  vec3_u32 : vec3<u32>,
+  vec3_f16 : vec3<f16>,
+  vec4_f32 : vec4<f32>,
+  vec4_i32 : vec4<i32>,
+  vec4_u32 : vec4<u32>,
+  vec4_f16 : vec4<f16>,
+  mat2x2_f32 : mat2x2<f32>,
+  mat2x3_f32 : mat2x3<f32>,
+  mat2x4_f32 : mat2x4<f32>,
+  mat3x2_f32 : mat3x2<f32>,
+  mat3x3_f32 : mat3x3<f32>,
+  mat3x4_f32 : mat3x4<f32>,
+  mat4x2_f32 : mat4x2<f32>,
+  mat4x3_f32 : mat4x3<f32>,
+  mat4x4_f32 : mat4x4<f32>,
+  mat2x2_f16 : mat2x2<f16>,
+  mat2x3_f16 : mat2x3<f16>,
+  mat2x4_f16 : mat2x4<f16>,
+  mat3x2_f16 : mat3x2<f16>,
+  mat3x3_f16 : mat3x3<f16>,
+  mat3x4_f16 : mat3x4<f16>,
+  mat4x2_f16 : mat4x2<f16>,
+  mat4x3_f16 : mat4x3<f16>,
+  mat4x4_f16 : mat4x4<f16>,
+  arr2_vec3_f32 : array<vec3<f32>, 2>,
+  arr2_mat4x2_f16 : array<mat4x2<f16>, 2>,
 };
 )";
 
     auto* expect = R"(
-@internal(intrinsic_store_storage_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : i32)
-
-@internal(intrinsic_store_storage_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_1(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : u32)
+enable f16;
 
 @internal(intrinsic_store_storage_f32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_2(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : f32)
+fn tint_symbol(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : f32)
 
-@internal(intrinsic_store_storage_vec2_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_3(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec2<i32>)
+@internal(intrinsic_store_storage_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_1(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : i32)
 
-@internal(intrinsic_store_storage_vec2_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_4(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec2<u32>)
+@internal(intrinsic_store_storage_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_2(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : u32)
+
+@internal(intrinsic_store_storage_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_3(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : f16)
 
 @internal(intrinsic_store_storage_vec2_f32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_5(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec2<f32>)
+fn tint_symbol_4(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec2<f32>)
 
-@internal(intrinsic_store_storage_vec3_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_6(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec3<i32>)
+@internal(intrinsic_store_storage_vec2_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_5(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec2<i32>)
 
-@internal(intrinsic_store_storage_vec3_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_7(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec3<u32>)
+@internal(intrinsic_store_storage_vec2_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_6(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec2<u32>)
+
+@internal(intrinsic_store_storage_vec2_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_7(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec2<f16>)
 
 @internal(intrinsic_store_storage_vec3_f32) @internal(disable_validation__function_has_no_body)
 fn tint_symbol_8(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec3<f32>)
 
-@internal(intrinsic_store_storage_vec4_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_9(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec4<i32>)
+@internal(intrinsic_store_storage_vec3_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_9(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec3<i32>)
 
-@internal(intrinsic_store_storage_vec4_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_10(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec4<u32>)
+@internal(intrinsic_store_storage_vec3_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_10(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec3<u32>)
+
+@internal(intrinsic_store_storage_vec3_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_11(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec3<f16>)
 
 @internal(intrinsic_store_storage_vec4_f32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_11(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec4<f32>)
+fn tint_symbol_12(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec4<f32>)
 
-fn tint_symbol_12(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x2<f32>) {
-  tint_symbol_5(buffer, (offset + 0u), value[0u]);
-  tint_symbol_5(buffer, (offset + 8u), value[1u]);
+@internal(intrinsic_store_storage_vec4_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_13(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec4<i32>)
+
+@internal(intrinsic_store_storage_vec4_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_14(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec4<u32>)
+
+@internal(intrinsic_store_storage_vec4_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_15(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec4<f16>)
+
+fn tint_symbol_16(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x2<f32>) {
+  tint_symbol_4(buffer, (offset + 0u), value[0u]);
+  tint_symbol_4(buffer, (offset + 8u), value[1u]);
 }
 
-fn tint_symbol_13(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x3<f32>) {
+fn tint_symbol_17(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x3<f32>) {
   tint_symbol_8(buffer, (offset + 0u), value[0u]);
   tint_symbol_8(buffer, (offset + 16u), value[1u]);
 }
 
-fn tint_symbol_14(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x4<f32>) {
-  tint_symbol_11(buffer, (offset + 0u), value[0u]);
-  tint_symbol_11(buffer, (offset + 16u), value[1u]);
+fn tint_symbol_18(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x4<f32>) {
+  tint_symbol_12(buffer, (offset + 0u), value[0u]);
+  tint_symbol_12(buffer, (offset + 16u), value[1u]);
 }
 
-fn tint_symbol_15(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x2<f32>) {
-  tint_symbol_5(buffer, (offset + 0u), value[0u]);
-  tint_symbol_5(buffer, (offset + 8u), value[1u]);
-  tint_symbol_5(buffer, (offset + 16u), value[2u]);
+fn tint_symbol_19(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x2<f32>) {
+  tint_symbol_4(buffer, (offset + 0u), value[0u]);
+  tint_symbol_4(buffer, (offset + 8u), value[1u]);
+  tint_symbol_4(buffer, (offset + 16u), value[2u]);
 }
 
-fn tint_symbol_16(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x3<f32>) {
+fn tint_symbol_20(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x3<f32>) {
   tint_symbol_8(buffer, (offset + 0u), value[0u]);
   tint_symbol_8(buffer, (offset + 16u), value[1u]);
   tint_symbol_8(buffer, (offset + 32u), value[2u]);
 }
 
-fn tint_symbol_17(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x4<f32>) {
-  tint_symbol_11(buffer, (offset + 0u), value[0u]);
-  tint_symbol_11(buffer, (offset + 16u), value[1u]);
-  tint_symbol_11(buffer, (offset + 32u), value[2u]);
+fn tint_symbol_21(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x4<f32>) {
+  tint_symbol_12(buffer, (offset + 0u), value[0u]);
+  tint_symbol_12(buffer, (offset + 16u), value[1u]);
+  tint_symbol_12(buffer, (offset + 32u), value[2u]);
 }
 
-fn tint_symbol_18(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x2<f32>) {
-  tint_symbol_5(buffer, (offset + 0u), value[0u]);
-  tint_symbol_5(buffer, (offset + 8u), value[1u]);
-  tint_symbol_5(buffer, (offset + 16u), value[2u]);
-  tint_symbol_5(buffer, (offset + 24u), value[3u]);
+fn tint_symbol_22(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x2<f32>) {
+  tint_symbol_4(buffer, (offset + 0u), value[0u]);
+  tint_symbol_4(buffer, (offset + 8u), value[1u]);
+  tint_symbol_4(buffer, (offset + 16u), value[2u]);
+  tint_symbol_4(buffer, (offset + 24u), value[3u]);
 }
 
-fn tint_symbol_19(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x3<f32>) {
+fn tint_symbol_23(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x3<f32>) {
   tint_symbol_8(buffer, (offset + 0u), value[0u]);
   tint_symbol_8(buffer, (offset + 16u), value[1u]);
   tint_symbol_8(buffer, (offset + 32u), value[2u]);
   tint_symbol_8(buffer, (offset + 48u), value[3u]);
 }
 
-fn tint_symbol_20(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x4<f32>) {
-  tint_symbol_11(buffer, (offset + 0u), value[0u]);
-  tint_symbol_11(buffer, (offset + 16u), value[1u]);
-  tint_symbol_11(buffer, (offset + 32u), value[2u]);
-  tint_symbol_11(buffer, (offset + 48u), value[3u]);
+fn tint_symbol_24(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x4<f32>) {
+  tint_symbol_12(buffer, (offset + 0u), value[0u]);
+  tint_symbol_12(buffer, (offset + 16u), value[1u]);
+  tint_symbol_12(buffer, (offset + 32u), value[2u]);
+  tint_symbol_12(buffer, (offset + 48u), value[3u]);
 }
 
-fn tint_symbol_21(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : array<vec3<f32>, 2u>) {
+fn tint_symbol_25(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x2<f16>) {
+  tint_symbol_7(buffer, (offset + 0u), value[0u]);
+  tint_symbol_7(buffer, (offset + 4u), value[1u]);
+}
+
+fn tint_symbol_26(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x3<f16>) {
+  tint_symbol_11(buffer, (offset + 0u), value[0u]);
+  tint_symbol_11(buffer, (offset + 8u), value[1u]);
+}
+
+fn tint_symbol_27(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x4<f16>) {
+  tint_symbol_15(buffer, (offset + 0u), value[0u]);
+  tint_symbol_15(buffer, (offset + 8u), value[1u]);
+}
+
+fn tint_symbol_28(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x2<f16>) {
+  tint_symbol_7(buffer, (offset + 0u), value[0u]);
+  tint_symbol_7(buffer, (offset + 4u), value[1u]);
+  tint_symbol_7(buffer, (offset + 8u), value[2u]);
+}
+
+fn tint_symbol_29(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x3<f16>) {
+  tint_symbol_11(buffer, (offset + 0u), value[0u]);
+  tint_symbol_11(buffer, (offset + 8u), value[1u]);
+  tint_symbol_11(buffer, (offset + 16u), value[2u]);
+}
+
+fn tint_symbol_30(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x4<f16>) {
+  tint_symbol_15(buffer, (offset + 0u), value[0u]);
+  tint_symbol_15(buffer, (offset + 8u), value[1u]);
+  tint_symbol_15(buffer, (offset + 16u), value[2u]);
+}
+
+fn tint_symbol_31(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x2<f16>) {
+  tint_symbol_7(buffer, (offset + 0u), value[0u]);
+  tint_symbol_7(buffer, (offset + 4u), value[1u]);
+  tint_symbol_7(buffer, (offset + 8u), value[2u]);
+  tint_symbol_7(buffer, (offset + 12u), value[3u]);
+}
+
+fn tint_symbol_32(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x3<f16>) {
+  tint_symbol_11(buffer, (offset + 0u), value[0u]);
+  tint_symbol_11(buffer, (offset + 8u), value[1u]);
+  tint_symbol_11(buffer, (offset + 16u), value[2u]);
+  tint_symbol_11(buffer, (offset + 24u), value[3u]);
+}
+
+fn tint_symbol_33(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x4<f16>) {
+  tint_symbol_15(buffer, (offset + 0u), value[0u]);
+  tint_symbol_15(buffer, (offset + 8u), value[1u]);
+  tint_symbol_15(buffer, (offset + 16u), value[2u]);
+  tint_symbol_15(buffer, (offset + 24u), value[3u]);
+}
+
+fn tint_symbol_34(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : array<vec3<f32>, 2u>) {
   var array = value;
+  for(var i = 0u; (i < 2u); i = (i + 1u)) {
+    tint_symbol_8(buffer, (offset + (i * 16u)), array[i]);
+  }
+}
+
+fn tint_symbol_35(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : array<mat4x2<f16>, 2u>) {
+  var array_1 = value;
   for(var i_1 = 0u; (i_1 < 2u); i_1 = (i_1 + 1u)) {
-    tint_symbol_8(buffer, (offset + (i_1 * 16u)), array[i_1]);
+    tint_symbol_31(buffer, (offset + (i_1 * 16u)), array_1[i_1]);
   }
 }
 
 @compute @workgroup_size(1)
 fn main() {
-  tint_symbol(&(sb), 0u, i32());
-  tint_symbol_1(&(sb), 4u, u32());
-  tint_symbol_2(&(sb), 8u, f32());
-  tint_symbol_3(&(sb), 16u, vec2<i32>());
-  tint_symbol_4(&(sb), 24u, vec2<u32>());
-  tint_symbol_5(&(sb), 32u, vec2<f32>());
-  tint_symbol_6(&(sb), 48u, vec3<i32>());
-  tint_symbol_7(&(sb), 64u, vec3<u32>());
-  tint_symbol_8(&(sb), 80u, vec3<f32>());
-  tint_symbol_9(&(sb), 96u, vec4<i32>());
-  tint_symbol_10(&(sb), 112u, vec4<u32>());
-  tint_symbol_11(&(sb), 128u, vec4<f32>());
-  tint_symbol_12(&(sb), 144u, mat2x2<f32>());
-  tint_symbol_13(&(sb), 160u, mat2x3<f32>());
-  tint_symbol_14(&(sb), 192u, mat2x4<f32>());
-  tint_symbol_15(&(sb), 224u, mat3x2<f32>());
-  tint_symbol_16(&(sb), 256u, mat3x3<f32>());
-  tint_symbol_17(&(sb), 304u, mat3x4<f32>());
-  tint_symbol_18(&(sb), 352u, mat4x2<f32>());
-  tint_symbol_19(&(sb), 384u, mat4x3<f32>());
-  tint_symbol_20(&(sb), 448u, mat4x4<f32>());
-  tint_symbol_21(&(sb), 512u, array<vec3<f32>, 2>());
+  tint_symbol(&(sb), 0u, f32());
+  tint_symbol_1(&(sb), 4u, i32());
+  tint_symbol_2(&(sb), 8u, u32());
+  tint_symbol_3(&(sb), 12u, f16());
+  tint_symbol_4(&(sb), 16u, vec2<f32>());
+  tint_symbol_5(&(sb), 24u, vec2<i32>());
+  tint_symbol_6(&(sb), 32u, vec2<u32>());
+  tint_symbol_7(&(sb), 40u, vec2<f16>());
+  tint_symbol_8(&(sb), 48u, vec3<f32>());
+  tint_symbol_9(&(sb), 64u, vec3<i32>());
+  tint_symbol_10(&(sb), 80u, vec3<u32>());
+  tint_symbol_11(&(sb), 96u, vec3<f16>());
+  tint_symbol_12(&(sb), 112u, vec4<f32>());
+  tint_symbol_13(&(sb), 128u, vec4<i32>());
+  tint_symbol_14(&(sb), 144u, vec4<u32>());
+  tint_symbol_15(&(sb), 160u, vec4<f16>());
+  tint_symbol_16(&(sb), 168u, mat2x2<f32>());
+  tint_symbol_17(&(sb), 192u, mat2x3<f32>());
+  tint_symbol_18(&(sb), 224u, mat2x4<f32>());
+  tint_symbol_19(&(sb), 256u, mat3x2<f32>());
+  tint_symbol_20(&(sb), 288u, mat3x3<f32>());
+  tint_symbol_21(&(sb), 336u, mat3x4<f32>());
+  tint_symbol_22(&(sb), 384u, mat4x2<f32>());
+  tint_symbol_23(&(sb), 416u, mat4x3<f32>());
+  tint_symbol_24(&(sb), 480u, mat4x4<f32>());
+  tint_symbol_25(&(sb), 544u, mat2x2<f16>());
+  tint_symbol_26(&(sb), 552u, mat2x3<f16>());
+  tint_symbol_27(&(sb), 568u, mat2x4<f16>());
+  tint_symbol_28(&(sb), 584u, mat3x2<f16>());
+  tint_symbol_29(&(sb), 600u, mat3x3<f16>());
+  tint_symbol_30(&(sb), 624u, mat3x4<f16>());
+  tint_symbol_31(&(sb), 648u, mat4x2<f16>());
+  tint_symbol_32(&(sb), 664u, mat4x3<f16>());
+  tint_symbol_33(&(sb), 696u, mat4x4<f16>());
+  tint_symbol_34(&(sb), 736u, array<vec3<f32>, 2>());
+  tint_symbol_35(&(sb), 768u, array<mat4x2<f16>, 2>());
 }
 
 @group(0) @binding(0) var<storage, read_write> sb : SB;
 
 struct SB {
-  a : i32,
-  b : u32,
-  c : f32,
-  d : vec2<i32>,
-  e : vec2<u32>,
-  f : vec2<f32>,
-  g : vec3<i32>,
-  h : vec3<u32>,
-  i : vec3<f32>,
-  j : vec4<i32>,
-  k : vec4<u32>,
-  l : vec4<f32>,
-  m : mat2x2<f32>,
-  n : mat2x3<f32>,
-  o : mat2x4<f32>,
-  p : mat3x2<f32>,
-  q : mat3x3<f32>,
-  r : mat3x4<f32>,
-  s : mat4x2<f32>,
-  t : mat4x3<f32>,
-  u : mat4x4<f32>,
-  v : array<vec3<f32>, 2>,
+  scalar_f32 : f32,
+  scalar_i32 : i32,
+  scalar_u32 : u32,
+  scalar_f16 : f16,
+  vec2_f32 : vec2<f32>,
+  vec2_i32 : vec2<i32>,
+  vec2_u32 : vec2<u32>,
+  vec2_f16 : vec2<f16>,
+  vec3_f32 : vec3<f32>,
+  vec3_i32 : vec3<i32>,
+  vec3_u32 : vec3<u32>,
+  vec3_f16 : vec3<f16>,
+  vec4_f32 : vec4<f32>,
+  vec4_i32 : vec4<i32>,
+  vec4_u32 : vec4<u32>,
+  vec4_f16 : vec4<f16>,
+  mat2x2_f32 : mat2x2<f32>,
+  mat2x3_f32 : mat2x3<f32>,
+  mat2x4_f32 : mat2x4<f32>,
+  mat3x2_f32 : mat3x2<f32>,
+  mat3x3_f32 : mat3x3<f32>,
+  mat3x4_f32 : mat3x4<f32>,
+  mat4x2_f32 : mat4x2<f32>,
+  mat4x3_f32 : mat4x3<f32>,
+  mat4x4_f32 : mat4x4<f32>,
+  mat2x2_f16 : mat2x2<f16>,
+  mat2x3_f16 : mat2x3<f16>,
+  mat2x4_f16 : mat2x4<f16>,
+  mat3x2_f16 : mat3x2<f16>,
+  mat3x3_f16 : mat3x3<f16>,
+  mat3x4_f16 : mat3x4<f16>,
+  mat4x2_f16 : mat4x2<f16>,
+  mat4x3_f16 : mat4x3<f16>,
+  mat4x4_f16 : mat4x4<f16>,
+  arr2_vec3_f32 : array<vec3<f32>, 2>,
+  arr2_mat4x2_f16 : array<mat4x2<f16>, 2>,
 }
 )";
 
@@ -1261,29 +1991,45 @@
 
 TEST_F(DecomposeMemoryAccessTest, LoadStructure) {
     auto* src = R"(
+enable f16;
+
 struct SB {
-  a : i32,
-  b : u32,
-  c : f32,
-  d : vec2<i32>,
-  e : vec2<u32>,
-  f : vec2<f32>,
-  g : vec3<i32>,
-  h : vec3<u32>,
-  i : vec3<f32>,
-  j : vec4<i32>,
-  k : vec4<u32>,
-  l : vec4<f32>,
-  m : mat2x2<f32>,
-  n : mat2x3<f32>,
-  o : mat2x4<f32>,
-  p : mat3x2<f32>,
-  q : mat3x3<f32>,
-  r : mat3x4<f32>,
-  s : mat4x2<f32>,
-  t : mat4x3<f32>,
-  u : mat4x4<f32>,
-  v : array<vec3<f32>, 2>,
+  scalar_f32 : f32,
+  scalar_i32 : i32,
+  scalar_u32 : u32,
+  scalar_f16 : f16,
+  vec2_f32 : vec2<f32>,
+  vec2_i32 : vec2<i32>,
+  vec2_u32 : vec2<u32>,
+  vec2_f16 : vec2<f16>,
+  vec3_f32 : vec3<f32>,
+  vec3_i32 : vec3<i32>,
+  vec3_u32 : vec3<u32>,
+  vec3_f16 : vec3<f16>,
+  vec4_f32 : vec4<f32>,
+  vec4_i32 : vec4<i32>,
+  vec4_u32 : vec4<u32>,
+  vec4_f16 : vec4<f16>,
+  mat2x2_f32 : mat2x2<f32>,
+  mat2x3_f32 : mat2x3<f32>,
+  mat2x4_f32 : mat2x4<f32>,
+  mat3x2_f32 : mat3x2<f32>,
+  mat3x3_f32 : mat3x3<f32>,
+  mat3x4_f32 : mat3x4<f32>,
+  mat4x2_f32 : mat4x2<f32>,
+  mat4x3_f32 : mat4x3<f32>,
+  mat4x4_f32 : mat4x4<f32>,
+  mat2x2_f16 : mat2x2<f16>,
+  mat2x3_f16 : mat2x3<f16>,
+  mat2x4_f16 : mat2x4<f16>,
+  mat3x2_f16 : mat3x2<f16>,
+  mat3x3_f16 : mat3x3<f16>,
+  mat3x4_f16 : mat3x4<f16>,
+  mat4x2_f16 : mat4x2<f16>,
+  mat4x3_f16 : mat4x3<f16>,
+  mat4x4_f16 : mat4x4<f16>,
+  arr2_vec3_f32 : array<vec3<f32>, 2>,
+  arr2_mat4x2_f16 : array<mat4x2<f16>, 2>,
 };
 
 @group(0) @binding(0) var<storage, read_write> sb : SB;
@@ -1295,115 +2041,187 @@
 )";
 
     auto* expect = R"(
+enable f16;
+
 struct SB {
-  a : i32,
-  b : u32,
-  c : f32,
-  d : vec2<i32>,
-  e : vec2<u32>,
-  f : vec2<f32>,
-  g : vec3<i32>,
-  h : vec3<u32>,
-  i : vec3<f32>,
-  j : vec4<i32>,
-  k : vec4<u32>,
-  l : vec4<f32>,
-  m : mat2x2<f32>,
-  n : mat2x3<f32>,
-  o : mat2x4<f32>,
-  p : mat3x2<f32>,
-  q : mat3x3<f32>,
-  r : mat3x4<f32>,
-  s : mat4x2<f32>,
-  t : mat4x3<f32>,
-  u : mat4x4<f32>,
-  v : array<vec3<f32>, 2>,
+  scalar_f32 : f32,
+  scalar_i32 : i32,
+  scalar_u32 : u32,
+  scalar_f16 : f16,
+  vec2_f32 : vec2<f32>,
+  vec2_i32 : vec2<i32>,
+  vec2_u32 : vec2<u32>,
+  vec2_f16 : vec2<f16>,
+  vec3_f32 : vec3<f32>,
+  vec3_i32 : vec3<i32>,
+  vec3_u32 : vec3<u32>,
+  vec3_f16 : vec3<f16>,
+  vec4_f32 : vec4<f32>,
+  vec4_i32 : vec4<i32>,
+  vec4_u32 : vec4<u32>,
+  vec4_f16 : vec4<f16>,
+  mat2x2_f32 : mat2x2<f32>,
+  mat2x3_f32 : mat2x3<f32>,
+  mat2x4_f32 : mat2x4<f32>,
+  mat3x2_f32 : mat3x2<f32>,
+  mat3x3_f32 : mat3x3<f32>,
+  mat3x4_f32 : mat3x4<f32>,
+  mat4x2_f32 : mat4x2<f32>,
+  mat4x3_f32 : mat4x3<f32>,
+  mat4x4_f32 : mat4x4<f32>,
+  mat2x2_f16 : mat2x2<f16>,
+  mat2x3_f16 : mat2x3<f16>,
+  mat2x4_f16 : mat2x4<f16>,
+  mat3x2_f16 : mat3x2<f16>,
+  mat3x3_f16 : mat3x3<f16>,
+  mat3x4_f16 : mat3x4<f16>,
+  mat4x2_f16 : mat4x2<f16>,
+  mat4x3_f16 : mat4x3<f16>,
+  mat4x4_f16 : mat4x4<f16>,
+  arr2_vec3_f32 : array<vec3<f32>, 2>,
+  arr2_mat4x2_f16 : array<mat4x2<f16>, 2>,
 }
 
 @group(0) @binding(0) var<storage, read_write> sb : SB;
 
+@internal(intrinsic_load_storage_f32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_1(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> f32
+
 @internal(intrinsic_load_storage_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_1(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> i32
+fn tint_symbol_2(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> i32
 
 @internal(intrinsic_load_storage_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_2(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> u32
+fn tint_symbol_3(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> u32
 
-@internal(intrinsic_load_storage_f32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_3(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> f32
-
-@internal(intrinsic_load_storage_vec2_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_4(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec2<i32>
-
-@internal(intrinsic_load_storage_vec2_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_5(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec2<u32>
+@internal(intrinsic_load_storage_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_4(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> f16
 
 @internal(intrinsic_load_storage_vec2_f32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_6(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec2<f32>
+fn tint_symbol_5(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec2<f32>
 
-@internal(intrinsic_load_storage_vec3_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_7(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec3<i32>
+@internal(intrinsic_load_storage_vec2_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_6(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec2<i32>
 
-@internal(intrinsic_load_storage_vec3_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_8(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec3<u32>
+@internal(intrinsic_load_storage_vec2_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_7(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec2<u32>
+
+@internal(intrinsic_load_storage_vec2_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_8(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec2<f16>
 
 @internal(intrinsic_load_storage_vec3_f32) @internal(disable_validation__function_has_no_body)
 fn tint_symbol_9(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec3<f32>
 
-@internal(intrinsic_load_storage_vec4_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_10(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec4<i32>
+@internal(intrinsic_load_storage_vec3_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_10(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec3<i32>
 
-@internal(intrinsic_load_storage_vec4_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_11(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec4<u32>
+@internal(intrinsic_load_storage_vec3_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_11(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec3<u32>
+
+@internal(intrinsic_load_storage_vec3_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_12(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec3<f16>
 
 @internal(intrinsic_load_storage_vec4_f32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_12(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec4<f32>
+fn tint_symbol_13(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec4<f32>
 
-fn tint_symbol_13(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x2<f32> {
-  return mat2x2<f32>(tint_symbol_6(buffer, (offset + 0u)), tint_symbol_6(buffer, (offset + 8u)));
+@internal(intrinsic_load_storage_vec4_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_14(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec4<i32>
+
+@internal(intrinsic_load_storage_vec4_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_15(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec4<u32>
+
+@internal(intrinsic_load_storage_vec4_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_16(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec4<f16>
+
+fn tint_symbol_17(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x2<f32> {
+  return mat2x2<f32>(tint_symbol_5(buffer, (offset + 0u)), tint_symbol_5(buffer, (offset + 8u)));
 }
 
-fn tint_symbol_14(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x3<f32> {
+fn tint_symbol_18(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x3<f32> {
   return mat2x3<f32>(tint_symbol_9(buffer, (offset + 0u)), tint_symbol_9(buffer, (offset + 16u)));
 }
 
-fn tint_symbol_15(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x4<f32> {
-  return mat2x4<f32>(tint_symbol_12(buffer, (offset + 0u)), tint_symbol_12(buffer, (offset + 16u)));
+fn tint_symbol_19(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x4<f32> {
+  return mat2x4<f32>(tint_symbol_13(buffer, (offset + 0u)), tint_symbol_13(buffer, (offset + 16u)));
 }
 
-fn tint_symbol_16(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x2<f32> {
-  return mat3x2<f32>(tint_symbol_6(buffer, (offset + 0u)), tint_symbol_6(buffer, (offset + 8u)), tint_symbol_6(buffer, (offset + 16u)));
+fn tint_symbol_20(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x2<f32> {
+  return mat3x2<f32>(tint_symbol_5(buffer, (offset + 0u)), tint_symbol_5(buffer, (offset + 8u)), tint_symbol_5(buffer, (offset + 16u)));
 }
 
-fn tint_symbol_17(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x3<f32> {
+fn tint_symbol_21(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x3<f32> {
   return mat3x3<f32>(tint_symbol_9(buffer, (offset + 0u)), tint_symbol_9(buffer, (offset + 16u)), tint_symbol_9(buffer, (offset + 32u)));
 }
 
-fn tint_symbol_18(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x4<f32> {
-  return mat3x4<f32>(tint_symbol_12(buffer, (offset + 0u)), tint_symbol_12(buffer, (offset + 16u)), tint_symbol_12(buffer, (offset + 32u)));
+fn tint_symbol_22(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x4<f32> {
+  return mat3x4<f32>(tint_symbol_13(buffer, (offset + 0u)), tint_symbol_13(buffer, (offset + 16u)), tint_symbol_13(buffer, (offset + 32u)));
 }
 
-fn tint_symbol_19(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x2<f32> {
-  return mat4x2<f32>(tint_symbol_6(buffer, (offset + 0u)), tint_symbol_6(buffer, (offset + 8u)), tint_symbol_6(buffer, (offset + 16u)), tint_symbol_6(buffer, (offset + 24u)));
+fn tint_symbol_23(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x2<f32> {
+  return mat4x2<f32>(tint_symbol_5(buffer, (offset + 0u)), tint_symbol_5(buffer, (offset + 8u)), tint_symbol_5(buffer, (offset + 16u)), tint_symbol_5(buffer, (offset + 24u)));
 }
 
-fn tint_symbol_20(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x3<f32> {
+fn tint_symbol_24(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x3<f32> {
   return mat4x3<f32>(tint_symbol_9(buffer, (offset + 0u)), tint_symbol_9(buffer, (offset + 16u)), tint_symbol_9(buffer, (offset + 32u)), tint_symbol_9(buffer, (offset + 48u)));
 }
 
-fn tint_symbol_21(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x4<f32> {
-  return mat4x4<f32>(tint_symbol_12(buffer, (offset + 0u)), tint_symbol_12(buffer, (offset + 16u)), tint_symbol_12(buffer, (offset + 32u)), tint_symbol_12(buffer, (offset + 48u)));
+fn tint_symbol_25(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x4<f32> {
+  return mat4x4<f32>(tint_symbol_13(buffer, (offset + 0u)), tint_symbol_13(buffer, (offset + 16u)), tint_symbol_13(buffer, (offset + 32u)), tint_symbol_13(buffer, (offset + 48u)));
 }
 
-fn tint_symbol_22(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> array<vec3<f32>, 2u> {
+fn tint_symbol_26(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x2<f16> {
+  return mat2x2<f16>(tint_symbol_8(buffer, (offset + 0u)), tint_symbol_8(buffer, (offset + 4u)));
+}
+
+fn tint_symbol_27(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x3<f16> {
+  return mat2x3<f16>(tint_symbol_12(buffer, (offset + 0u)), tint_symbol_12(buffer, (offset + 8u)));
+}
+
+fn tint_symbol_28(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x4<f16> {
+  return mat2x4<f16>(tint_symbol_16(buffer, (offset + 0u)), tint_symbol_16(buffer, (offset + 8u)));
+}
+
+fn tint_symbol_29(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x2<f16> {
+  return mat3x2<f16>(tint_symbol_8(buffer, (offset + 0u)), tint_symbol_8(buffer, (offset + 4u)), tint_symbol_8(buffer, (offset + 8u)));
+}
+
+fn tint_symbol_30(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x3<f16> {
+  return mat3x3<f16>(tint_symbol_12(buffer, (offset + 0u)), tint_symbol_12(buffer, (offset + 8u)), tint_symbol_12(buffer, (offset + 16u)));
+}
+
+fn tint_symbol_31(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x4<f16> {
+  return mat3x4<f16>(tint_symbol_16(buffer, (offset + 0u)), tint_symbol_16(buffer, (offset + 8u)), tint_symbol_16(buffer, (offset + 16u)));
+}
+
+fn tint_symbol_32(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x2<f16> {
+  return mat4x2<f16>(tint_symbol_8(buffer, (offset + 0u)), tint_symbol_8(buffer, (offset + 4u)), tint_symbol_8(buffer, (offset + 8u)), tint_symbol_8(buffer, (offset + 12u)));
+}
+
+fn tint_symbol_33(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x3<f16> {
+  return mat4x3<f16>(tint_symbol_12(buffer, (offset + 0u)), tint_symbol_12(buffer, (offset + 8u)), tint_symbol_12(buffer, (offset + 16u)), tint_symbol_12(buffer, (offset + 24u)));
+}
+
+fn tint_symbol_34(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x4<f16> {
+  return mat4x4<f16>(tint_symbol_16(buffer, (offset + 0u)), tint_symbol_16(buffer, (offset + 8u)), tint_symbol_16(buffer, (offset + 16u)), tint_symbol_16(buffer, (offset + 24u)));
+}
+
+fn tint_symbol_35(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> array<vec3<f32>, 2u> {
   var arr : array<vec3<f32>, 2u>;
-  for(var i_1 = 0u; (i_1 < 2u); i_1 = (i_1 + 1u)) {
-    arr[i_1] = tint_symbol_9(buffer, (offset + (i_1 * 16u)));
+  for(var i = 0u; (i < 2u); i = (i + 1u)) {
+    arr[i] = tint_symbol_9(buffer, (offset + (i * 16u)));
   }
   return arr;
 }
 
+fn tint_symbol_36(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> array<mat4x2<f16>, 2u> {
+  var arr_1 : array<mat4x2<f16>, 2u>;
+  for(var i_1 = 0u; (i_1 < 2u); i_1 = (i_1 + 1u)) {
+    arr_1[i_1] = tint_symbol_32(buffer, (offset + (i_1 * 16u)));
+  }
+  return arr_1;
+}
+
 fn tint_symbol(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> SB {
-  return SB(tint_symbol_1(buffer, (offset + 0u)), tint_symbol_2(buffer, (offset + 4u)), tint_symbol_3(buffer, (offset + 8u)), tint_symbol_4(buffer, (offset + 16u)), tint_symbol_5(buffer, (offset + 24u)), tint_symbol_6(buffer, (offset + 32u)), tint_symbol_7(buffer, (offset + 48u)), tint_symbol_8(buffer, (offset + 64u)), tint_symbol_9(buffer, (offset + 80u)), tint_symbol_10(buffer, (offset + 96u)), tint_symbol_11(buffer, (offset + 112u)), tint_symbol_12(buffer, (offset + 128u)), tint_symbol_13(buffer, (offset + 144u)), tint_symbol_14(buffer, (offset + 160u)), tint_symbol_15(buffer, (offset + 192u)), tint_symbol_16(buffer, (offset + 224u)), tint_symbol_17(buffer, (offset + 256u)), tint_symbol_18(buffer, (offset + 304u)), tint_symbol_19(buffer, (offset + 352u)), tint_symbol_20(buffer, (offset + 384u)), tint_symbol_21(buffer, (offset + 448u)), tint_symbol_22(buffer, (offset + 512u)));
+  return SB(tint_symbol_1(buffer, (offset + 0u)), tint_symbol_2(buffer, (offset + 4u)), tint_symbol_3(buffer, (offset + 8u)), tint_symbol_4(buffer, (offset + 12u)), tint_symbol_5(buffer, (offset + 16u)), tint_symbol_6(buffer, (offset + 24u)), tint_symbol_7(buffer, (offset + 32u)), tint_symbol_8(buffer, (offset + 40u)), tint_symbol_9(buffer, (offset + 48u)), tint_symbol_10(buffer, (offset + 64u)), tint_symbol_11(buffer, (offset + 80u)), tint_symbol_12(buffer, (offset + 96u)), tint_symbol_13(buffer, (offset + 112u)), tint_symbol_14(buffer, (offset + 128u)), tint_symbol_15(buffer, (offset + 144u)), tint_symbol_16(buffer, (offset + 160u)), tint_symbol_17(buffer, (offset + 168u)), tint_symbol_18(buffer, (offset + 192u)), tint_symbol_19(buffer, (offset + 224u)), tint_symbol_20(buffer, (offset + 256u)), tint_symbol_21(buffer, (offset + 288u)), tint_symbol_22(buffer, (offset + 336u)), tint_symbol_23(buffer, (offset + 384u)), tint_symbol_24(buffer, (offset + 416u)), tint_symbol_25(buffer, (offset + 480u)), tint_symbol_26(buffer, (offset + 544u)), tint_symbol_27(buffer, (offset + 552u)), tint_symbol_28(buffer, (offset + 568u)), tint_symbol_29(buffer, (offset + 584u)), tint_symbol_30(buffer, (offset + 600u)), tint_symbol_31(buffer, (offset + 624u)), tint_symbol_32(buffer, (offset + 648u)), tint_symbol_33(buffer, (offset + 664u)), tint_symbol_34(buffer, (offset + 696u)), tint_symbol_35(buffer, (offset + 736u)), tint_symbol_36(buffer, (offset + 768u)));
 }
 
 @compute @workgroup_size(1)
@@ -1419,6 +2237,8 @@
 
 TEST_F(DecomposeMemoryAccessTest, LoadStructure_OutOfOrder) {
     auto* src = R"(
+enable f16;
+
 @compute @workgroup_size(1)
 fn main() {
   var x : SB = sb;
@@ -1427,114 +2247,186 @@
 @group(0) @binding(0) var<storage, read_write> sb : SB;
 
 struct SB {
-  a : i32,
-  b : u32,
-  c : f32,
-  d : vec2<i32>,
-  e : vec2<u32>,
-  f : vec2<f32>,
-  g : vec3<i32>,
-  h : vec3<u32>,
-  i : vec3<f32>,
-  j : vec4<i32>,
-  k : vec4<u32>,
-  l : vec4<f32>,
-  m : mat2x2<f32>,
-  n : mat2x3<f32>,
-  o : mat2x4<f32>,
-  p : mat3x2<f32>,
-  q : mat3x3<f32>,
-  r : mat3x4<f32>,
-  s : mat4x2<f32>,
-  t : mat4x3<f32>,
-  u : mat4x4<f32>,
-  v : array<vec3<f32>, 2>,
+  scalar_f32 : f32,
+  scalar_i32 : i32,
+  scalar_u32 : u32,
+  scalar_f16 : f16,
+  vec2_f32 : vec2<f32>,
+  vec2_i32 : vec2<i32>,
+  vec2_u32 : vec2<u32>,
+  vec2_f16 : vec2<f16>,
+  vec3_f32 : vec3<f32>,
+  vec3_i32 : vec3<i32>,
+  vec3_u32 : vec3<u32>,
+  vec3_f16 : vec3<f16>,
+  vec4_f32 : vec4<f32>,
+  vec4_i32 : vec4<i32>,
+  vec4_u32 : vec4<u32>,
+  vec4_f16 : vec4<f16>,
+  mat2x2_f32 : mat2x2<f32>,
+  mat2x3_f32 : mat2x3<f32>,
+  mat2x4_f32 : mat2x4<f32>,
+  mat3x2_f32 : mat3x2<f32>,
+  mat3x3_f32 : mat3x3<f32>,
+  mat3x4_f32 : mat3x4<f32>,
+  mat4x2_f32 : mat4x2<f32>,
+  mat4x3_f32 : mat4x3<f32>,
+  mat4x4_f32 : mat4x4<f32>,
+  mat2x2_f16 : mat2x2<f16>,
+  mat2x3_f16 : mat2x3<f16>,
+  mat2x4_f16 : mat2x4<f16>,
+  mat3x2_f16 : mat3x2<f16>,
+  mat3x3_f16 : mat3x3<f16>,
+  mat3x4_f16 : mat3x4<f16>,
+  mat4x2_f16 : mat4x2<f16>,
+  mat4x3_f16 : mat4x3<f16>,
+  mat4x4_f16 : mat4x4<f16>,
+  arr2_vec3_f32 : array<vec3<f32>, 2>,
+  arr2_mat4x2_f16 : array<mat4x2<f16>, 2>,
 };
 )";
 
     auto* expect = R"(
-@internal(intrinsic_load_storage_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_1(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> i32
-
-@internal(intrinsic_load_storage_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_2(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> u32
+enable f16;
 
 @internal(intrinsic_load_storage_f32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_3(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> f32
+fn tint_symbol_1(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> f32
 
-@internal(intrinsic_load_storage_vec2_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_4(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec2<i32>
+@internal(intrinsic_load_storage_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_2(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> i32
 
-@internal(intrinsic_load_storage_vec2_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_5(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec2<u32>
+@internal(intrinsic_load_storage_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_3(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> u32
+
+@internal(intrinsic_load_storage_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_4(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> f16
 
 @internal(intrinsic_load_storage_vec2_f32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_6(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec2<f32>
+fn tint_symbol_5(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec2<f32>
 
-@internal(intrinsic_load_storage_vec3_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_7(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec3<i32>
+@internal(intrinsic_load_storage_vec2_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_6(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec2<i32>
 
-@internal(intrinsic_load_storage_vec3_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_8(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec3<u32>
+@internal(intrinsic_load_storage_vec2_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_7(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec2<u32>
+
+@internal(intrinsic_load_storage_vec2_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_8(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec2<f16>
 
 @internal(intrinsic_load_storage_vec3_f32) @internal(disable_validation__function_has_no_body)
 fn tint_symbol_9(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec3<f32>
 
-@internal(intrinsic_load_storage_vec4_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_10(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec4<i32>
+@internal(intrinsic_load_storage_vec3_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_10(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec3<i32>
 
-@internal(intrinsic_load_storage_vec4_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_11(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec4<u32>
+@internal(intrinsic_load_storage_vec3_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_11(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec3<u32>
+
+@internal(intrinsic_load_storage_vec3_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_12(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec3<f16>
 
 @internal(intrinsic_load_storage_vec4_f32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_12(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec4<f32>
+fn tint_symbol_13(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec4<f32>
 
-fn tint_symbol_13(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x2<f32> {
-  return mat2x2<f32>(tint_symbol_6(buffer, (offset + 0u)), tint_symbol_6(buffer, (offset + 8u)));
+@internal(intrinsic_load_storage_vec4_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_14(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec4<i32>
+
+@internal(intrinsic_load_storage_vec4_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_15(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec4<u32>
+
+@internal(intrinsic_load_storage_vec4_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_16(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> vec4<f16>
+
+fn tint_symbol_17(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x2<f32> {
+  return mat2x2<f32>(tint_symbol_5(buffer, (offset + 0u)), tint_symbol_5(buffer, (offset + 8u)));
 }
 
-fn tint_symbol_14(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x3<f32> {
+fn tint_symbol_18(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x3<f32> {
   return mat2x3<f32>(tint_symbol_9(buffer, (offset + 0u)), tint_symbol_9(buffer, (offset + 16u)));
 }
 
-fn tint_symbol_15(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x4<f32> {
-  return mat2x4<f32>(tint_symbol_12(buffer, (offset + 0u)), tint_symbol_12(buffer, (offset + 16u)));
+fn tint_symbol_19(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x4<f32> {
+  return mat2x4<f32>(tint_symbol_13(buffer, (offset + 0u)), tint_symbol_13(buffer, (offset + 16u)));
 }
 
-fn tint_symbol_16(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x2<f32> {
-  return mat3x2<f32>(tint_symbol_6(buffer, (offset + 0u)), tint_symbol_6(buffer, (offset + 8u)), tint_symbol_6(buffer, (offset + 16u)));
+fn tint_symbol_20(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x2<f32> {
+  return mat3x2<f32>(tint_symbol_5(buffer, (offset + 0u)), tint_symbol_5(buffer, (offset + 8u)), tint_symbol_5(buffer, (offset + 16u)));
 }
 
-fn tint_symbol_17(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x3<f32> {
+fn tint_symbol_21(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x3<f32> {
   return mat3x3<f32>(tint_symbol_9(buffer, (offset + 0u)), tint_symbol_9(buffer, (offset + 16u)), tint_symbol_9(buffer, (offset + 32u)));
 }
 
-fn tint_symbol_18(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x4<f32> {
-  return mat3x4<f32>(tint_symbol_12(buffer, (offset + 0u)), tint_symbol_12(buffer, (offset + 16u)), tint_symbol_12(buffer, (offset + 32u)));
+fn tint_symbol_22(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x4<f32> {
+  return mat3x4<f32>(tint_symbol_13(buffer, (offset + 0u)), tint_symbol_13(buffer, (offset + 16u)), tint_symbol_13(buffer, (offset + 32u)));
 }
 
-fn tint_symbol_19(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x2<f32> {
-  return mat4x2<f32>(tint_symbol_6(buffer, (offset + 0u)), tint_symbol_6(buffer, (offset + 8u)), tint_symbol_6(buffer, (offset + 16u)), tint_symbol_6(buffer, (offset + 24u)));
+fn tint_symbol_23(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x2<f32> {
+  return mat4x2<f32>(tint_symbol_5(buffer, (offset + 0u)), tint_symbol_5(buffer, (offset + 8u)), tint_symbol_5(buffer, (offset + 16u)), tint_symbol_5(buffer, (offset + 24u)));
 }
 
-fn tint_symbol_20(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x3<f32> {
+fn tint_symbol_24(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x3<f32> {
   return mat4x3<f32>(tint_symbol_9(buffer, (offset + 0u)), tint_symbol_9(buffer, (offset + 16u)), tint_symbol_9(buffer, (offset + 32u)), tint_symbol_9(buffer, (offset + 48u)));
 }
 
-fn tint_symbol_21(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x4<f32> {
-  return mat4x4<f32>(tint_symbol_12(buffer, (offset + 0u)), tint_symbol_12(buffer, (offset + 16u)), tint_symbol_12(buffer, (offset + 32u)), tint_symbol_12(buffer, (offset + 48u)));
+fn tint_symbol_25(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x4<f32> {
+  return mat4x4<f32>(tint_symbol_13(buffer, (offset + 0u)), tint_symbol_13(buffer, (offset + 16u)), tint_symbol_13(buffer, (offset + 32u)), tint_symbol_13(buffer, (offset + 48u)));
 }
 
-fn tint_symbol_22(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> array<vec3<f32>, 2u> {
+fn tint_symbol_26(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x2<f16> {
+  return mat2x2<f16>(tint_symbol_8(buffer, (offset + 0u)), tint_symbol_8(buffer, (offset + 4u)));
+}
+
+fn tint_symbol_27(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x3<f16> {
+  return mat2x3<f16>(tint_symbol_12(buffer, (offset + 0u)), tint_symbol_12(buffer, (offset + 8u)));
+}
+
+fn tint_symbol_28(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat2x4<f16> {
+  return mat2x4<f16>(tint_symbol_16(buffer, (offset + 0u)), tint_symbol_16(buffer, (offset + 8u)));
+}
+
+fn tint_symbol_29(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x2<f16> {
+  return mat3x2<f16>(tint_symbol_8(buffer, (offset + 0u)), tint_symbol_8(buffer, (offset + 4u)), tint_symbol_8(buffer, (offset + 8u)));
+}
+
+fn tint_symbol_30(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x3<f16> {
+  return mat3x3<f16>(tint_symbol_12(buffer, (offset + 0u)), tint_symbol_12(buffer, (offset + 8u)), tint_symbol_12(buffer, (offset + 16u)));
+}
+
+fn tint_symbol_31(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat3x4<f16> {
+  return mat3x4<f16>(tint_symbol_16(buffer, (offset + 0u)), tint_symbol_16(buffer, (offset + 8u)), tint_symbol_16(buffer, (offset + 16u)));
+}
+
+fn tint_symbol_32(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x2<f16> {
+  return mat4x2<f16>(tint_symbol_8(buffer, (offset + 0u)), tint_symbol_8(buffer, (offset + 4u)), tint_symbol_8(buffer, (offset + 8u)), tint_symbol_8(buffer, (offset + 12u)));
+}
+
+fn tint_symbol_33(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x3<f16> {
+  return mat4x3<f16>(tint_symbol_12(buffer, (offset + 0u)), tint_symbol_12(buffer, (offset + 8u)), tint_symbol_12(buffer, (offset + 16u)), tint_symbol_12(buffer, (offset + 24u)));
+}
+
+fn tint_symbol_34(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> mat4x4<f16> {
+  return mat4x4<f16>(tint_symbol_16(buffer, (offset + 0u)), tint_symbol_16(buffer, (offset + 8u)), tint_symbol_16(buffer, (offset + 16u)), tint_symbol_16(buffer, (offset + 24u)));
+}
+
+fn tint_symbol_35(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> array<vec3<f32>, 2u> {
   var arr : array<vec3<f32>, 2u>;
-  for(var i_1 = 0u; (i_1 < 2u); i_1 = (i_1 + 1u)) {
-    arr[i_1] = tint_symbol_9(buffer, (offset + (i_1 * 16u)));
+  for(var i = 0u; (i < 2u); i = (i + 1u)) {
+    arr[i] = tint_symbol_9(buffer, (offset + (i * 16u)));
   }
   return arr;
 }
 
+fn tint_symbol_36(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> array<mat4x2<f16>, 2u> {
+  var arr_1 : array<mat4x2<f16>, 2u>;
+  for(var i_1 = 0u; (i_1 < 2u); i_1 = (i_1 + 1u)) {
+    arr_1[i_1] = tint_symbol_32(buffer, (offset + (i_1 * 16u)));
+  }
+  return arr_1;
+}
+
 fn tint_symbol(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32) -> SB {
-  return SB(tint_symbol_1(buffer, (offset + 0u)), tint_symbol_2(buffer, (offset + 4u)), tint_symbol_3(buffer, (offset + 8u)), tint_symbol_4(buffer, (offset + 16u)), tint_symbol_5(buffer, (offset + 24u)), tint_symbol_6(buffer, (offset + 32u)), tint_symbol_7(buffer, (offset + 48u)), tint_symbol_8(buffer, (offset + 64u)), tint_symbol_9(buffer, (offset + 80u)), tint_symbol_10(buffer, (offset + 96u)), tint_symbol_11(buffer, (offset + 112u)), tint_symbol_12(buffer, (offset + 128u)), tint_symbol_13(buffer, (offset + 144u)), tint_symbol_14(buffer, (offset + 160u)), tint_symbol_15(buffer, (offset + 192u)), tint_symbol_16(buffer, (offset + 224u)), tint_symbol_17(buffer, (offset + 256u)), tint_symbol_18(buffer, (offset + 304u)), tint_symbol_19(buffer, (offset + 352u)), tint_symbol_20(buffer, (offset + 384u)), tint_symbol_21(buffer, (offset + 448u)), tint_symbol_22(buffer, (offset + 512u)));
+  return SB(tint_symbol_1(buffer, (offset + 0u)), tint_symbol_2(buffer, (offset + 4u)), tint_symbol_3(buffer, (offset + 8u)), tint_symbol_4(buffer, (offset + 12u)), tint_symbol_5(buffer, (offset + 16u)), tint_symbol_6(buffer, (offset + 24u)), tint_symbol_7(buffer, (offset + 32u)), tint_symbol_8(buffer, (offset + 40u)), tint_symbol_9(buffer, (offset + 48u)), tint_symbol_10(buffer, (offset + 64u)), tint_symbol_11(buffer, (offset + 80u)), tint_symbol_12(buffer, (offset + 96u)), tint_symbol_13(buffer, (offset + 112u)), tint_symbol_14(buffer, (offset + 128u)), tint_symbol_15(buffer, (offset + 144u)), tint_symbol_16(buffer, (offset + 160u)), tint_symbol_17(buffer, (offset + 168u)), tint_symbol_18(buffer, (offset + 192u)), tint_symbol_19(buffer, (offset + 224u)), tint_symbol_20(buffer, (offset + 256u)), tint_symbol_21(buffer, (offset + 288u)), tint_symbol_22(buffer, (offset + 336u)), tint_symbol_23(buffer, (offset + 384u)), tint_symbol_24(buffer, (offset + 416u)), tint_symbol_25(buffer, (offset + 480u)), tint_symbol_26(buffer, (offset + 544u)), tint_symbol_27(buffer, (offset + 552u)), tint_symbol_28(buffer, (offset + 568u)), tint_symbol_29(buffer, (offset + 584u)), tint_symbol_30(buffer, (offset + 600u)), tint_symbol_31(buffer, (offset + 624u)), tint_symbol_32(buffer, (offset + 648u)), tint_symbol_33(buffer, (offset + 664u)), tint_symbol_34(buffer, (offset + 696u)), tint_symbol_35(buffer, (offset + 736u)), tint_symbol_36(buffer, (offset + 768u)));
 }
 
 @compute @workgroup_size(1)
@@ -1545,28 +2437,42 @@
 @group(0) @binding(0) var<storage, read_write> sb : SB;
 
 struct SB {
-  a : i32,
-  b : u32,
-  c : f32,
-  d : vec2<i32>,
-  e : vec2<u32>,
-  f : vec2<f32>,
-  g : vec3<i32>,
-  h : vec3<u32>,
-  i : vec3<f32>,
-  j : vec4<i32>,
-  k : vec4<u32>,
-  l : vec4<f32>,
-  m : mat2x2<f32>,
-  n : mat2x3<f32>,
-  o : mat2x4<f32>,
-  p : mat3x2<f32>,
-  q : mat3x3<f32>,
-  r : mat3x4<f32>,
-  s : mat4x2<f32>,
-  t : mat4x3<f32>,
-  u : mat4x4<f32>,
-  v : array<vec3<f32>, 2>,
+  scalar_f32 : f32,
+  scalar_i32 : i32,
+  scalar_u32 : u32,
+  scalar_f16 : f16,
+  vec2_f32 : vec2<f32>,
+  vec2_i32 : vec2<i32>,
+  vec2_u32 : vec2<u32>,
+  vec2_f16 : vec2<f16>,
+  vec3_f32 : vec3<f32>,
+  vec3_i32 : vec3<i32>,
+  vec3_u32 : vec3<u32>,
+  vec3_f16 : vec3<f16>,
+  vec4_f32 : vec4<f32>,
+  vec4_i32 : vec4<i32>,
+  vec4_u32 : vec4<u32>,
+  vec4_f16 : vec4<f16>,
+  mat2x2_f32 : mat2x2<f32>,
+  mat2x3_f32 : mat2x3<f32>,
+  mat2x4_f32 : mat2x4<f32>,
+  mat3x2_f32 : mat3x2<f32>,
+  mat3x3_f32 : mat3x3<f32>,
+  mat3x4_f32 : mat3x4<f32>,
+  mat4x2_f32 : mat4x2<f32>,
+  mat4x3_f32 : mat4x3<f32>,
+  mat4x4_f32 : mat4x4<f32>,
+  mat2x2_f16 : mat2x2<f16>,
+  mat2x3_f16 : mat2x3<f16>,
+  mat2x4_f16 : mat2x4<f16>,
+  mat3x2_f16 : mat3x2<f16>,
+  mat3x3_f16 : mat3x3<f16>,
+  mat3x4_f16 : mat3x4<f16>,
+  mat4x2_f16 : mat4x2<f16>,
+  mat4x3_f16 : mat4x3<f16>,
+  mat4x4_f16 : mat4x4<f16>,
+  arr2_vec3_f32 : array<vec3<f32>, 2>,
+  arr2_mat4x2_f16 : array<mat4x2<f16>, 2>,
 }
 )";
 
@@ -1577,29 +2483,45 @@
 
 TEST_F(DecomposeMemoryAccessTest, StoreStructure) {
     auto* src = R"(
+enable f16;
+
 struct SB {
-  a : i32,
-  b : u32,
-  c : f32,
-  d : vec2<i32>,
-  e : vec2<u32>,
-  f : vec2<f32>,
-  g : vec3<i32>,
-  h : vec3<u32>,
-  i : vec3<f32>,
-  j : vec4<i32>,
-  k : vec4<u32>,
-  l : vec4<f32>,
-  m : mat2x2<f32>,
-  n : mat2x3<f32>,
-  o : mat2x4<f32>,
-  p : mat3x2<f32>,
-  q : mat3x3<f32>,
-  r : mat3x4<f32>,
-  s : mat4x2<f32>,
-  t : mat4x3<f32>,
-  u : mat4x4<f32>,
-  v : array<vec3<f32>, 2>,
+  scalar_f32 : f32,
+  scalar_i32 : i32,
+  scalar_u32 : u32,
+  scalar_f16 : f16,
+  vec2_f32 : vec2<f32>,
+  vec2_i32 : vec2<i32>,
+  vec2_u32 : vec2<u32>,
+  vec2_f16 : vec2<f16>,
+  vec3_f32 : vec3<f32>,
+  vec3_i32 : vec3<i32>,
+  vec3_u32 : vec3<u32>,
+  vec3_f16 : vec3<f16>,
+  vec4_f32 : vec4<f32>,
+  vec4_i32 : vec4<i32>,
+  vec4_u32 : vec4<u32>,
+  vec4_f16 : vec4<f16>,
+  mat2x2_f32 : mat2x2<f32>,
+  mat2x3_f32 : mat2x3<f32>,
+  mat2x4_f32 : mat2x4<f32>,
+  mat3x2_f32 : mat3x2<f32>,
+  mat3x3_f32 : mat3x3<f32>,
+  mat3x4_f32 : mat3x4<f32>,
+  mat4x2_f32 : mat4x2<f32>,
+  mat4x3_f32 : mat4x3<f32>,
+  mat4x4_f32 : mat4x4<f32>,
+  mat2x2_f16 : mat2x2<f16>,
+  mat2x3_f16 : mat2x3<f16>,
+  mat2x4_f16 : mat2x4<f16>,
+  mat3x2_f16 : mat3x2<f16>,
+  mat3x3_f16 : mat3x3<f16>,
+  mat3x4_f16 : mat3x4<f16>,
+  mat4x2_f16 : mat4x2<f16>,
+  mat4x3_f16 : mat4x3<f16>,
+  mat4x4_f16 : mat4x4<f16>,
+  arr2_vec3_f32 : array<vec3<f32>, 2>,
+  arr2_mat4x2_f16 : array<mat4x2<f16>, 2>,
 };
 
 @group(0) @binding(0) var<storage, read_write> sb : SB;
@@ -1611,153 +2533,256 @@
 )";
 
     auto* expect = R"(
+enable f16;
+
 struct SB {
-  a : i32,
-  b : u32,
-  c : f32,
-  d : vec2<i32>,
-  e : vec2<u32>,
-  f : vec2<f32>,
-  g : vec3<i32>,
-  h : vec3<u32>,
-  i : vec3<f32>,
-  j : vec4<i32>,
-  k : vec4<u32>,
-  l : vec4<f32>,
-  m : mat2x2<f32>,
-  n : mat2x3<f32>,
-  o : mat2x4<f32>,
-  p : mat3x2<f32>,
-  q : mat3x3<f32>,
-  r : mat3x4<f32>,
-  s : mat4x2<f32>,
-  t : mat4x3<f32>,
-  u : mat4x4<f32>,
-  v : array<vec3<f32>, 2>,
+  scalar_f32 : f32,
+  scalar_i32 : i32,
+  scalar_u32 : u32,
+  scalar_f16 : f16,
+  vec2_f32 : vec2<f32>,
+  vec2_i32 : vec2<i32>,
+  vec2_u32 : vec2<u32>,
+  vec2_f16 : vec2<f16>,
+  vec3_f32 : vec3<f32>,
+  vec3_i32 : vec3<i32>,
+  vec3_u32 : vec3<u32>,
+  vec3_f16 : vec3<f16>,
+  vec4_f32 : vec4<f32>,
+  vec4_i32 : vec4<i32>,
+  vec4_u32 : vec4<u32>,
+  vec4_f16 : vec4<f16>,
+  mat2x2_f32 : mat2x2<f32>,
+  mat2x3_f32 : mat2x3<f32>,
+  mat2x4_f32 : mat2x4<f32>,
+  mat3x2_f32 : mat3x2<f32>,
+  mat3x3_f32 : mat3x3<f32>,
+  mat3x4_f32 : mat3x4<f32>,
+  mat4x2_f32 : mat4x2<f32>,
+  mat4x3_f32 : mat4x3<f32>,
+  mat4x4_f32 : mat4x4<f32>,
+  mat2x2_f16 : mat2x2<f16>,
+  mat2x3_f16 : mat2x3<f16>,
+  mat2x4_f16 : mat2x4<f16>,
+  mat3x2_f16 : mat3x2<f16>,
+  mat3x3_f16 : mat3x3<f16>,
+  mat3x4_f16 : mat3x4<f16>,
+  mat4x2_f16 : mat4x2<f16>,
+  mat4x3_f16 : mat4x3<f16>,
+  mat4x4_f16 : mat4x4<f16>,
+  arr2_vec3_f32 : array<vec3<f32>, 2>,
+  arr2_mat4x2_f16 : array<mat4x2<f16>, 2>,
 }
 
 @group(0) @binding(0) var<storage, read_write> sb : SB;
 
+@internal(intrinsic_store_storage_f32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_1(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : f32)
+
 @internal(intrinsic_store_storage_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_1(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : i32)
+fn tint_symbol_2(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : i32)
 
 @internal(intrinsic_store_storage_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_2(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : u32)
+fn tint_symbol_3(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : u32)
 
-@internal(intrinsic_store_storage_f32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_3(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : f32)
-
-@internal(intrinsic_store_storage_vec2_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_4(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec2<i32>)
-
-@internal(intrinsic_store_storage_vec2_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_5(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec2<u32>)
+@internal(intrinsic_store_storage_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_4(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : f16)
 
 @internal(intrinsic_store_storage_vec2_f32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_6(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec2<f32>)
+fn tint_symbol_5(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec2<f32>)
 
-@internal(intrinsic_store_storage_vec3_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_7(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec3<i32>)
+@internal(intrinsic_store_storage_vec2_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_6(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec2<i32>)
 
-@internal(intrinsic_store_storage_vec3_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_8(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec3<u32>)
+@internal(intrinsic_store_storage_vec2_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_7(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec2<u32>)
+
+@internal(intrinsic_store_storage_vec2_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_8(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec2<f16>)
 
 @internal(intrinsic_store_storage_vec3_f32) @internal(disable_validation__function_has_no_body)
 fn tint_symbol_9(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec3<f32>)
 
-@internal(intrinsic_store_storage_vec4_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_10(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec4<i32>)
+@internal(intrinsic_store_storage_vec3_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_10(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec3<i32>)
 
-@internal(intrinsic_store_storage_vec4_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_11(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec4<u32>)
+@internal(intrinsic_store_storage_vec3_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_11(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec3<u32>)
+
+@internal(intrinsic_store_storage_vec3_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_12(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec3<f16>)
 
 @internal(intrinsic_store_storage_vec4_f32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_12(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec4<f32>)
+fn tint_symbol_13(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec4<f32>)
 
-fn tint_symbol_13(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x2<f32>) {
-  tint_symbol_6(buffer, (offset + 0u), value[0u]);
-  tint_symbol_6(buffer, (offset + 8u), value[1u]);
+@internal(intrinsic_store_storage_vec4_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_14(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec4<i32>)
+
+@internal(intrinsic_store_storage_vec4_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_15(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec4<u32>)
+
+@internal(intrinsic_store_storage_vec4_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_16(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec4<f16>)
+
+fn tint_symbol_17(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x2<f32>) {
+  tint_symbol_5(buffer, (offset + 0u), value[0u]);
+  tint_symbol_5(buffer, (offset + 8u), value[1u]);
 }
 
-fn tint_symbol_14(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x3<f32>) {
+fn tint_symbol_18(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x3<f32>) {
   tint_symbol_9(buffer, (offset + 0u), value[0u]);
   tint_symbol_9(buffer, (offset + 16u), value[1u]);
 }
 
-fn tint_symbol_15(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x4<f32>) {
-  tint_symbol_12(buffer, (offset + 0u), value[0u]);
-  tint_symbol_12(buffer, (offset + 16u), value[1u]);
+fn tint_symbol_19(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x4<f32>) {
+  tint_symbol_13(buffer, (offset + 0u), value[0u]);
+  tint_symbol_13(buffer, (offset + 16u), value[1u]);
 }
 
-fn tint_symbol_16(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x2<f32>) {
-  tint_symbol_6(buffer, (offset + 0u), value[0u]);
-  tint_symbol_6(buffer, (offset + 8u), value[1u]);
-  tint_symbol_6(buffer, (offset + 16u), value[2u]);
+fn tint_symbol_20(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x2<f32>) {
+  tint_symbol_5(buffer, (offset + 0u), value[0u]);
+  tint_symbol_5(buffer, (offset + 8u), value[1u]);
+  tint_symbol_5(buffer, (offset + 16u), value[2u]);
 }
 
-fn tint_symbol_17(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x3<f32>) {
+fn tint_symbol_21(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x3<f32>) {
   tint_symbol_9(buffer, (offset + 0u), value[0u]);
   tint_symbol_9(buffer, (offset + 16u), value[1u]);
   tint_symbol_9(buffer, (offset + 32u), value[2u]);
 }
 
-fn tint_symbol_18(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x4<f32>) {
-  tint_symbol_12(buffer, (offset + 0u), value[0u]);
-  tint_symbol_12(buffer, (offset + 16u), value[1u]);
-  tint_symbol_12(buffer, (offset + 32u), value[2u]);
+fn tint_symbol_22(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x4<f32>) {
+  tint_symbol_13(buffer, (offset + 0u), value[0u]);
+  tint_symbol_13(buffer, (offset + 16u), value[1u]);
+  tint_symbol_13(buffer, (offset + 32u), value[2u]);
 }
 
-fn tint_symbol_19(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x2<f32>) {
-  tint_symbol_6(buffer, (offset + 0u), value[0u]);
-  tint_symbol_6(buffer, (offset + 8u), value[1u]);
-  tint_symbol_6(buffer, (offset + 16u), value[2u]);
-  tint_symbol_6(buffer, (offset + 24u), value[3u]);
+fn tint_symbol_23(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x2<f32>) {
+  tint_symbol_5(buffer, (offset + 0u), value[0u]);
+  tint_symbol_5(buffer, (offset + 8u), value[1u]);
+  tint_symbol_5(buffer, (offset + 16u), value[2u]);
+  tint_symbol_5(buffer, (offset + 24u), value[3u]);
 }
 
-fn tint_symbol_20(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x3<f32>) {
+fn tint_symbol_24(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x3<f32>) {
   tint_symbol_9(buffer, (offset + 0u), value[0u]);
   tint_symbol_9(buffer, (offset + 16u), value[1u]);
   tint_symbol_9(buffer, (offset + 32u), value[2u]);
   tint_symbol_9(buffer, (offset + 48u), value[3u]);
 }
 
-fn tint_symbol_21(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x4<f32>) {
-  tint_symbol_12(buffer, (offset + 0u), value[0u]);
-  tint_symbol_12(buffer, (offset + 16u), value[1u]);
-  tint_symbol_12(buffer, (offset + 32u), value[2u]);
-  tint_symbol_12(buffer, (offset + 48u), value[3u]);
+fn tint_symbol_25(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x4<f32>) {
+  tint_symbol_13(buffer, (offset + 0u), value[0u]);
+  tint_symbol_13(buffer, (offset + 16u), value[1u]);
+  tint_symbol_13(buffer, (offset + 32u), value[2u]);
+  tint_symbol_13(buffer, (offset + 48u), value[3u]);
 }
 
-fn tint_symbol_22(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : array<vec3<f32>, 2u>) {
+fn tint_symbol_26(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x2<f16>) {
+  tint_symbol_8(buffer, (offset + 0u), value[0u]);
+  tint_symbol_8(buffer, (offset + 4u), value[1u]);
+}
+
+fn tint_symbol_27(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x3<f16>) {
+  tint_symbol_12(buffer, (offset + 0u), value[0u]);
+  tint_symbol_12(buffer, (offset + 8u), value[1u]);
+}
+
+fn tint_symbol_28(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x4<f16>) {
+  tint_symbol_16(buffer, (offset + 0u), value[0u]);
+  tint_symbol_16(buffer, (offset + 8u), value[1u]);
+}
+
+fn tint_symbol_29(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x2<f16>) {
+  tint_symbol_8(buffer, (offset + 0u), value[0u]);
+  tint_symbol_8(buffer, (offset + 4u), value[1u]);
+  tint_symbol_8(buffer, (offset + 8u), value[2u]);
+}
+
+fn tint_symbol_30(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x3<f16>) {
+  tint_symbol_12(buffer, (offset + 0u), value[0u]);
+  tint_symbol_12(buffer, (offset + 8u), value[1u]);
+  tint_symbol_12(buffer, (offset + 16u), value[2u]);
+}
+
+fn tint_symbol_31(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x4<f16>) {
+  tint_symbol_16(buffer, (offset + 0u), value[0u]);
+  tint_symbol_16(buffer, (offset + 8u), value[1u]);
+  tint_symbol_16(buffer, (offset + 16u), value[2u]);
+}
+
+fn tint_symbol_32(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x2<f16>) {
+  tint_symbol_8(buffer, (offset + 0u), value[0u]);
+  tint_symbol_8(buffer, (offset + 4u), value[1u]);
+  tint_symbol_8(buffer, (offset + 8u), value[2u]);
+  tint_symbol_8(buffer, (offset + 12u), value[3u]);
+}
+
+fn tint_symbol_33(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x3<f16>) {
+  tint_symbol_12(buffer, (offset + 0u), value[0u]);
+  tint_symbol_12(buffer, (offset + 8u), value[1u]);
+  tint_symbol_12(buffer, (offset + 16u), value[2u]);
+  tint_symbol_12(buffer, (offset + 24u), value[3u]);
+}
+
+fn tint_symbol_34(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x4<f16>) {
+  tint_symbol_16(buffer, (offset + 0u), value[0u]);
+  tint_symbol_16(buffer, (offset + 8u), value[1u]);
+  tint_symbol_16(buffer, (offset + 16u), value[2u]);
+  tint_symbol_16(buffer, (offset + 24u), value[3u]);
+}
+
+fn tint_symbol_35(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : array<vec3<f32>, 2u>) {
   var array = value;
+  for(var i = 0u; (i < 2u); i = (i + 1u)) {
+    tint_symbol_9(buffer, (offset + (i * 16u)), array[i]);
+  }
+}
+
+fn tint_symbol_36(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : array<mat4x2<f16>, 2u>) {
+  var array_1 = value;
   for(var i_1 = 0u; (i_1 < 2u); i_1 = (i_1 + 1u)) {
-    tint_symbol_9(buffer, (offset + (i_1 * 16u)), array[i_1]);
+    tint_symbol_32(buffer, (offset + (i_1 * 16u)), array_1[i_1]);
   }
 }
 
 fn tint_symbol(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : SB) {
-  tint_symbol_1(buffer, (offset + 0u), value.a);
-  tint_symbol_2(buffer, (offset + 4u), value.b);
-  tint_symbol_3(buffer, (offset + 8u), value.c);
-  tint_symbol_4(buffer, (offset + 16u), value.d);
-  tint_symbol_5(buffer, (offset + 24u), value.e);
-  tint_symbol_6(buffer, (offset + 32u), value.f);
-  tint_symbol_7(buffer, (offset + 48u), value.g);
-  tint_symbol_8(buffer, (offset + 64u), value.h);
-  tint_symbol_9(buffer, (offset + 80u), value.i);
-  tint_symbol_10(buffer, (offset + 96u), value.j);
-  tint_symbol_11(buffer, (offset + 112u), value.k);
-  tint_symbol_12(buffer, (offset + 128u), value.l);
-  tint_symbol_13(buffer, (offset + 144u), value.m);
-  tint_symbol_14(buffer, (offset + 160u), value.n);
-  tint_symbol_15(buffer, (offset + 192u), value.o);
-  tint_symbol_16(buffer, (offset + 224u), value.p);
-  tint_symbol_17(buffer, (offset + 256u), value.q);
-  tint_symbol_18(buffer, (offset + 304u), value.r);
-  tint_symbol_19(buffer, (offset + 352u), value.s);
-  tint_symbol_20(buffer, (offset + 384u), value.t);
-  tint_symbol_21(buffer, (offset + 448u), value.u);
-  tint_symbol_22(buffer, (offset + 512u), value.v);
+  tint_symbol_1(buffer, (offset + 0u), value.scalar_f32);
+  tint_symbol_2(buffer, (offset + 4u), value.scalar_i32);
+  tint_symbol_3(buffer, (offset + 8u), value.scalar_u32);
+  tint_symbol_4(buffer, (offset + 12u), value.scalar_f16);
+  tint_symbol_5(buffer, (offset + 16u), value.vec2_f32);
+  tint_symbol_6(buffer, (offset + 24u), value.vec2_i32);
+  tint_symbol_7(buffer, (offset + 32u), value.vec2_u32);
+  tint_symbol_8(buffer, (offset + 40u), value.vec2_f16);
+  tint_symbol_9(buffer, (offset + 48u), value.vec3_f32);
+  tint_symbol_10(buffer, (offset + 64u), value.vec3_i32);
+  tint_symbol_11(buffer, (offset + 80u), value.vec3_u32);
+  tint_symbol_12(buffer, (offset + 96u), value.vec3_f16);
+  tint_symbol_13(buffer, (offset + 112u), value.vec4_f32);
+  tint_symbol_14(buffer, (offset + 128u), value.vec4_i32);
+  tint_symbol_15(buffer, (offset + 144u), value.vec4_u32);
+  tint_symbol_16(buffer, (offset + 160u), value.vec4_f16);
+  tint_symbol_17(buffer, (offset + 168u), value.mat2x2_f32);
+  tint_symbol_18(buffer, (offset + 192u), value.mat2x3_f32);
+  tint_symbol_19(buffer, (offset + 224u), value.mat2x4_f32);
+  tint_symbol_20(buffer, (offset + 256u), value.mat3x2_f32);
+  tint_symbol_21(buffer, (offset + 288u), value.mat3x3_f32);
+  tint_symbol_22(buffer, (offset + 336u), value.mat3x4_f32);
+  tint_symbol_23(buffer, (offset + 384u), value.mat4x2_f32);
+  tint_symbol_24(buffer, (offset + 416u), value.mat4x3_f32);
+  tint_symbol_25(buffer, (offset + 480u), value.mat4x4_f32);
+  tint_symbol_26(buffer, (offset + 544u), value.mat2x2_f16);
+  tint_symbol_27(buffer, (offset + 552u), value.mat2x3_f16);
+  tint_symbol_28(buffer, (offset + 568u), value.mat2x4_f16);
+  tint_symbol_29(buffer, (offset + 584u), value.mat3x2_f16);
+  tint_symbol_30(buffer, (offset + 600u), value.mat3x3_f16);
+  tint_symbol_31(buffer, (offset + 624u), value.mat3x4_f16);
+  tint_symbol_32(buffer, (offset + 648u), value.mat4x2_f16);
+  tint_symbol_33(buffer, (offset + 664u), value.mat4x3_f16);
+  tint_symbol_34(buffer, (offset + 696u), value.mat4x4_f16);
+  tint_symbol_35(buffer, (offset + 736u), value.arr2_vec3_f32);
+  tint_symbol_36(buffer, (offset + 768u), value.arr2_mat4x2_f16);
 }
 
 @compute @workgroup_size(1)
@@ -1773,6 +2798,8 @@
 
 TEST_F(DecomposeMemoryAccessTest, StoreStructure_OutOfOrder) {
     auto* src = R"(
+enable f16;
+
 @compute @workgroup_size(1)
 fn main() {
   sb = SB();
@@ -1781,152 +2808,255 @@
 @group(0) @binding(0) var<storage, read_write> sb : SB;
 
 struct SB {
-  a : i32,
-  b : u32,
-  c : f32,
-  d : vec2<i32>,
-  e : vec2<u32>,
-  f : vec2<f32>,
-  g : vec3<i32>,
-  h : vec3<u32>,
-  i : vec3<f32>,
-  j : vec4<i32>,
-  k : vec4<u32>,
-  l : vec4<f32>,
-  m : mat2x2<f32>,
-  n : mat2x3<f32>,
-  o : mat2x4<f32>,
-  p : mat3x2<f32>,
-  q : mat3x3<f32>,
-  r : mat3x4<f32>,
-  s : mat4x2<f32>,
-  t : mat4x3<f32>,
-  u : mat4x4<f32>,
-  v : array<vec3<f32>, 2>,
+  scalar_f32 : f32,
+  scalar_i32 : i32,
+  scalar_u32 : u32,
+  scalar_f16 : f16,
+  vec2_f32 : vec2<f32>,
+  vec2_i32 : vec2<i32>,
+  vec2_u32 : vec2<u32>,
+  vec2_f16 : vec2<f16>,
+  vec3_f32 : vec3<f32>,
+  vec3_i32 : vec3<i32>,
+  vec3_u32 : vec3<u32>,
+  vec3_f16 : vec3<f16>,
+  vec4_f32 : vec4<f32>,
+  vec4_i32 : vec4<i32>,
+  vec4_u32 : vec4<u32>,
+  vec4_f16 : vec4<f16>,
+  mat2x2_f32 : mat2x2<f32>,
+  mat2x3_f32 : mat2x3<f32>,
+  mat2x4_f32 : mat2x4<f32>,
+  mat3x2_f32 : mat3x2<f32>,
+  mat3x3_f32 : mat3x3<f32>,
+  mat3x4_f32 : mat3x4<f32>,
+  mat4x2_f32 : mat4x2<f32>,
+  mat4x3_f32 : mat4x3<f32>,
+  mat4x4_f32 : mat4x4<f32>,
+  mat2x2_f16 : mat2x2<f16>,
+  mat2x3_f16 : mat2x3<f16>,
+  mat2x4_f16 : mat2x4<f16>,
+  mat3x2_f16 : mat3x2<f16>,
+  mat3x3_f16 : mat3x3<f16>,
+  mat3x4_f16 : mat3x4<f16>,
+  mat4x2_f16 : mat4x2<f16>,
+  mat4x3_f16 : mat4x3<f16>,
+  mat4x4_f16 : mat4x4<f16>,
+  arr2_vec3_f32 : array<vec3<f32>, 2>,
+  arr2_mat4x2_f16 : array<mat4x2<f16>, 2>,
 };
 )";
 
     auto* expect = R"(
-@internal(intrinsic_store_storage_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_1(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : i32)
-
-@internal(intrinsic_store_storage_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_2(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : u32)
+enable f16;
 
 @internal(intrinsic_store_storage_f32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_3(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : f32)
+fn tint_symbol_1(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : f32)
 
-@internal(intrinsic_store_storage_vec2_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_4(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec2<i32>)
+@internal(intrinsic_store_storage_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_2(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : i32)
 
-@internal(intrinsic_store_storage_vec2_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_5(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec2<u32>)
+@internal(intrinsic_store_storage_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_3(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : u32)
+
+@internal(intrinsic_store_storage_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_4(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : f16)
 
 @internal(intrinsic_store_storage_vec2_f32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_6(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec2<f32>)
+fn tint_symbol_5(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec2<f32>)
 
-@internal(intrinsic_store_storage_vec3_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_7(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec3<i32>)
+@internal(intrinsic_store_storage_vec2_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_6(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec2<i32>)
 
-@internal(intrinsic_store_storage_vec3_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_8(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec3<u32>)
+@internal(intrinsic_store_storage_vec2_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_7(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec2<u32>)
+
+@internal(intrinsic_store_storage_vec2_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_8(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec2<f16>)
 
 @internal(intrinsic_store_storage_vec3_f32) @internal(disable_validation__function_has_no_body)
 fn tint_symbol_9(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec3<f32>)
 
-@internal(intrinsic_store_storage_vec4_i32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_10(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec4<i32>)
+@internal(intrinsic_store_storage_vec3_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_10(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec3<i32>)
 
-@internal(intrinsic_store_storage_vec4_u32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_11(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec4<u32>)
+@internal(intrinsic_store_storage_vec3_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_11(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec3<u32>)
+
+@internal(intrinsic_store_storage_vec3_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_12(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec3<f16>)
 
 @internal(intrinsic_store_storage_vec4_f32) @internal(disable_validation__function_has_no_body)
-fn tint_symbol_12(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec4<f32>)
+fn tint_symbol_13(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec4<f32>)
 
-fn tint_symbol_13(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x2<f32>) {
-  tint_symbol_6(buffer, (offset + 0u), value[0u]);
-  tint_symbol_6(buffer, (offset + 8u), value[1u]);
+@internal(intrinsic_store_storage_vec4_i32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_14(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec4<i32>)
+
+@internal(intrinsic_store_storage_vec4_u32) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_15(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec4<u32>)
+
+@internal(intrinsic_store_storage_vec4_f16) @internal(disable_validation__function_has_no_body)
+fn tint_symbol_16(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : vec4<f16>)
+
+fn tint_symbol_17(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x2<f32>) {
+  tint_symbol_5(buffer, (offset + 0u), value[0u]);
+  tint_symbol_5(buffer, (offset + 8u), value[1u]);
 }
 
-fn tint_symbol_14(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x3<f32>) {
+fn tint_symbol_18(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x3<f32>) {
   tint_symbol_9(buffer, (offset + 0u), value[0u]);
   tint_symbol_9(buffer, (offset + 16u), value[1u]);
 }
 
-fn tint_symbol_15(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x4<f32>) {
-  tint_symbol_12(buffer, (offset + 0u), value[0u]);
-  tint_symbol_12(buffer, (offset + 16u), value[1u]);
+fn tint_symbol_19(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x4<f32>) {
+  tint_symbol_13(buffer, (offset + 0u), value[0u]);
+  tint_symbol_13(buffer, (offset + 16u), value[1u]);
 }
 
-fn tint_symbol_16(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x2<f32>) {
-  tint_symbol_6(buffer, (offset + 0u), value[0u]);
-  tint_symbol_6(buffer, (offset + 8u), value[1u]);
-  tint_symbol_6(buffer, (offset + 16u), value[2u]);
+fn tint_symbol_20(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x2<f32>) {
+  tint_symbol_5(buffer, (offset + 0u), value[0u]);
+  tint_symbol_5(buffer, (offset + 8u), value[1u]);
+  tint_symbol_5(buffer, (offset + 16u), value[2u]);
 }
 
-fn tint_symbol_17(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x3<f32>) {
+fn tint_symbol_21(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x3<f32>) {
   tint_symbol_9(buffer, (offset + 0u), value[0u]);
   tint_symbol_9(buffer, (offset + 16u), value[1u]);
   tint_symbol_9(buffer, (offset + 32u), value[2u]);
 }
 
-fn tint_symbol_18(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x4<f32>) {
-  tint_symbol_12(buffer, (offset + 0u), value[0u]);
-  tint_symbol_12(buffer, (offset + 16u), value[1u]);
-  tint_symbol_12(buffer, (offset + 32u), value[2u]);
+fn tint_symbol_22(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x4<f32>) {
+  tint_symbol_13(buffer, (offset + 0u), value[0u]);
+  tint_symbol_13(buffer, (offset + 16u), value[1u]);
+  tint_symbol_13(buffer, (offset + 32u), value[2u]);
 }
 
-fn tint_symbol_19(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x2<f32>) {
-  tint_symbol_6(buffer, (offset + 0u), value[0u]);
-  tint_symbol_6(buffer, (offset + 8u), value[1u]);
-  tint_symbol_6(buffer, (offset + 16u), value[2u]);
-  tint_symbol_6(buffer, (offset + 24u), value[3u]);
+fn tint_symbol_23(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x2<f32>) {
+  tint_symbol_5(buffer, (offset + 0u), value[0u]);
+  tint_symbol_5(buffer, (offset + 8u), value[1u]);
+  tint_symbol_5(buffer, (offset + 16u), value[2u]);
+  tint_symbol_5(buffer, (offset + 24u), value[3u]);
 }
 
-fn tint_symbol_20(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x3<f32>) {
+fn tint_symbol_24(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x3<f32>) {
   tint_symbol_9(buffer, (offset + 0u), value[0u]);
   tint_symbol_9(buffer, (offset + 16u), value[1u]);
   tint_symbol_9(buffer, (offset + 32u), value[2u]);
   tint_symbol_9(buffer, (offset + 48u), value[3u]);
 }
 
-fn tint_symbol_21(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x4<f32>) {
-  tint_symbol_12(buffer, (offset + 0u), value[0u]);
-  tint_symbol_12(buffer, (offset + 16u), value[1u]);
-  tint_symbol_12(buffer, (offset + 32u), value[2u]);
-  tint_symbol_12(buffer, (offset + 48u), value[3u]);
+fn tint_symbol_25(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x4<f32>) {
+  tint_symbol_13(buffer, (offset + 0u), value[0u]);
+  tint_symbol_13(buffer, (offset + 16u), value[1u]);
+  tint_symbol_13(buffer, (offset + 32u), value[2u]);
+  tint_symbol_13(buffer, (offset + 48u), value[3u]);
 }
 
-fn tint_symbol_22(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : array<vec3<f32>, 2u>) {
+fn tint_symbol_26(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x2<f16>) {
+  tint_symbol_8(buffer, (offset + 0u), value[0u]);
+  tint_symbol_8(buffer, (offset + 4u), value[1u]);
+}
+
+fn tint_symbol_27(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x3<f16>) {
+  tint_symbol_12(buffer, (offset + 0u), value[0u]);
+  tint_symbol_12(buffer, (offset + 8u), value[1u]);
+}
+
+fn tint_symbol_28(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat2x4<f16>) {
+  tint_symbol_16(buffer, (offset + 0u), value[0u]);
+  tint_symbol_16(buffer, (offset + 8u), value[1u]);
+}
+
+fn tint_symbol_29(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x2<f16>) {
+  tint_symbol_8(buffer, (offset + 0u), value[0u]);
+  tint_symbol_8(buffer, (offset + 4u), value[1u]);
+  tint_symbol_8(buffer, (offset + 8u), value[2u]);
+}
+
+fn tint_symbol_30(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x3<f16>) {
+  tint_symbol_12(buffer, (offset + 0u), value[0u]);
+  tint_symbol_12(buffer, (offset + 8u), value[1u]);
+  tint_symbol_12(buffer, (offset + 16u), value[2u]);
+}
+
+fn tint_symbol_31(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat3x4<f16>) {
+  tint_symbol_16(buffer, (offset + 0u), value[0u]);
+  tint_symbol_16(buffer, (offset + 8u), value[1u]);
+  tint_symbol_16(buffer, (offset + 16u), value[2u]);
+}
+
+fn tint_symbol_32(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x2<f16>) {
+  tint_symbol_8(buffer, (offset + 0u), value[0u]);
+  tint_symbol_8(buffer, (offset + 4u), value[1u]);
+  tint_symbol_8(buffer, (offset + 8u), value[2u]);
+  tint_symbol_8(buffer, (offset + 12u), value[3u]);
+}
+
+fn tint_symbol_33(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x3<f16>) {
+  tint_symbol_12(buffer, (offset + 0u), value[0u]);
+  tint_symbol_12(buffer, (offset + 8u), value[1u]);
+  tint_symbol_12(buffer, (offset + 16u), value[2u]);
+  tint_symbol_12(buffer, (offset + 24u), value[3u]);
+}
+
+fn tint_symbol_34(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : mat4x4<f16>) {
+  tint_symbol_16(buffer, (offset + 0u), value[0u]);
+  tint_symbol_16(buffer, (offset + 8u), value[1u]);
+  tint_symbol_16(buffer, (offset + 16u), value[2u]);
+  tint_symbol_16(buffer, (offset + 24u), value[3u]);
+}
+
+fn tint_symbol_35(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : array<vec3<f32>, 2u>) {
   var array = value;
+  for(var i = 0u; (i < 2u); i = (i + 1u)) {
+    tint_symbol_9(buffer, (offset + (i * 16u)), array[i]);
+  }
+}
+
+fn tint_symbol_36(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : array<mat4x2<f16>, 2u>) {
+  var array_1 = value;
   for(var i_1 = 0u; (i_1 < 2u); i_1 = (i_1 + 1u)) {
-    tint_symbol_9(buffer, (offset + (i_1 * 16u)), array[i_1]);
+    tint_symbol_32(buffer, (offset + (i_1 * 16u)), array_1[i_1]);
   }
 }
 
 fn tint_symbol(@internal(disable_validation__function_parameter) buffer : ptr<storage, SB, read_write>, offset : u32, value : SB) {
-  tint_symbol_1(buffer, (offset + 0u), value.a);
-  tint_symbol_2(buffer, (offset + 4u), value.b);
-  tint_symbol_3(buffer, (offset + 8u), value.c);
-  tint_symbol_4(buffer, (offset + 16u), value.d);
-  tint_symbol_5(buffer, (offset + 24u), value.e);
-  tint_symbol_6(buffer, (offset + 32u), value.f);
-  tint_symbol_7(buffer, (offset + 48u), value.g);
-  tint_symbol_8(buffer, (offset + 64u), value.h);
-  tint_symbol_9(buffer, (offset + 80u), value.i);
-  tint_symbol_10(buffer, (offset + 96u), value.j);
-  tint_symbol_11(buffer, (offset + 112u), value.k);
-  tint_symbol_12(buffer, (offset + 128u), value.l);
-  tint_symbol_13(buffer, (offset + 144u), value.m);
-  tint_symbol_14(buffer, (offset + 160u), value.n);
-  tint_symbol_15(buffer, (offset + 192u), value.o);
-  tint_symbol_16(buffer, (offset + 224u), value.p);
-  tint_symbol_17(buffer, (offset + 256u), value.q);
-  tint_symbol_18(buffer, (offset + 304u), value.r);
-  tint_symbol_19(buffer, (offset + 352u), value.s);
-  tint_symbol_20(buffer, (offset + 384u), value.t);
-  tint_symbol_21(buffer, (offset + 448u), value.u);
-  tint_symbol_22(buffer, (offset + 512u), value.v);
+  tint_symbol_1(buffer, (offset + 0u), value.scalar_f32);
+  tint_symbol_2(buffer, (offset + 4u), value.scalar_i32);
+  tint_symbol_3(buffer, (offset + 8u), value.scalar_u32);
+  tint_symbol_4(buffer, (offset + 12u), value.scalar_f16);
+  tint_symbol_5(buffer, (offset + 16u), value.vec2_f32);
+  tint_symbol_6(buffer, (offset + 24u), value.vec2_i32);
+  tint_symbol_7(buffer, (offset + 32u), value.vec2_u32);
+  tint_symbol_8(buffer, (offset + 40u), value.vec2_f16);
+  tint_symbol_9(buffer, (offset + 48u), value.vec3_f32);
+  tint_symbol_10(buffer, (offset + 64u), value.vec3_i32);
+  tint_symbol_11(buffer, (offset + 80u), value.vec3_u32);
+  tint_symbol_12(buffer, (offset + 96u), value.vec3_f16);
+  tint_symbol_13(buffer, (offset + 112u), value.vec4_f32);
+  tint_symbol_14(buffer, (offset + 128u), value.vec4_i32);
+  tint_symbol_15(buffer, (offset + 144u), value.vec4_u32);
+  tint_symbol_16(buffer, (offset + 160u), value.vec4_f16);
+  tint_symbol_17(buffer, (offset + 168u), value.mat2x2_f32);
+  tint_symbol_18(buffer, (offset + 192u), value.mat2x3_f32);
+  tint_symbol_19(buffer, (offset + 224u), value.mat2x4_f32);
+  tint_symbol_20(buffer, (offset + 256u), value.mat3x2_f32);
+  tint_symbol_21(buffer, (offset + 288u), value.mat3x3_f32);
+  tint_symbol_22(buffer, (offset + 336u), value.mat3x4_f32);
+  tint_symbol_23(buffer, (offset + 384u), value.mat4x2_f32);
+  tint_symbol_24(buffer, (offset + 416u), value.mat4x3_f32);
+  tint_symbol_25(buffer, (offset + 480u), value.mat4x4_f32);
+  tint_symbol_26(buffer, (offset + 544u), value.mat2x2_f16);
+  tint_symbol_27(buffer, (offset + 552u), value.mat2x3_f16);
+  tint_symbol_28(buffer, (offset + 568u), value.mat2x4_f16);
+  tint_symbol_29(buffer, (offset + 584u), value.mat3x2_f16);
+  tint_symbol_30(buffer, (offset + 600u), value.mat3x3_f16);
+  tint_symbol_31(buffer, (offset + 624u), value.mat3x4_f16);
+  tint_symbol_32(buffer, (offset + 648u), value.mat4x2_f16);
+  tint_symbol_33(buffer, (offset + 664u), value.mat4x3_f16);
+  tint_symbol_34(buffer, (offset + 696u), value.mat4x4_f16);
+  tint_symbol_35(buffer, (offset + 736u), value.arr2_vec3_f32);
+  tint_symbol_36(buffer, (offset + 768u), value.arr2_mat4x2_f16);
 }
 
 @compute @workgroup_size(1)
@@ -1937,28 +3067,42 @@
 @group(0) @binding(0) var<storage, read_write> sb : SB;
 
 struct SB {
-  a : i32,
-  b : u32,
-  c : f32,
-  d : vec2<i32>,
-  e : vec2<u32>,
-  f : vec2<f32>,
-  g : vec3<i32>,
-  h : vec3<u32>,
-  i : vec3<f32>,
-  j : vec4<i32>,
-  k : vec4<u32>,
-  l : vec4<f32>,
-  m : mat2x2<f32>,
-  n : mat2x3<f32>,
-  o : mat2x4<f32>,
-  p : mat3x2<f32>,
-  q : mat3x3<f32>,
-  r : mat3x4<f32>,
-  s : mat4x2<f32>,
-  t : mat4x3<f32>,
-  u : mat4x4<f32>,
-  v : array<vec3<f32>, 2>,
+  scalar_f32 : f32,
+  scalar_i32 : i32,
+  scalar_u32 : u32,
+  scalar_f16 : f16,
+  vec2_f32 : vec2<f32>,
+  vec2_i32 : vec2<i32>,
+  vec2_u32 : vec2<u32>,
+  vec2_f16 : vec2<f16>,
+  vec3_f32 : vec3<f32>,
+  vec3_i32 : vec3<i32>,
+  vec3_u32 : vec3<u32>,
+  vec3_f16 : vec3<f16>,
+  vec4_f32 : vec4<f32>,
+  vec4_i32 : vec4<i32>,
+  vec4_u32 : vec4<u32>,
+  vec4_f16 : vec4<f16>,
+  mat2x2_f32 : mat2x2<f32>,
+  mat2x3_f32 : mat2x3<f32>,
+  mat2x4_f32 : mat2x4<f32>,
+  mat3x2_f32 : mat3x2<f32>,
+  mat3x3_f32 : mat3x3<f32>,
+  mat3x4_f32 : mat3x4<f32>,
+  mat4x2_f32 : mat4x2<f32>,
+  mat4x3_f32 : mat4x3<f32>,
+  mat4x4_f32 : mat4x4<f32>,
+  mat2x2_f16 : mat2x2<f16>,
+  mat2x3_f16 : mat2x3<f16>,
+  mat2x4_f16 : mat2x4<f16>,
+  mat3x2_f16 : mat3x2<f16>,
+  mat3x3_f16 : mat3x3<f16>,
+  mat3x4_f16 : mat3x4<f16>,
+  mat4x2_f16 : mat4x2<f16>,
+  mat4x3_f16 : mat4x3<f16>,
+  mat4x4_f16 : mat4x4<f16>,
+  arr2_vec3_f32 : array<vec3<f32>, 2>,
+  arr2_mat4x2_f16 : array<mat4x2<f16>, 2>,
 }
 )";
 

diff --git a/src/tint/transform/std140.cc b/src/tint/transform/std140.cc
index 8b566fe..a371f24 100644
--- a/src/tint/transform/std140.cc
+++ b/src/tint/transform/std140.cc

@@ -265,8 +265,8 @@
     };
 
     /// @returns true if the given matrix needs decomposing to column vectors for std140 layout.
-    /// TODO(crbug.com/tint/1502): This may need adjusting for `f16` matrices.
-    static bool MatrixNeedsDecomposing(const sem::Matrix* mat) { return mat->ColumnStride() == 8; }
+    /// Std140 layout requires the matrix column stride to be 16; otherwise decomposing is needed.
+    static bool MatrixNeedsDecomposing(const sem::Matrix* mat) { return mat->ColumnStride() != 16; }
 
     /// ForkTypes walks the user-declared types in dependency order, forking structures that are
     /// used as uniform buffers which (transitively) use matrices that need std140 decomposition to
@@ -474,7 +474,7 @@
                 // natural size for the matrix. This extra padding needs to be
                 // applied to the last column vector.
                 attributes.Push(
-                    b.MemberSize(AInt(size - mat->ColumnType()->Size() * (num_columns - 1))));
+                    b.MemberSize(AInt(size - mat->ColumnType()->Align() * (num_columns - 1))));
             }
 
             // Build the member
@@ -645,7 +645,8 @@
                 return "mat" + std::to_string(mat->columns()) + "x" + std::to_string(mat->rows()) +
                        "_" + ConvertSuffix(mat->type());
             },
-            [&](const sem::F32*) { return "f32"; },
+            [&](const sem::F32*) { return "f32"; },  //
+            [&](const sem::F16*) { return "f16"; },
             [&](Default) {
                 TINT_ICE(Transform, b.Diagnostics())
                     << "unhandled type for conversion name: " << src->FriendlyName(ty);

diff --git a/src/tint/transform/std140.h b/src/tint/transform/std140.h
index 49e663d..769932f 100644
--- a/src/tint/transform/std140.h
+++ b/src/tint/transform/std140.h

@@ -20,11 +20,12 @@
 namespace tint::transform {
 
 /// Std140 is a transform that forks types used in the uniform address space that contain
-/// `matNx2<f32>` matrices into `N`x`vec2<f32>` column vectors. Types that transitively use these
-/// forked types are also forked. `var<uniform>` variables will use these forked types, and
-/// expressions loading from these variables will do appropriate conversions to the regular WGSL
-/// types. As `matNx2<f32>` matrices are the only type that violate std140-layout, this
-/// transformation is sufficient to have any WGSL structure be std140-layout conformant.
+/// `matNx2<f32>` matrices into `N`x`vec2<f32>` column vectors, and `matNxM<f16>` matrices into
+/// `N`x`vecM<f16>` column vectors. Types that transitively use these forked types are also forked.
+/// `var<uniform>` variables will use these forked types, and expressions loading from these
+/// variables will do appropriate conversions to the regular WGSL types. As `matNx2<f32>` and
+/// `matNxM<f16>` matrices are the only types that violate std140-layout, this transformation is
+/// sufficient to have any WGSL structure be std140-layout conformant.
 ///
 /// @note This transform requires the PromoteSideEffectsToDecl transform to have been run first.
 class Std140 final : public Castable<Std140, Transform> {

diff --git a/src/tint/transform/std140_exhaustive_test.cc b/src/tint/transform/std140_exhaustive_test.cc
index 01d2dae..f50e1c4 100644
--- a/src/tint/transform/std140_exhaustive_test.cc
+++ b/src/tint/transform/std140_exhaustive_test.cc

@@ -2838,6 +2838,15 @@
                              {4, 2, MatrixType::f32},
                              {4, 3, MatrixType::f32},
                              {4, 4, MatrixType::f32},
+                             {2, 2, MatrixType::f16},
+                             {2, 3, MatrixType::f16},
+                             {2, 4, MatrixType::f16},
+                             {3, 2, MatrixType::f16},
+                             {3, 3, MatrixType::f16},
+                             {3, 4, MatrixType::f16},
+                             {4, 2, MatrixType::f16},
+                             {4, 3, MatrixType::f16},
+                             {4, 4, MatrixType::f16},
                          }));
 
 using Std140Test_MatrixArray = TransformTestWithParam<MatrixCase>;
@@ -4866,6 +4875,15 @@
                              {4, 2, MatrixType::f32},
                              {4, 3, MatrixType::f32},
                              {4, 4, MatrixType::f32},
+                             {2, 2, MatrixType::f16},
+                             {2, 3, MatrixType::f16},
+                             {2, 4, MatrixType::f16},
+                             {3, 2, MatrixType::f16},
+                             {3, 3, MatrixType::f16},
+                             {3, 4, MatrixType::f16},
+                             {4, 2, MatrixType::f16},
+                             {4, 3, MatrixType::f16},
+                             {4, 4, MatrixType::f16},
                          }));
 
 }  // namespace

diff --git a/src/tint/transform/std140_f16_test.cc b/src/tint/transform/std140_f16_test.cc
new file mode 100644
index 0000000..898bb73
--- /dev/null
+++ b/src/tint/transform/std140_f16_test.cc

@@ -0,0 +1,3596 @@
+// Copyright 2022 The Tint Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/tint/transform/std140.h"
+
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "src/tint/transform/test_helper.h"
+#include "src/tint/utils/string.h"
+
+namespace tint::transform {
+namespace {
+
+using Std140Test_F16 = TransformTest;
+
+TEST_F(Std140Test_F16, StructMatricesUniform) {
+    auto* src = R"(
+enable f16;
+
+struct S2x2F16 {
+  m : mat2x2<f16>,
+}
+struct S3x2F16 {
+  m : mat3x2<f16>,
+}
+struct S4x2F16 {
+  m : mat4x2<f16>,
+}
+struct S2x3F16 {
+  m : mat2x3<f16>,
+}
+struct S3x3F16 {
+  m : mat3x3<f16>,
+}
+struct S4x3F16 {
+  m : mat4x3<f16>,
+}
+struct S2x4F16 {
+  m : mat2x4<f16>,
+}
+struct S3x4F16 {
+  m : mat3x4<f16>,
+}
+struct S4x4F16 {
+  m : mat4x4<f16>,
+}
+
+@group(2) @binding(2) var<uniform> s2x2f16 : S2x2F16;
+@group(3) @binding(2) var<uniform> s3x2f16 : S3x2F16;
+@group(4) @binding(2) var<uniform> s4x2f16 : S4x2F16;
+@group(2) @binding(3) var<uniform> s2x3f16 : S2x3F16;
+@group(3) @binding(3) var<uniform> s3x3f16 : S3x3F16;
+@group(4) @binding(3) var<uniform> s4x3f16 : S4x3F16;
+@group(2) @binding(4) var<uniform> s2x4f16 : S2x4F16;
+@group(3) @binding(4) var<uniform> s3x4f16 : S3x4F16;
+@group(4) @binding(4) var<uniform> s4x4f16 : S4x4F16;
+)";
+
+    auto* expect = R"(
+enable f16;
+
+struct S2x2F16 {
+  m : mat2x2<f16>,
+}
+
+struct S2x2F16_std140 {
+  m_0 : vec2<f16>,
+  m_1 : vec2<f16>,
+}
+
+struct S3x2F16 {
+  m : mat3x2<f16>,
+}
+
+struct S3x2F16_std140 {
+  m_0 : vec2<f16>,
+  m_1 : vec2<f16>,
+  m_2 : vec2<f16>,
+}
+
+struct S4x2F16 {
+  m : mat4x2<f16>,
+}
+
+struct S4x2F16_std140 {
+  m_0 : vec2<f16>,
+  m_1 : vec2<f16>,
+  m_2 : vec2<f16>,
+  m_3 : vec2<f16>,
+}
+
+struct S2x3F16 {
+  m : mat2x3<f16>,
+}
+
+struct S2x3F16_std140 {
+  m_0 : vec3<f16>,
+  m_1 : vec3<f16>,
+}
+
+struct S3x3F16 {
+  m : mat3x3<f16>,
+}
+
+struct S3x3F16_std140 {
+  m_0 : vec3<f16>,
+  m_1 : vec3<f16>,
+  m_2 : vec3<f16>,
+}
+
+struct S4x3F16 {
+  m : mat4x3<f16>,
+}
+
+struct S4x3F16_std140 {
+  m_0 : vec3<f16>,
+  m_1 : vec3<f16>,
+  m_2 : vec3<f16>,
+  m_3 : vec3<f16>,
+}
+
+struct S2x4F16 {
+  m : mat2x4<f16>,
+}
+
+struct S2x4F16_std140 {
+  m_0 : vec4<f16>,
+  m_1 : vec4<f16>,
+}
+
+struct S3x4F16 {
+  m : mat3x4<f16>,
+}
+
+struct S3x4F16_std140 {
+  m_0 : vec4<f16>,
+  m_1 : vec4<f16>,
+  m_2 : vec4<f16>,
+}
+
+struct S4x4F16 {
+  m : mat4x4<f16>,
+}
+
+struct S4x4F16_std140 {
+  m_0 : vec4<f16>,
+  m_1 : vec4<f16>,
+  m_2 : vec4<f16>,
+  m_3 : vec4<f16>,
+}
+
+@group(2) @binding(2) var<uniform> s2x2f16 : S2x2F16_std140;
+
+@group(3) @binding(2) var<uniform> s3x2f16 : S3x2F16_std140;
+
+@group(4) @binding(2) var<uniform> s4x2f16 : S4x2F16_std140;
+
+@group(2) @binding(3) var<uniform> s2x3f16 : S2x3F16_std140;
+
+@group(3) @binding(3) var<uniform> s3x3f16 : S3x3F16_std140;
+
+@group(4) @binding(3) var<uniform> s4x3f16 : S4x3F16_std140;
+
+@group(2) @binding(4) var<uniform> s2x4f16 : S2x4F16_std140;
+
+@group(3) @binding(4) var<uniform> s3x4f16 : S3x4F16_std140;
+
+@group(4) @binding(4) var<uniform> s4x4f16 : S4x4F16_std140;
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+// The following tests only cover `mat2x3<f16>`, and use constant column index 1, row index 0,
+// inner array index 2, and outer array index 3. For exhaustive tests, i.e. tests on all matrix
+// shapes and different valid constant indices, please refer to std140_exhaustive_test.cc.
+
+TEST_F(Std140Test_F16, SingleStructMatUniform_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+struct S {
+  m : mat2x3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+)";
+
+    auto* expect = R"(
+enable f16;
+
+struct S {
+  m : mat2x3<f16>,
+}
+
+struct S_std140 {
+  m_0 : vec3<f16>,
+  m_1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16, CustomAlign_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+struct S {
+  before : i32,
+  @align(128)
+  m : mat2x3<f16>,
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+)";
+
+    auto* expect = R"(
+enable f16;
+
+struct S {
+  before : i32,
+  @align(128)
+  m : mat2x3<f16>,
+  after : i32,
+}
+
+struct S_std140 {
+  before : i32,
+  @align(128i)
+  m_0 : vec3<f16>,
+  m_1 : vec3<f16>,
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+// An explicit @size(128) on the matrix member is expected to turn into @size(120) on the last
+// generated column member, with the first column member left unattributed.
+TEST_F(Std140Test_F16, CustomSizeMat_Mat2x3F16) {
+    const char* const wgsl = R"(
+enable f16;
+
+struct S {
+  before : i32,
+  @size(128)
+  m : mat2x3<f16>,
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+)";
+
+    const char* const want = R"(
+enable f16;
+
+struct S {
+  before : i32,
+  @size(128)
+  m : mat2x3<f16>,
+  after : i32,
+}
+
+struct S_std140 {
+  before : i32,
+  m_0 : vec3<f16>,
+  @size(120)
+  m_1 : vec3<f16>,
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+)";
+
+    const auto result = Run<Std140>(wgsl);
+
+    EXPECT_EQ(want, str(result));
+}
+
+// With both @align and @size on the matrix member, the alignment is expected on the first column
+// member (as `128i`) and the size, reduced to 120, on the last column member.
+TEST_F(Std140Test_F16, CustomAlignAndSize_Mat2x3F16) {
+    const char* const wgsl = R"(
+enable f16;
+
+struct S {
+  before : i32,
+  @align(128) @size(128)
+  m : mat2x3<f16>,
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+)";
+
+    const char* const want = R"(
+enable f16;
+
+struct S {
+  before : i32,
+  @align(128) @size(128)
+  m : mat2x3<f16>,
+  after : i32,
+}
+
+struct S_std140 {
+  before : i32,
+  @align(128i)
+  m_0 : vec3<f16>,
+  @size(120)
+  m_1 : vec3<f16>,
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+)";
+
+    const auto result = Run<Std140>(wgsl);
+
+    EXPECT_EQ(want, str(result));
+}
+
+// Matrix element reads in the initializer, condition and continuing clause of a for-loop are all
+// rewritten: constant column indices become direct column-member accesses, and the dynamically
+// indexed column in the condition goes through a generated switch-based load helper.
+TEST_F(Std140Test_F16, MatrixUsageInForLoop_Mat2x3F16) {
+    const char* const wgsl = R"(
+enable f16;
+
+struct S {
+  m : mat2x3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  for(var i = u32(s.m[0][0]); (i < u32(s.m[i][1])); i += u32(s.m[1][i])) {
+  }
+}
+)";
+
+    const char* const want = R"(
+enable f16;
+
+struct S {
+  m : mat2x3<f16>,
+}
+
+struct S_std140 {
+  m_0 : vec3<f16>,
+  m_1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn load_s_m_p0_1(p0 : u32) -> f16 {
+  switch(p0) {
+    case 0u: {
+      return s.m_0[1u];
+    }
+    case 1u: {
+      return s.m_1[1u];
+    }
+    default: {
+      return f16();
+    }
+  }
+}
+
+fn f() {
+  for(var i = u32(s.m_0[0u]); (i < u32(load_s_m_p0_1(u32(i)))); i += u32(s.m_1[i])) {
+  }
+}
+)";
+
+    const auto result = Run<Std140>(wgsl);
+
+    EXPECT_EQ(want, str(result));
+}
+
+// A uniform variable declared directly as mat2x3<f16> is retyped to a generated `mat2x3_f16`
+// struct of columns, and a whole-matrix load goes through a generated `conv_mat2x3_f16` helper.
+TEST_F(Std140Test_F16, MatUniform_LoadMatrix_Mat2x3F16) {
+    const char* const wgsl = R"(
+enable f16;
+
+@group(0) @binding(0) var<uniform> m : mat2x3<f16>;
+
+fn f() {
+  let l = m;
+}
+)";
+
+    const char* const want = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> m : mat2x3_f16;
+
+fn conv_mat2x3_f16(val : mat2x3_f16) -> mat2x3<f16> {
+  return mat2x3<f16>(val.col0, val.col1);
+}
+
+fn f() {
+  let l = conv_mat2x3_f16(m);
+}
+)";
+
+    const auto result = Run<Std140>(wgsl);
+
+    EXPECT_EQ(want, str(result));
+}
+
+// Loading a column of a matrix uniform with a constant index is expected to become a direct
+// access of the matching column member (`a.col1`), with no helper function.
+TEST_F(Std140Test_F16, MatUniform_LoadColumn_ConstIndex_Mat2x3F16) {
+    const char* const wgsl = R"(
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : mat2x3<f16>;
+
+fn f() {
+  let l = a[1];
+}
+)";
+
+    const char* const want = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : mat2x3_f16;
+
+fn f() {
+  let l = a.col1;
+}
+)";
+
+    const auto result = Run<Std140>(wgsl);
+
+    EXPECT_EQ(want, str(result));
+}
+
+// Loading a column of a matrix uniform with a non-literal index is expected to call a generated
+// `load_a_p0` helper that switches over the index and returns a zero vector by default.
+TEST_F(Std140Test_F16, MatUniform_LoadColumn_VariableIndex_Mat2x3F16) {
+    const char* const wgsl = R"(
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : mat2x3<f16>;
+
+fn f() {
+  let I = 1;
+  let l = a[I];
+}
+)";
+
+    const char* const want = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : mat2x3_f16;
+
+fn load_a_p0(p0 : u32) -> vec3<f16> {
+  switch(p0) {
+    case 0u: {
+      return a.col0;
+    }
+    case 1u: {
+      return a.col1;
+    }
+    default: {
+      return vec3<f16>();
+    }
+  }
+}
+
+fn f() {
+  let I = 1;
+  let l = load_a_p0(u32(I));
+}
+)";
+
+    const auto result = Run<Std140>(wgsl);
+
+    EXPECT_EQ(want, str(result));
+}
+
+// A swizzle applied to a constant-indexed column is expected to be kept on the direct column
+// member access (`a.col1.yzx`).
+TEST_F(Std140Test_F16, MatUniform_LoadColumnSwizzle_ConstIndex_Mat2x3F16) {
+    const char* const wgsl = R"(
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : mat2x3<f16>;
+
+fn f() {
+  let l = a[1].yzx;
+}
+)";
+
+    const char* const want = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : mat2x3_f16;
+
+fn f() {
+  let l = a.col1.yzx;
+}
+)";
+
+    const auto result = Run<Std140>(wgsl);
+
+    EXPECT_EQ(want, str(result));
+}
+
+// A swizzle applied to a dynamically indexed column is expected to be folded into the generated
+// helper (`load_a_p0_yzx`), which applies the swizzle inside each switch case.
+TEST_F(Std140Test_F16, MatUniform_LoadColumnSwizzle_VariableIndex_Mat2x3F16) {
+    const char* const wgsl = R"(
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : mat2x3<f16>;
+
+fn f() {
+  let I = 1;
+  let l = a[I].yzx;
+}
+)";
+
+    const char* const want = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : mat2x3_f16;
+
+fn load_a_p0_yzx(p0 : u32) -> vec3<f16> {
+  switch(p0) {
+    case 0u: {
+      return a.col0.yzx;
+    }
+    case 1u: {
+      return a.col1.yzx;
+    }
+    default: {
+      return vec3<f16>();
+    }
+  }
+}
+
+fn f() {
+  let I = 1;
+  let l = load_a_p0_yzx(u32(I));
+}
+)";
+
+    const auto result = Run<Std140>(wgsl);
+
+    EXPECT_EQ(want, str(result));
+}
+
+// A scalar read with constant column and row indices is expected to become a direct access into
+// the column member, with the row index printed as an unsigned literal (`a.col1[0u]`).
+TEST_F(Std140Test_F16, MatUniform_LoadScalar_ConstColumnIndex_ConstRowIndex_Mat2x3F16) {
+    const char* const wgsl = R"(
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : mat2x3<f16>;
+
+fn f() {
+  let l = a[1][0];
+}
+)";
+
+    const char* const want = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : mat2x3_f16;
+
+fn f() {
+  let l = a.col1[0u];
+}
+)";
+
+    const auto result = Run<Std140>(wgsl);
+
+    EXPECT_EQ(want, str(result));
+}
+
+// A scalar read with a dynamic column index and a constant row index is expected to call a
+// generated `load_a_p0_0` helper that switches on the column and bakes in the row index.
+TEST_F(Std140Test_F16, MatUniform_LoadScalar_VariableColumnIndex_ConstRowIndex_Mat2x3F16) {
+    const char* const wgsl = R"(
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : mat2x3<f16>;
+
+fn f() {
+  let I = 0;
+  let l = a[I][0];
+}
+)";
+
+    const char* const want = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : mat2x3_f16;
+
+fn load_a_p0_0(p0 : u32) -> f16 {
+  switch(p0) {
+    case 0u: {
+      return a.col0[0u];
+    }
+    case 1u: {
+      return a.col1[0u];
+    }
+    default: {
+      return f16();
+    }
+  }
+}
+
+fn f() {
+  let I = 0;
+  let l = load_a_p0_0(u32(I));
+}
+)";
+
+    const auto result = Run<Std140>(wgsl);
+
+    EXPECT_EQ(want, str(result));
+}
+
+// With a constant column index and a dynamic row index no helper is expected: the column member
+// is accessed directly and the row index expression is left untouched (`a.col1[I]`).
+TEST_F(Std140Test_F16, MatUniform_LoadScalar_ConstColumnIndex_VariableRowIndex_Mat2x3F16) {
+    const char* const wgsl = R"(
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : mat2x3<f16>;
+
+fn f() {
+  let I = 0;
+  let l = a[1][I];
+}
+)";
+
+    const char* const want = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : mat2x3_f16;
+
+fn f() {
+  let I = 0;
+  let l = a.col1[I];
+}
+)";
+
+    const auto result = Run<Std140>(wgsl);
+
+    EXPECT_EQ(want, str(result));
+}
+
+// With both column and row indices dynamic, the expected helper `load_a_p0_p1` takes two u32
+// parameters: it switches on the column and indexes the chosen column member by the row.
+TEST_F(Std140Test_F16, MatUniform_LoadScalar_VariableColumnIndex_VariableRowIndex_Mat2x3F16) {
+    const char* const wgsl = R"(
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : mat2x3<f16>;
+
+fn f() {
+  let I = 0;
+  let l = a[I][I];
+}
+)";
+
+    const char* const want = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : mat2x3_f16;
+
+fn load_a_p0_p1(p0 : u32, p1 : u32) -> f16 {
+  switch(p0) {
+    case 0u: {
+      return a.col0[p1];
+    }
+    case 1u: {
+      return a.col1[p1];
+    }
+    default: {
+      return f16();
+    }
+  }
+}
+
+fn f() {
+  let I = 0;
+  let l = load_a_p0_p1(u32(I), u32(I));
+}
+)";
+
+    const auto result = Run<Std140>(wgsl);
+
+    EXPECT_EQ(want, str(result));
+}
+
+// The struct already has a member called `m_1`, so the generated column members are expected to
+// be named `m__0` / `m__1` instead of `m_0` / `m_1`.
+TEST_F(Std140Test_F16, StructMatUniform_NameCollision_Mat2x3F16) {
+    const char* const wgsl = R"(
+enable f16;
+
+struct S {
+  m_1 : i32,
+  m : mat2x3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+)";
+
+    const char* const want = R"(
+enable f16;
+
+struct S {
+  m_1 : i32,
+  m : mat2x3<f16>,
+}
+
+struct S_std140 {
+  m_1 : i32,
+  m__0 : vec3<f16>,
+  m__1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+)";
+
+    const auto result = Run<Std140>(wgsl);
+
+    EXPECT_EQ(want, str(result));
+}
+
+// Loading the whole uniform struct is expected to be wrapped in a generated `conv_S` helper that
+// rebuilds the original struct, reassembling the matrix from its column members.
+TEST_F(Std140Test_F16, StructMatUniform_LoadStruct_Mat2x3F16) {
+    const char* const wgsl = R"(
+enable f16;
+
+struct S {
+  m : mat2x3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let l = s;
+}
+)";
+
+    const char* const want = R"(
+enable f16;
+
+struct S {
+  m : mat2x3<f16>,
+}
+
+struct S_std140 {
+  m_0 : vec3<f16>,
+  m_1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn conv_S(val : S_std140) -> S {
+  return S(mat2x3<f16>(val.m_0, val.m_1));
+}
+
+fn f() {
+  let l = conv_S(s);
+}
+)";
+
+    const auto result = Run<Std140>(wgsl);
+
+    EXPECT_EQ(want, str(result));
+}
+
+// Loading the matrix member of a uniform struct is expected to call a generated `load_s_m`
+// helper, which takes a pointer to the struct and constructs the matrix from both columns.
+TEST_F(Std140Test_F16, StructMatUniform_LoadMatrix_Mat2x3F16) {
+    const char* const wgsl = R"(
+enable f16;
+
+struct S {
+  m : mat2x3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let l = s.m;
+}
+)";
+
+    const char* const want = R"(
+enable f16;
+
+struct S {
+  m : mat2x3<f16>,
+}
+
+struct S_std140 {
+  m_0 : vec3<f16>,
+  m_1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn load_s_m() -> mat2x3<f16> {
+  let s = &(s);
+  return mat2x3<f16>((*(s)).m_0, (*(s)).m_1);
+}
+
+fn f() {
+  let l = load_s_m();
+}
+)";
+
+    const auto result = Run<Std140>(wgsl);
+
+    EXPECT_EQ(want, str(result));
+}
+
+// A constant-indexed column of a struct's matrix member is expected to become a direct access of
+// the generated column member (`s.m_1`).
+TEST_F(Std140Test_F16, StructMatUniform_LoadColumn_ConstIndex_Mat2x3F16) {
+    const char* const wgsl = R"(
+enable f16;
+
+struct S {
+  m : mat2x3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let l = s.m[1];
+}
+)";
+
+    const char* const want = R"(
+enable f16;
+
+struct S {
+  m : mat2x3<f16>,
+}
+
+struct S_std140 {
+  m_0 : vec3<f16>,
+  m_1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn f() {
+  let l = s.m_1;
+}
+)";
+
+    const auto result = Run<Std140>(wgsl);
+
+    EXPECT_EQ(want, str(result));
+}
+
+// A dynamically indexed column of a struct's matrix member is expected to go through a generated
+// `load_s_m_p0` helper that switches over the column index.
+TEST_F(Std140Test_F16, StructMatUniform_LoadColumn_VariableIndex_Mat2x3F16) {
+    const char* const wgsl = R"(
+enable f16;
+
+struct S {
+  m : mat2x3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let I = 0;
+  let l = s.m[I];
+}
+)";
+
+    const char* const want = R"(
+enable f16;
+
+struct S {
+  m : mat2x3<f16>,
+}
+
+struct S_std140 {
+  m_0 : vec3<f16>,
+  m_1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn load_s_m_p0(p0 : u32) -> vec3<f16> {
+  switch(p0) {
+    case 0u: {
+      return s.m_0;
+    }
+    case 1u: {
+      return s.m_1;
+    }
+    default: {
+      return vec3<f16>();
+    }
+  }
+}
+
+fn f() {
+  let I = 0;
+  let l = load_s_m_p0(u32(I));
+}
+)";
+
+    const auto result = Run<Std140>(wgsl);
+
+    EXPECT_EQ(want, str(result));
+}
+
+// A scalar read from a struct's matrix member with constant column and row indices is expected
+// to become a direct indexed access of the column member (`s.m_1[0u]`).
+TEST_F(Std140Test_F16, StructMatUniform_LoadScalar_ConstColumnIndex_ConstRowIndex_Mat2x3F16) {
+    const char* const wgsl = R"(
+enable f16;
+
+struct S {
+  m : mat2x3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let l = s.m[1][0];
+}
+)";
+
+    const char* const want = R"(
+enable f16;
+
+struct S {
+  m : mat2x3<f16>,
+}
+
+struct S_std140 {
+  m_0 : vec3<f16>,
+  m_1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn f() {
+  let l = s.m_1[0u];
+}
+)";
+
+    const auto result = Run<Std140>(wgsl);
+
+    EXPECT_EQ(want, str(result));
+}
+
+// A scalar read from a struct's matrix member with a dynamic column and constant row is expected
+// to call a generated `load_s_m_p0_0` helper that switches on the column index.
+TEST_F(Std140Test_F16, StructMatUniform_LoadScalar_VariableColumnIndex_ConstRowIndex_Mat2x3F16) {
+    const char* const wgsl = R"(
+enable f16;
+
+struct S {
+  m : mat2x3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let I = 0;
+  let l = s.m[I][0];
+}
+)";
+
+    const char* const want = R"(
+enable f16;
+
+struct S {
+  m : mat2x3<f16>,
+}
+
+struct S_std140 {
+  m_0 : vec3<f16>,
+  m_1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn load_s_m_p0_0(p0 : u32) -> f16 {
+  switch(p0) {
+    case 0u: {
+      return s.m_0[0u];
+    }
+    case 1u: {
+      return s.m_1[0u];
+    }
+    default: {
+      return f16();
+    }
+  }
+}
+
+fn f() {
+  let I = 0;
+  let l = load_s_m_p0_0(u32(I));
+}
+)";
+
+    const auto result = Run<Std140>(wgsl);
+
+    EXPECT_EQ(want, str(result));
+}
+
+// With a constant column and a dynamic row on a struct's matrix member, no helper is expected:
+// the column member is accessed directly and indexed by the original row expression.
+TEST_F(Std140Test_F16, StructMatUniform_LoadScalar_ConstColumnIndex_VariableRowIndex_Mat2x3F16) {
+    const char* const wgsl = R"(
+enable f16;
+
+struct S {
+  m : mat2x3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let I = 0;
+  let l = s.m[1][I];
+}
+)";
+
+    const char* const want = R"(
+enable f16;
+
+struct S {
+  m : mat2x3<f16>,
+}
+
+struct S_std140 {
+  m_0 : vec3<f16>,
+  m_1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn f() {
+  let I = 0;
+  let l = s.m_1[I];
+}
+)";
+
+    const auto result = Run<Std140>(wgsl);
+
+    EXPECT_EQ(want, str(result));
+}
+
+// With both indices dynamic on a struct's matrix member, the expected helper `load_s_m_p0_p1`
+// switches on the column index and indexes the selected column member by the row index.
+TEST_F(Std140Test_F16, StructMatUniform_LoadScalar_VariableColumnIndex_VariableRowIndex_Mat2x3F16) {
+    const char* const wgsl = R"(
+enable f16;
+
+struct S {
+  m : mat2x3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let I = 0;
+  let l = s.m[I][I];
+}
+)";
+
+    const char* const want = R"(
+enable f16;
+
+struct S {
+  m : mat2x3<f16>,
+}
+
+struct S_std140 {
+  m_0 : vec3<f16>,
+  m_1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn load_s_m_p0_p1(p0 : u32, p1 : u32) -> f16 {
+  switch(p0) {
+    case 0u: {
+      return s.m_0[p1];
+    }
+    case 1u: {
+      return s.m_1[p1];
+    }
+    default: {
+      return f16();
+    }
+  }
+}
+
+fn f() {
+  let I = 0;
+  let l = load_s_m_p0_p1(u32(I), u32(I));
+}
+)";
+
+    const auto result = Run<Std140>(wgsl);
+
+    EXPECT_EQ(want, str(result));
+}
+
+// Loading a whole uniform array of structs is expected to call a generated `conv_arr3_S` helper
+// that loops over the elements and converts each one with `conv_S`.
+TEST_F(Std140Test_F16, ArrayStructMatUniform_LoadArray_Mat2x3F16) {
+    const char* const wgsl = R"(
+enable f16;
+
+struct S {
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S, 3>;
+
+fn f() {
+  let l = a;
+}
+)";
+
+    const char* const want = R"(
+enable f16;
+
+struct S {
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+struct S_std140 {
+  m_0 : vec3<f16>,
+  @size(56)
+  m_1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S_std140, 3u>;
+
+fn conv_S(val : S_std140) -> S {
+  return S(mat2x3<f16>(val.m_0, val.m_1));
+}
+
+fn conv_arr3_S(val : array<S_std140, 3u>) -> array<S, 3u> {
+  var arr : array<S, 3u>;
+  for(var i : u32; (i < 3u); i = (i + 1)) {
+    arr[i] = conv_S(val[i]);
+  }
+  return arr;
+}
+
+fn f() {
+  let l = conv_arr3_S(a);
+}
+)";
+
+    const auto result = Run<Std140>(wgsl);
+
+    EXPECT_EQ(want, str(result));
+}
+
+// Loading one struct from the uniform array by constant index is expected to wrap the element
+// access (index printed as `2u`) in `conv_S`.
+TEST_F(Std140Test_F16, ArrayStructMatUniform_LoadStruct_ConstIndex_Mat2x3F16) {
+    const char* const wgsl = R"(
+enable f16;
+
+struct S {
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S, 3>;
+
+fn f() {
+  let l = a[2];
+}
+)";
+
+    const char* const want = R"(
+enable f16;
+
+struct S {
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+struct S_std140 {
+  m_0 : vec3<f16>,
+  @size(56)
+  m_1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S_std140, 3u>;
+
+fn conv_S(val : S_std140) -> S {
+  return S(mat2x3<f16>(val.m_0, val.m_1));
+}
+
+fn f() {
+  let l = conv_S(a[2u]);
+}
+)";
+
+    const auto result = Run<Std140>(wgsl);
+
+    EXPECT_EQ(want, str(result));
+}
+
+// Loading one struct from the uniform array by a dynamic index is expected to wrap the element
+// access in `conv_S`, leaving the index expression as written (`a[I]`).
+TEST_F(Std140Test_F16, ArrayStructMatUniform_LoadStruct_VariableIndex_Mat2x3F16) {
+    const char* const wgsl = R"(
+enable f16;
+
+struct S {
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S, 3>;
+
+fn f() {
+  let I = 1;
+  let l = a[I];
+}
+)";
+
+    const char* const want = R"(
+enable f16;
+
+struct S {
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+struct S_std140 {
+  m_0 : vec3<f16>,
+  @size(56)
+  m_1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S_std140, 3u>;
+
+fn conv_S(val : S_std140) -> S {
+  return S(mat2x3<f16>(val.m_0, val.m_1));
+}
+
+fn f() {
+  let I = 1;
+  let l = conv_S(a[I]);
+}
+)";
+
+    const auto result = Run<Std140>(wgsl);
+
+    EXPECT_EQ(want, str(result));
+}
+
+// Loading the matrix member of a constant-indexed array element is expected to call a
+// parameterless `load_a_2_m` helper that has the element index baked in.
+TEST_F(Std140Test_F16, ArrayStructMatUniform_LoadMatrix_ConstArrayIndex_Mat2x3F16) {
+    const char* const wgsl = R"(
+enable f16;
+
+struct S {
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S, 3>;
+
+fn f() {
+  let l = a[2].m;
+}
+)";
+
+    const char* const want = R"(
+enable f16;
+
+struct S {
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+struct S_std140 {
+  m_0 : vec3<f16>,
+  @size(56)
+  m_1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S_std140, 3u>;
+
+fn load_a_2_m() -> mat2x3<f16> {
+  let s = &(a[2u]);
+  return mat2x3<f16>((*(s)).m_0, (*(s)).m_1);
+}
+
+fn f() {
+  let l = load_a_2_m();
+}
+)";
+
+    const auto result = Run<Std140>(wgsl);
+
+    EXPECT_EQ(want, str(result));
+}
+
+// Loading the matrix member of a dynamically indexed array element is expected to call a
+// `load_a_p0_m` helper that receives the element index as a u32 parameter.
+TEST_F(Std140Test_F16, ArrayStructMatUniform_LoadMatrix_VariableArrayIndex_Mat2x3F16) {
+    const char* const wgsl = R"(
+enable f16;
+
+struct S {
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S, 3>;
+
+fn f() {
+  let I = 1;
+  let l = a[I].m;
+}
+)";
+
+    const char* const want = R"(
+enable f16;
+
+struct S {
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+struct S_std140 {
+  m_0 : vec3<f16>,
+  @size(56)
+  m_1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S_std140, 3u>;
+
+fn load_a_p0_m(p0 : u32) -> mat2x3<f16> {
+  let s = &(a[p0]);
+  return mat2x3<f16>((*(s)).m_0, (*(s)).m_1);
+}
+
+fn f() {
+  let I = 1;
+  let l = load_a_p0_m(u32(I));
+}
+)";
+
+    const auto result = Run<Std140>(wgsl);
+
+    EXPECT_EQ(want, str(result));
+}
+
+// With constant array and column indices, the column load is expected to become a direct access
+// of the column member on the indexed element (`a[2u].m_1`).
+TEST_F(Std140Test_F16,
+       ArrayStructMatUniform_LoadColumn_ConstArrayIndex_ConstColumnIndex_Mat2x3F16) {
+    const char* const wgsl = R"(
+enable f16;
+
+struct S {
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S, 3>;
+
+fn f() {
+  let l = a[2].m[1];
+}
+)";
+
+    const char* const want = R"(
+enable f16;
+
+struct S {
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+struct S_std140 {
+  m_0 : vec3<f16>,
+  @size(56)
+  m_1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S_std140, 3u>;
+
+fn f() {
+  let l = a[2u].m_1;
+}
+)";
+
+    const auto result = Run<Std140>(wgsl);
+
+    EXPECT_EQ(want, str(result));
+}
+
+// With a dynamic array index but a constant column index, no helper is expected: the column
+// member is accessed directly on the dynamically indexed element (`a[I].m_1`).
+TEST_F(Std140Test_F16,
+       ArrayStructMatUniform_LoadColumn_VariableArrayIndex_ConstColumnIndex_Mat2x3F16) {
+    const char* const wgsl = R"(
+enable f16;
+
+struct S {
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S, 3>;
+
+fn f() {
+  let I = 1;
+  let l = a[I].m[1];
+}
+)";
+
+    const char* const want = R"(
+enable f16;
+
+struct S {
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+struct S_std140 {
+  m_0 : vec3<f16>,
+  @size(56)
+  m_1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S_std140, 3u>;
+
+fn f() {
+  let I = 1;
+  let l = a[I].m_1;
+}
+)";
+
+    const auto result = Run<Std140>(wgsl);
+
+    EXPECT_EQ(want, str(result));
+}
+
+// With a constant array index and a dynamic column index, the expected helper `load_a_2_m_p0`
+// bakes in the element index and switches over the column index.
+TEST_F(Std140Test_F16,
+       ArrayStructMatUniform_LoadColumn_ConstArrayIndex_VariableColumnIndex_Mat2x3F16) {
+    const char* const wgsl = R"(
+enable f16;
+
+struct S {
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S, 3>;
+
+fn f() {
+  let I = 1;
+  let l = a[2].m[I];
+}
+)";
+
+    const char* const want = R"(
+enable f16;
+
+struct S {
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+struct S_std140 {
+  m_0 : vec3<f16>,
+  @size(56)
+  m_1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S_std140, 3u>;
+
+fn load_a_2_m_p0(p0 : u32) -> vec3<f16> {
+  switch(p0) {
+    case 0u: {
+      return a[2u].m_0;
+    }
+    case 1u: {
+      return a[2u].m_1;
+    }
+    default: {
+      return vec3<f16>();
+    }
+  }
+}
+
+fn f() {
+  let I = 1;
+  let l = load_a_2_m_p0(u32(I));
+}
+)";
+
+    const auto result = Run<Std140>(wgsl);
+
+    EXPECT_EQ(want, str(result));
+}
+
+// With both array and column indices dynamic, the expected helper `load_a_p0_m_p1` takes the
+// element index as p0 and switches over the column index p1.
+TEST_F(Std140Test_F16,
+       ArrayStructMatUniform_LoadColumn_VariableArrayIndex_VariableColumnIndex_Mat2x3F16) {
+    const char* const wgsl = R"(
+enable f16;
+
+struct S {
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S, 3>;
+
+fn f() {
+  let I = 1;
+  let l = a[I].m[I];
+}
+)";
+
+    const char* const want = R"(
+enable f16;
+
+struct S {
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+struct S_std140 {
+  m_0 : vec3<f16>,
+  @size(56)
+  m_1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S_std140, 3u>;
+
+fn load_a_p0_m_p1(p0 : u32, p1 : u32) -> vec3<f16> {
+  switch(p1) {
+    case 0u: {
+      return a[p0].m_0;
+    }
+    case 1u: {
+      return a[p0].m_1;
+    }
+    default: {
+      return vec3<f16>();
+    }
+  }
+}
+
+fn f() {
+  let I = 1;
+  let l = load_a_p0_m_p1(u32(I), u32(I));
+}
+)";
+
+    const auto result = Run<Std140>(wgsl);
+
+    EXPECT_EQ(want, str(result));
+}
+
+// Exercises loads at every depth of array<Outer> -> Outer -> array<Inner> -> Inner -> matrix ->
+// column -> scalar, mixing constant and dynamic indices. Expected output: array/struct loads go
+// through `conv_*` helpers, matrix loads through `load_*_m` helpers, fully-constant column/scalar
+// accesses become direct member accesses, and dynamic column accesses use switch-based helpers.
+TEST_F(Std140Test_F16, ArrayStructArrayStructMatUniform_Loads_Mat2x3F16) {
+    const char* const wgsl = R"(
+enable f16;
+
+struct Inner {
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+fn f() {
+  let I = 1;
+  let J = 2;
+  let K = 0;
+  let l_a : array<Outer, 4> = a;
+  let l_a_1 : Outer = a[1];
+  let l_a_I : Outer = a[I];
+  let l_a_2_a : array<Inner, 4> = a[2].a;
+  let l_a_I_a : array<Inner, 4> = a[I].a;
+  let l_a_3_a_1 : Inner = a[3].a[1];
+  let l_a_3_a_I : Inner = a[3].a[I];
+  let l_a_I_a_1 : Inner = a[I].a[1];
+  let l_a_I_a_J : Inner = a[I].a[J];
+  let l_a_0_a_2_m : mat2x3<f16> = a[0].a[2].m;
+  let l_a_0_a_I_m : mat2x3<f16> = a[0].a[I].m;
+  let l_a_I_a_2_m : mat2x3<f16> = a[I].a[2].m;
+  let l_a_I_a_J_m : mat2x3<f16> = a[I].a[J].m;
+  let l_a_1_a_3_m_0 : vec3<f16> = a[1].a[3].m[0];
+  let l_a_I_a_J_m_K : vec3<f16> = a[I].a[J].m[K];
+  let l_a_2_a_0_m_1_0 : f16 = a[2].a[0].m[1][0];
+  let l_a_I_a_J_m_K_I : f16 = a[I].a[J].m[K][I];
+}
+)";
+
+    const char* const want = R"(
+enable f16;
+
+struct Inner {
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+struct Inner_std140 {
+  m_0 : vec3<f16>,
+  @size(56)
+  m_1 : vec3<f16>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+struct Outer_std140 {
+  a : array<Inner_std140, 4u>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer_std140, 4u>;
+
+fn conv_Inner(val : Inner_std140) -> Inner {
+  return Inner(mat2x3<f16>(val.m_0, val.m_1));
+}
+
+fn conv_arr4_Inner(val : array<Inner_std140, 4u>) -> array<Inner, 4u> {
+  var arr : array<Inner, 4u>;
+  for(var i : u32; (i < 4u); i = (i + 1)) {
+    arr[i] = conv_Inner(val[i]);
+  }
+  return arr;
+}
+
+fn conv_Outer(val : Outer_std140) -> Outer {
+  return Outer(conv_arr4_Inner(val.a));
+}
+
+fn conv_arr4_Outer(val : array<Outer_std140, 4u>) -> array<Outer, 4u> {
+  var arr : array<Outer, 4u>;
+  for(var i : u32; (i < 4u); i = (i + 1)) {
+    arr[i] = conv_Outer(val[i]);
+  }
+  return arr;
+}
+
+fn load_a_0_a_2_m() -> mat2x3<f16> {
+  let s = &(a[0u].a[2u]);
+  return mat2x3<f16>((*(s)).m_0, (*(s)).m_1);
+}
+
+fn load_a_0_a_p0_m(p0 : u32) -> mat2x3<f16> {
+  let s = &(a[0u].a[p0]);
+  return mat2x3<f16>((*(s)).m_0, (*(s)).m_1);
+}
+
+fn load_a_p0_a_2_m(p0 : u32) -> mat2x3<f16> {
+  let s = &(a[p0].a[2u]);
+  return mat2x3<f16>((*(s)).m_0, (*(s)).m_1);
+}
+
+fn load_a_p0_a_p1_m(p0 : u32, p1 : u32) -> mat2x3<f16> {
+  let s = &(a[p0].a[p1]);
+  return mat2x3<f16>((*(s)).m_0, (*(s)).m_1);
+}
+
+fn load_a_p0_a_p1_m_p2(p0 : u32, p1 : u32, p2 : u32) -> vec3<f16> {
+  switch(p2) {
+    case 0u: {
+      return a[p0].a[p1].m_0;
+    }
+    case 1u: {
+      return a[p0].a[p1].m_1;
+    }
+    default: {
+      return vec3<f16>();
+    }
+  }
+}
+
+fn load_a_p0_a_p1_m_p2_p3(p0 : u32, p1 : u32, p2 : u32, p3 : u32) -> f16 {
+  switch(p2) {
+    case 0u: {
+      return a[p0].a[p1].m_0[p3];
+    }
+    case 1u: {
+      return a[p0].a[p1].m_1[p3];
+    }
+    default: {
+      return f16();
+    }
+  }
+}
+
+fn f() {
+  let I = 1;
+  let J = 2;
+  let K = 0;
+  let l_a : array<Outer, 4> = conv_arr4_Outer(a);
+  let l_a_1 : Outer = conv_Outer(a[1u]);
+  let l_a_I : Outer = conv_Outer(a[I]);
+  let l_a_2_a : array<Inner, 4> = conv_arr4_Inner(a[2u].a);
+  let l_a_I_a : array<Inner, 4> = conv_arr4_Inner(a[I].a);
+  let l_a_3_a_1 : Inner = conv_Inner(a[3u].a[1u]);
+  let l_a_3_a_I : Inner = conv_Inner(a[3u].a[I]);
+  let l_a_I_a_1 : Inner = conv_Inner(a[I].a[1u]);
+  let l_a_I_a_J : Inner = conv_Inner(a[I].a[J]);
+  let l_a_0_a_2_m : mat2x3<f16> = load_a_0_a_2_m();
+  let l_a_0_a_I_m : mat2x3<f16> = load_a_0_a_p0_m(u32(I));
+  let l_a_I_a_2_m : mat2x3<f16> = load_a_p0_a_2_m(u32(I));
+  let l_a_I_a_J_m : mat2x3<f16> = load_a_p0_a_p1_m(u32(I), u32(J));
+  let l_a_1_a_3_m_0 : vec3<f16> = a[1u].a[3u].m_0;
+  let l_a_I_a_J_m_K : vec3<f16> = load_a_p0_a_p1_m_p2(u32(I), u32(J), u32(K));
+  let l_a_2_a_0_m_1_0 : f16 = a[2u].a[0u].m_1[0u];
+  let l_a_I_a_J_m_K_I : f16 = load_a_p0_a_p1_m_p2_p3(u32(I), u32(J), u32(K), u32(I));
+}
+)";
+
+    const auto result = Run<Std140>(wgsl);
+
+    EXPECT_EQ(want, str(result));
+}
+
+// Same nested array/struct/matrix layout as the plain loads test, but every access is built up
+// through a chain of pointer `let`s before being dereferenced. In the expected output each
+// pointer `let` and each dereferencing load is replaced by the corresponding `conv_*` / `load_*`
+// call or direct column-member access on the uniform variable.
+//
+// Each `l_*` identifier spells the access path it loads. The scalar loaded through
+// `p_a_3_a_2_m_1` is therefore named `l_a_3_a_2_m_1_0`, matching a[3].a[2].m[1][0].
+TEST_F(Std140Test_F16, ArrayStructArrayStructMatUniform_LoadsViaPtrs_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+struct Inner {
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+fn f() {
+  let I = 1;
+  let J = 2;
+  let K = 0;
+  let p_a = &(a);
+  let p_a_3 = &((*(p_a))[3]);
+  let p_a_I = &((*(p_a))[I]);
+  let p_a_3_a = &((*(p_a_3)).a);
+  let p_a_I_a = &((*(p_a_I)).a);
+  let p_a_3_a_2 = &((*(p_a_3_a))[2]);
+  let p_a_3_a_I = &((*(p_a_3_a))[I]);
+  let p_a_I_a_2 = &((*(p_a_I_a))[2]);
+  let p_a_I_a_J = &((*(p_a_I_a))[J]);
+  let p_a_3_a_2_m = &((*(p_a_3_a_2)).m);
+  let p_a_3_a_I_m = &((*(p_a_3_a_I)).m);
+  let p_a_I_a_2_m = &((*(p_a_I_a_2)).m);
+  let p_a_I_a_J_m = &((*(p_a_I_a_J)).m);
+  let p_a_3_a_2_m_1 = &((*(p_a_3_a_2_m))[1]);
+  let p_a_I_a_J_m_K = &((*(p_a_I_a_J_m))[K]);
+  let l_a : array<Outer, 4> = *(p_a);
+  let l_a_3 : Outer = *(p_a_3);
+  let l_a_I : Outer = *(p_a_I);
+  let l_a_3_a : array<Inner, 4> = *(p_a_3_a);
+  let l_a_I_a : array<Inner, 4> = *(p_a_I_a);
+  let l_a_3_a_2 : Inner = *(p_a_3_a_2);
+  let l_a_3_a_I : Inner = *(p_a_3_a_I);
+  let l_a_I_a_2 : Inner = *(p_a_I_a_2);
+  let l_a_I_a_J : Inner = *(p_a_I_a_J);
+  let l_a_3_a_2_m : mat2x3<f16> = *(p_a_3_a_2_m);
+  let l_a_3_a_I_m : mat2x3<f16> = *(p_a_3_a_I_m);
+  let l_a_I_a_2_m : mat2x3<f16> = *(p_a_I_a_2_m);
+  let l_a_I_a_J_m : mat2x3<f16> = *(p_a_I_a_J_m);
+  let l_a_3_a_2_m_1 : vec3<f16> = *(p_a_3_a_2_m_1);
+  let l_a_I_a_J_m_K : vec3<f16> = *(p_a_I_a_J_m_K);
+  let l_a_3_a_2_m_1_0 : f16 = (*(p_a_3_a_2_m_1))[0];
+  let l_a_I_a_J_m_K_I : f16 = (*(p_a_I_a_J_m_K))[I];
+}
+)";
+
+    auto* expect = R"(
+enable f16;
+
+struct Inner {
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+struct Inner_std140 {
+  m_0 : vec3<f16>,
+  @size(56)
+  m_1 : vec3<f16>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+struct Outer_std140 {
+  a : array<Inner_std140, 4u>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer_std140, 4u>;
+
+fn conv_Inner(val : Inner_std140) -> Inner {
+  return Inner(mat2x3<f16>(val.m_0, val.m_1));
+}
+
+fn conv_arr4_Inner(val : array<Inner_std140, 4u>) -> array<Inner, 4u> {
+  var arr : array<Inner, 4u>;
+  for(var i : u32; (i < 4u); i = (i + 1)) {
+    arr[i] = conv_Inner(val[i]);
+  }
+  return arr;
+}
+
+fn conv_Outer(val : Outer_std140) -> Outer {
+  return Outer(conv_arr4_Inner(val.a));
+}
+
+fn conv_arr4_Outer(val : array<Outer_std140, 4u>) -> array<Outer, 4u> {
+  var arr : array<Outer, 4u>;
+  for(var i : u32; (i < 4u); i = (i + 1)) {
+    arr[i] = conv_Outer(val[i]);
+  }
+  return arr;
+}
+
+fn load_a_3_a_2_m() -> mat2x3<f16> {
+  let s = &(a[3u].a[2u]);
+  return mat2x3<f16>((*(s)).m_0, (*(s)).m_1);
+}
+
+fn load_a_3_a_p0_m(p0 : u32) -> mat2x3<f16> {
+  let s = &(a[3u].a[p0]);
+  return mat2x3<f16>((*(s)).m_0, (*(s)).m_1);
+}
+
+fn load_a_p0_a_2_m(p0 : u32) -> mat2x3<f16> {
+  let s = &(a[p0].a[2u]);
+  return mat2x3<f16>((*(s)).m_0, (*(s)).m_1);
+}
+
+fn load_a_p0_a_p1_m(p0 : u32, p1 : u32) -> mat2x3<f16> {
+  let s = &(a[p0].a[p1]);
+  return mat2x3<f16>((*(s)).m_0, (*(s)).m_1);
+}
+
+fn load_a_p0_a_p1_m_p2(p0 : u32, p1 : u32, p2 : u32) -> vec3<f16> {
+  switch(p2) {
+    case 0u: {
+      return a[p0].a[p1].m_0;
+    }
+    case 1u: {
+      return a[p0].a[p1].m_1;
+    }
+    default: {
+      return vec3<f16>();
+    }
+  }
+}
+
+fn load_a_p0_a_p1_m_p2_p3(p0 : u32, p1 : u32, p2 : u32, p3 : u32) -> f16 {
+  switch(p2) {
+    case 0u: {
+      return a[p0].a[p1].m_0[p3];
+    }
+    case 1u: {
+      return a[p0].a[p1].m_1[p3];
+    }
+    default: {
+      return f16();
+    }
+  }
+}
+
+fn f() {
+  let I = 1;
+  let J = 2;
+  let K = 0;
+  let p_a = conv_arr4_Outer(a);
+  let p_a_3 = conv_Outer(a[3u]);
+  let p_a_I = conv_Outer(a[I]);
+  let p_a_3_a = conv_arr4_Inner(a[3u].a);
+  let p_a_I_a = conv_arr4_Inner(a[I].a);
+  let p_a_3_a_2 = conv_Inner(a[3u].a[2u]);
+  let p_a_3_a_I = conv_Inner(a[3u].a[I]);
+  let p_a_I_a_2 = conv_Inner(a[I].a[2u]);
+  let p_a_I_a_J = conv_Inner(a[I].a[J]);
+  let p_a_3_a_2_m = load_a_3_a_2_m();
+  let p_a_3_a_I_m = load_a_3_a_p0_m(u32(I));
+  let p_a_I_a_2_m = load_a_p0_a_2_m(u32(I));
+  let p_a_I_a_J_m = load_a_p0_a_p1_m(u32(I), u32(J));
+  let p_a_3_a_2_m_1 = a[3u].a[2u].m_1;
+  let p_a_I_a_J_m_K = load_a_p0_a_p1_m_p2(u32(I), u32(J), u32(K));
+  let l_a : array<Outer, 4> = conv_arr4_Outer(a);
+  let l_a_3 : Outer = conv_Outer(a[3u]);
+  let l_a_I : Outer = conv_Outer(a[I]);
+  let l_a_3_a : array<Inner, 4> = conv_arr4_Inner(a[3u].a);
+  let l_a_I_a : array<Inner, 4> = conv_arr4_Inner(a[I].a);
+  let l_a_3_a_2 : Inner = conv_Inner(a[3u].a[2u]);
+  let l_a_3_a_I : Inner = conv_Inner(a[3u].a[I]);
+  let l_a_I_a_2 : Inner = conv_Inner(a[I].a[2u]);
+  let l_a_I_a_J : Inner = conv_Inner(a[I].a[J]);
+  let l_a_3_a_2_m : mat2x3<f16> = load_a_3_a_2_m();
+  let l_a_3_a_I_m : mat2x3<f16> = load_a_3_a_p0_m(u32(I));
+  let l_a_I_a_2_m : mat2x3<f16> = load_a_p0_a_2_m(u32(I));
+  let l_a_I_a_J_m : mat2x3<f16> = load_a_p0_a_p1_m(u32(I), u32(J));
+  let l_a_3_a_2_m_1 : vec3<f16> = a[3u].a[2u].m_1;
+  let l_a_I_a_J_m_K : vec3<f16> = load_a_p0_a_p1_m_p2(u32(I), u32(J), u32(K));
+  let l_a_3_a_2_m_1_0 : f16 = a[3u].a[2u].m_1[0u];
+  let l_a_I_a_J_m_K_I : f16 = load_a_p0_a_p1_m_p2_p3(u32(I), u32(J), u32(K), u32(I));
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+// Assigning a whole uniform array to a storage array: only the uniform variable is retyped to
+// the std140 struct; the storage variable keeps type `S`, and the right-hand side is expected to
+// be converted with `conv_arr4_S`.
+TEST_F(Std140Test_F16, ArrayStructMatUniform_CopyArray_UniformToStorage_Mat2x3F16) {
+    const char* const wgsl = R"(
+enable f16;
+
+struct S {
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
+
+fn f() {
+  s = u;
+}
+)";
+
+    const char* const want = R"(
+enable f16;
+
+struct S {
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+struct S_std140 {
+  m_0 : vec3<f16>,
+  @size(56)
+  m_1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S_std140, 4u>;
+
+@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
+
+fn conv_S(val : S_std140) -> S {
+  return S(mat2x3<f16>(val.m_0, val.m_1));
+}
+
+fn conv_arr4_S(val : array<S_std140, 4u>) -> array<S, 4u> {
+  var arr : array<S, 4u>;
+  for(var i : u32; (i < 4u); i = (i + 1)) {
+    arr[i] = conv_S(val[i]);
+  }
+  return arr;
+}
+
+fn f() {
+  s = conv_arr4_S(u);
+}
+)";
+
+    const auto result = Run<Std140>(wgsl);
+
+    EXPECT_EQ(want, str(result));
+}
+
+// Copying one struct element from the uniform array into a workgroup array: the workgroup
+// variable is left unchanged, and the source element is expected to be converted with `conv_S`,
+// which forwards the non-matrix member `v` as-is.
+TEST_F(Std140Test_F16, ArrayStructMatUniform_CopyStruct_UniformToWorkgroup_Mat2x3F16) {
+    const char* const wgsl = R"(
+enable f16;
+
+struct S {
+  v : vec4<i32>,
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+var<workgroup> w : array<S, 4>;
+
+fn f() {
+  w[0] = u[1];
+}
+)";
+
+    const char* const want = R"(
+enable f16;
+
+struct S {
+  v : vec4<i32>,
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+struct S_std140 {
+  v : vec4<i32>,
+  m_0 : vec3<f16>,
+  @size(56)
+  m_1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S_std140, 4u>;
+
+var<workgroup> w : array<S, 4>;
+
+fn conv_S(val : S_std140) -> S {
+  return S(val.v, mat2x3<f16>(val.m_0, val.m_1));
+}
+
+fn f() {
+  w[0] = conv_S(u[1u]);
+}
+)";
+
+    const auto result = Run<Std140>(wgsl);
+
+    EXPECT_EQ(want, str(result));
+}
+
+// Copying a matrix member from a uniform array element into a private array element: the
+// left-hand side is left untouched and the right-hand side is expected to call `load_u_1_m`.
+TEST_F(Std140Test_F16, ArrayStructMatUniform_CopyMatrix_UniformToPrivate_Mat2x3F16) {
+    const char* const wgsl = R"(
+enable f16;
+
+struct S {
+  v : vec4<i32>,
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 3>;
+
+var<private> p : array<S, 4>;
+
+fn f() {
+  p[2].m = u[1].m;
+}
+)";
+
+    const char* const want = R"(
+enable f16;
+
+struct S {
+  v : vec4<i32>,
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+struct S_std140 {
+  v : vec4<i32>,
+  m_0 : vec3<f16>,
+  @size(56)
+  m_1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S_std140, 3u>;
+
+var<private> p : array<S, 4>;
+
+fn load_u_1_m() -> mat2x3<f16> {
+  let s = &(u[1u]);
+  return mat2x3<f16>((*(s)).m_0, (*(s)).m_1);
+}
+
+fn f() {
+  p[2].m = load_u_1_m();
+}
+)";
+
+    const auto result = Run<Std140>(wgsl);
+
+    EXPECT_EQ(want, str(result));
+}
+
+TEST_F(Std140Test_F16, ArrayStructMatUniform_CopyColumn_UniformToStorage_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+struct S {
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 3>;
+
+@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
+
+fn f() {
+  s[3].m[1] = u[2].m[0];
+}
+)";
+    // One column is copied: expect u[2].m[0] to become a direct read of member m_0, no helper.
+    auto* expect = R"(
+enable f16;
+
+struct S {
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+struct S_std140 {
+  m_0 : vec3<f16>,
+  @size(56)
+  m_1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S_std140, 3u>;
+
+@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
+
+fn f() {
+  s[3].m[1] = u[2u].m_0;
+}
+)";
+    // The storage variable keeps type S, so the write to s[3].m[1] is left unchanged.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16, ArrayStructMatUniform_CopyColumnSwizzle_UniformToWorkgroup_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+struct S {
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+var<workgroup> w : array<S, 4>;
+
+fn f() {
+  w[3].m[1] = u[2].m[0].yzx.yzx;
+}
+)";
+    // A swizzled column is copied: expect m[0] to become m_0 with both .yzx swizzles kept.
+    auto* expect = R"(
+enable f16;
+
+struct S {
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+struct S_std140 {
+  m_0 : vec3<f16>,
+  @size(56)
+  m_1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S_std140, 4u>;
+
+var<workgroup> w : array<S, 4>;
+
+fn f() {
+  w[3].m[1] = u[2u].m_0.yzx.yzx;
+}
+)";
+    // The workgroup variable keeps type S, so the write to w[3].m[1] is left unchanged.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16, ArrayStructMatUniform_CopyScalar_UniformToPrivate_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+struct S {
+  v : vec4<i32>,
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 3>;
+
+var<private> p : array<S, 4>;
+
+fn f() {
+  p[3].m[1].x = u[2].m[0].y;
+}
+)";
+    // One scalar is copied: expect u[2].m[0].y to become u[2u].m_0[1u] (member, then index).
+    auto* expect = R"(
+enable f16;
+
+struct S {
+  v : vec4<i32>,
+  @size(64)
+  m : mat2x3<f16>,
+}
+
+struct S_std140 {
+  v : vec4<i32>,
+  m_0 : vec3<f16>,
+  @size(56)
+  m_1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S_std140, 3u>;
+
+var<private> p : array<S, 4>;
+
+fn f() {
+  p[3].m[1].x = u[2u].m_0[1u];
+}
+)";
+    // The private variable keeps type S, so the write to p[3].m[1].x is left unchanged.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16, ArrayMatUniform_LoadArray_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : array<mat2x3<f16>, 3>;
+
+fn f() {
+  let l = a;
+}
+)";
+    // The whole array is loaded: expect a mat2x3_f16 column struct and two conv_* helpers.
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<mat2x3_f16, 3u>;
+
+fn conv_mat2x3_f16(val : mat2x3_f16) -> mat2x3<f16> {
+  return mat2x3<f16>(val.col0, val.col1);
+}
+
+fn conv_arr3_mat2x3_f16(val : array<mat2x3_f16, 3u>) -> array<mat2x3<f16>, 3u> {
+  var arr : array<mat2x3<f16>, 3u>;
+  for(var i : u32; (i < 3u); i = (i + 1)) {
+    arr[i] = conv_mat2x3_f16(val[i]);
+  }
+  return arr;
+}
+
+fn f() {
+  let l = conv_arr3_mat2x3_f16(a);
+}
+)";
+    // conv_arr3_mat2x3_f16() converts element by element through conv_mat2x3_f16().
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16, ArrayMatUniform_LoadMatrix_ConstArrayIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : array<mat2x3<f16>, 3>;
+
+fn f() {
+  let l = a[2];
+}
+)";
+    // One matrix is loaded by constant index: expect conv_mat2x3_f16(a[2u]), no array helper.
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<mat2x3_f16, 3u>;
+
+fn conv_mat2x3_f16(val : mat2x3_f16) -> mat2x3<f16> {
+  return mat2x3<f16>(val.col0, val.col1);
+}
+
+fn f() {
+  let l = conv_mat2x3_f16(a[2u]);
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16, ArrayMatUniform_LoadMatrix_VariableArrayIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : array<mat2x3<f16>, 3>;
+
+fn f() {
+  let I = 1;
+  let l = a[I];
+}
+)";
+    // One matrix is loaded by variable index: expect conv_mat2x3_f16(a[I]), index left as-is.
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<mat2x3_f16, 3u>;
+
+fn conv_mat2x3_f16(val : mat2x3_f16) -> mat2x3<f16> {
+  return mat2x3<f16>(val.col0, val.col1);
+}
+
+fn f() {
+  let I = 1;
+  let l = conv_mat2x3_f16(a[I]);
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16, ArrayMatUniform_LoadColumn_ConstArrayIndex_ConstColumnIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : array<mat2x3<f16>, 3>;
+
+fn f() {
+  let l = a[2][1];
+}
+)";
+    // Constant array and column indices: expect a[2][1] to become the member read a[2u].col1.
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<mat2x3_f16, 3u>;
+
+fn f() {
+  let l = a[2u].col1;
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16, ArrayMatUniform_LoadColumn_VariableArrayIndex_ConstColumnIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : array<mat2x3<f16>, 3>;
+
+fn f() {
+  let I = 1;
+  let l = a[I][1];
+}
+)";
+    // Variable array index, constant column: expect a[I][1] to become a[I].col1, no helper.
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<mat2x3_f16, 3u>;
+
+fn f() {
+  let I = 1;
+  let l = a[I].col1;
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16, ArrayMatUniform_LoadColumn_ConstArrayIndex_VariableColumnIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : array<mat2x3<f16>, 3>;
+
+fn f() {
+  let I = 1;
+  let l = a[2][I];
+}
+)";
+    // Variable column index: expect a load_a_2_p0() helper that switches over the column.
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<mat2x3_f16, 3u>;
+
+fn load_a_2_p0(p0 : u32) -> vec3<f16> {
+  switch(p0) {
+    case 0u: {
+      return a[2u].col0;
+    }
+    case 1u: {
+      return a[2u].col1;
+    }
+    default: {
+      return vec3<f16>();
+    }
+  }
+}
+
+fn f() {
+  let I = 1;
+  let l = load_a_2_p0(u32(I));
+}
+)";
+    // The helper takes the column as u32 (hence u32(I)); its default case returns vec3<f16>().
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16,
+       ArrayMatUniform_LoadColumn_VariableArrayIndex_VariableColumnIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : array<mat2x3<f16>, 3>;
+
+fn f() {
+  let I = 1;
+  let l = a[I][I];
+}
+)";
+    // Both indices are variable: expect load_a_p0_p1(), which switches on the column p1 only.
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<mat2x3_f16, 3u>;
+
+fn load_a_p0_p1(p0 : u32, p1 : u32) -> vec3<f16> {
+  switch(p1) {
+    case 0u: {
+      return a[p0].col0;
+    }
+    case 1u: {
+      return a[p0].col1;
+    }
+    default: {
+      return vec3<f16>();
+    }
+  }
+}
+
+fn f() {
+  let I = 1;
+  let l = load_a_p0_p1(u32(I), u32(I));
+}
+)";
+    // The array index p0 is forwarded to a[p0] inside each case rather than switched on.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16, StructArrayMatUniform_LoadStruct_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+struct S {
+  a : array<mat2x3<f16>, 3>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let l = s;
+}
+)";
+    // The whole struct is loaded: expect S_std140 plus conv_S() chained onto the array helpers.
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+struct S {
+  a : array<mat2x3<f16>, 3>,
+}
+
+struct S_std140 {
+  a : array<mat2x3_f16, 3u>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn conv_mat2x3_f16(val : mat2x3_f16) -> mat2x3<f16> {
+  return mat2x3<f16>(val.col0, val.col1);
+}
+
+fn conv_arr3_mat2x3_f16(val : array<mat2x3_f16, 3u>) -> array<mat2x3<f16>, 3u> {
+  var arr : array<mat2x3<f16>, 3u>;
+  for(var i : u32; (i < 3u); i = (i + 1)) {
+    arr[i] = conv_mat2x3_f16(val[i]);
+  }
+  return arr;
+}
+
+fn conv_S(val : S_std140) -> S {
+  return S(conv_arr3_mat2x3_f16(val.a));
+}
+
+fn f() {
+  let l = conv_S(s);
+}
+)";
+    // The mat2x3_f16 column struct is emitted ahead of S, and S_std140 right after S.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16, StructArrayMatUniform_LoadArray_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+struct S {
+  a : array<mat2x3<f16>, 3>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let l = s.a;
+}
+)";
+    // Only member `a` is loaded: expect conv_arr3_mat2x3_f16(s.a) and no conv_S() helper.
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+struct S {
+  a : array<mat2x3<f16>, 3>,
+}
+
+struct S_std140 {
+  a : array<mat2x3_f16, 3u>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn conv_mat2x3_f16(val : mat2x3_f16) -> mat2x3<f16> {
+  return mat2x3<f16>(val.col0, val.col1);
+}
+
+fn conv_arr3_mat2x3_f16(val : array<mat2x3_f16, 3u>) -> array<mat2x3<f16>, 3u> {
+  var arr : array<mat2x3<f16>, 3u>;
+  for(var i : u32; (i < 3u); i = (i + 1)) {
+    arr[i] = conv_mat2x3_f16(val[i]);
+  }
+  return arr;
+}
+
+fn f() {
+  let l = conv_arr3_mat2x3_f16(s.a);
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16, StructArrayMatUniform_LoadMatrix_ConstArrayIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+struct S {
+  a : array<mat2x3<f16>, 3>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let l = s.a[2];
+}
+)";
+    // One matrix of s.a is loaded by constant index: expect conv_mat2x3_f16(s.a[2u]).
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+struct S {
+  a : array<mat2x3<f16>, 3>,
+}
+
+struct S_std140 {
+  a : array<mat2x3_f16, 3u>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn conv_mat2x3_f16(val : mat2x3_f16) -> mat2x3<f16> {
+  return mat2x3<f16>(val.col0, val.col1);
+}
+
+fn f() {
+  let l = conv_mat2x3_f16(s.a[2u]);
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16, StructArrayMatUniform_LoadMatrix_VariableArrayIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+struct S {
+  a : array<mat2x3<f16>, 3>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let I = 1;
+  let l = s.a[I];
+}
+)";
+    // One matrix of s.a is loaded by variable index: expect conv_mat2x3_f16(s.a[I]).
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+struct S {
+  a : array<mat2x3<f16>, 3>,
+}
+
+struct S_std140 {
+  a : array<mat2x3_f16, 3u>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn conv_mat2x3_f16(val : mat2x3_f16) -> mat2x3<f16> {
+  return mat2x3<f16>(val.col0, val.col1);
+}
+
+fn f() {
+  let I = 1;
+  let l = conv_mat2x3_f16(s.a[I]);
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16,
+       StructArrayMatUniform_LoadColumn_ConstArrayIndex_ConstColumnIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+struct S {
+  a : array<mat2x3<f16>, 3>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let l = s.a[2][1];
+}
+)";
+    // Constant array and column indices: expect s.a[2][1] to become s.a[2u].col1, no helper.
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+struct S {
+  a : array<mat2x3<f16>, 3>,
+}
+
+struct S_std140 {
+  a : array<mat2x3_f16, 3u>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn f() {
+  let l = s.a[2u].col1;
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16,
+       StructArrayMatUniform_LoadColumn_VariableArrayIndex_ConstColumnIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+struct S {
+  a : array<mat2x3<f16>, 3>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let I = 1;
+  let l = s.a[I][1];
+}
+)";
+    // Variable array index, constant column: expect s.a[I][1] to become s.a[I].col1.
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+struct S {
+  a : array<mat2x3<f16>, 3>,
+}
+
+struct S_std140 {
+  a : array<mat2x3_f16, 3u>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn f() {
+  let I = 1;
+  let l = s.a[I].col1;
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16,
+       StructArrayMatUniform_LoadColumn_ConstArrayIndex_VariableColumnIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+struct S {
+  a : array<mat2x3<f16>, 3>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let I = 1;
+  let l = s.a[2][I];
+}
+)";
+    // Variable column index: expect a load_s_a_2_p0() helper that switches over the column.
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+struct S {
+  a : array<mat2x3<f16>, 3>,
+}
+
+struct S_std140 {
+  a : array<mat2x3_f16, 3u>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn load_s_a_2_p0(p0 : u32) -> vec3<f16> {
+  switch(p0) {
+    case 0u: {
+      return s.a[2u].col0;
+    }
+    case 1u: {
+      return s.a[2u].col1;
+    }
+    default: {
+      return vec3<f16>();
+    }
+  }
+}
+
+fn f() {
+  let I = 1;
+  let l = load_s_a_2_p0(u32(I));
+}
+)";
+    // The helper takes the column as u32 (hence u32(I)); its default case returns vec3<f16>().
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16,
+       StructArrayMatUniform_LoadColumn_VariableArrayIndex_VariableColumnIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+struct S {
+  a : array<mat2x3<f16>, 3>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let I = 1;
+  let l = s.a[I][I];
+}
+)";
+    // Both indices are variable: expect load_s_a_p0_p1(), switching on the column p1 only.
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+struct S {
+  a : array<mat2x3<f16>, 3>,
+}
+
+struct S_std140 {
+  a : array<mat2x3_f16, 3u>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn load_s_a_p0_p1(p0 : u32, p1 : u32) -> vec3<f16> {
+  switch(p1) {
+    case 0u: {
+      return s.a[p0].col0;
+    }
+    case 1u: {
+      return s.a[p0].col1;
+    }
+    default: {
+      return vec3<f16>();
+    }
+  }
+}
+
+fn f() {
+  let I = 1;
+  let l = load_s_a_p0_p1(u32(I), u32(I));
+}
+)";
+    // The array index p0 is forwarded to s.a[p0] inside each case rather than switched on.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16, ArrayArrayMatUniform_LoadArrays_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x3<f16>, 3>, 4>;
+
+fn f() {
+  let l = a;
+}
+)";
+    // The nested array is loaded whole: expect one conversion helper per nesting level.
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x3_f16, 3u>, 4u>;
+
+fn conv_mat2x3_f16(val : mat2x3_f16) -> mat2x3<f16> {
+  return mat2x3<f16>(val.col0, val.col1);
+}
+
+fn conv_arr3_mat2x3_f16(val : array<mat2x3_f16, 3u>) -> array<mat2x3<f16>, 3u> {
+  var arr : array<mat2x3<f16>, 3u>;
+  for(var i : u32; (i < 3u); i = (i + 1)) {
+    arr[i] = conv_mat2x3_f16(val[i]);
+  }
+  return arr;
+}
+
+fn conv_arr4_arr3_mat2x3_f16(val : array<array<mat2x3_f16, 3u>, 4u>) -> array<array<mat2x3<f16>, 3u>, 4u> {
+  var arr : array<array<mat2x3<f16>, 3u>, 4u>;
+  for(var i : u32; (i < 4u); i = (i + 1)) {
+    arr[i] = conv_arr3_mat2x3_f16(val[i]);
+  }
+  return arr;
+}
+
+fn f() {
+  let l = conv_arr4_arr3_mat2x3_f16(a);
+}
+)";
+    // conv_arr4_arr3_mat2x3_f16() calls conv_arr3_mat2x3_f16(), which calls conv_mat2x3_f16().
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16, ArrayArrayMatUniform_LoadArray_ConstOuterArrayIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x3<f16>, 3>, 4>;
+
+fn f() {
+  let l = a[3];
+}
+)";
+    // An inner array is loaded by constant index: expect conv_arr3_mat2x3_f16(a[3u]).
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x3_f16, 3u>, 4u>;
+
+fn conv_mat2x3_f16(val : mat2x3_f16) -> mat2x3<f16> {
+  return mat2x3<f16>(val.col0, val.col1);
+}
+
+fn conv_arr3_mat2x3_f16(val : array<mat2x3_f16, 3u>) -> array<mat2x3<f16>, 3u> {
+  var arr : array<mat2x3<f16>, 3u>;
+  for(var i : u32; (i < 3u); i = (i + 1)) {
+    arr[i] = conv_mat2x3_f16(val[i]);
+  }
+  return arr;
+}
+
+fn f() {
+  let l = conv_arr3_mat2x3_f16(a[3u]);
+}
+)";
+    // No helper for the outer array<..., 4u> is expected, since only one inner array is read.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16, ArrayArrayMatUniform_LoadArray_VariableOuterArrayIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x3<f16>, 3>, 4>;
+
+fn f() {
+  let I = 1;
+  let l = a[I];
+}
+)";
+    // An inner array is loaded by variable index: expect conv_arr3_mat2x3_f16(a[I]).
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x3_f16, 3u>, 4u>;
+
+fn conv_mat2x3_f16(val : mat2x3_f16) -> mat2x3<f16> {
+  return mat2x3<f16>(val.col0, val.col1);
+}
+
+fn conv_arr3_mat2x3_f16(val : array<mat2x3_f16, 3u>) -> array<mat2x3<f16>, 3u> {
+  var arr : array<mat2x3<f16>, 3u>;
+  for(var i : u32; (i < 3u); i = (i + 1)) {
+    arr[i] = conv_mat2x3_f16(val[i]);
+  }
+  return arr;
+}
+
+fn f() {
+  let I = 1;
+  let l = conv_arr3_mat2x3_f16(a[I]);
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16,
+       ArrayArrayMatUniform_LoadMatrix_ConstOuterArrayIndex_ConstInnerArrayIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x3<f16>, 3>, 4>;
+
+fn f() {
+  let l = a[3][2];
+}
+)";
+    // Both array indices are constant: expect conv_mat2x3_f16(a[3u][2u]), no array helpers.
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x3_f16, 3u>, 4u>;
+
+fn conv_mat2x3_f16(val : mat2x3_f16) -> mat2x3<f16> {
+  return mat2x3<f16>(val.col0, val.col1);
+}
+
+fn f() {
+  let l = conv_mat2x3_f16(a[3u][2u]);
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16,
+       ArrayArrayMatUniform_LoadMatrix_ConstOuterArrayIndex_VariableInnerArrayIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x3<f16>, 3>, 4>;
+
+fn f() {
+  let I = 1;
+  let l = a[3][I];
+}
+)";
+    // Constant outer, variable inner index: expect conv_mat2x3_f16(a[3u][I]).
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x3_f16, 3u>, 4u>;
+
+fn conv_mat2x3_f16(val : mat2x3_f16) -> mat2x3<f16> {
+  return mat2x3<f16>(val.col0, val.col1);
+}
+
+fn f() {
+  let I = 1;
+  let l = conv_mat2x3_f16(a[3u][I]);
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16,
+       ArrayArrayMatUniform_LoadMatrix_VariableOuterArrayIndex_ConstInnerArrayIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x3<f16>, 3>, 4>;
+
+fn f() {
+  let I = 1;
+  let l = a[I][2];
+}
+)";
+    // Variable outer, constant inner index: expect conv_mat2x3_f16(a[I][2u]).
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x3_f16, 3u>, 4u>;
+
+fn conv_mat2x3_f16(val : mat2x3_f16) -> mat2x3<f16> {
+  return mat2x3<f16>(val.col0, val.col1);
+}
+
+fn f() {
+  let I = 1;
+  let l = conv_mat2x3_f16(a[I][2u]);
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F16,
+       ArrayArrayMatUniform_LoadMatrix_VariableOuterArrayIndex_VariableInnerArrayIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x3<f16>, 3>, 4>;
+
+fn f() {
+  let I = 1;
+  let l = a[I][I];
+}
+)";
+    // Both array indices are variable: expect conv_mat2x3_f16(a[I][I]), indices left as-is.
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x3_f16, 3u>, 4u>;
+
+fn conv_mat2x3_f16(val : mat2x3_f16) -> mat2x3<f16> {
+  return mat2x3<f16>(val.col0, val.col1);
+}
+
+fn f() {
+  let I = 1;
+  let l = conv_mat2x3_f16(a[I][I]);
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(
+    Std140Test_F16,
+    ArrayArrayMatUniform_LoadColumn_ConstOuterArrayIndex_ConstInnerArrayIndex_ConstColumnIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x3<f16>, 3>, 4>;
+
+fn f() {
+  let l = a[3][2][1];
+}
+)";
+    // All three indices are constant: expect a[3][2][1] to become a[3u][2u].col1, no helper.
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x3_f16, 3u>, 4u>;
+
+fn f() {
+  let l = a[3u][2u].col1;
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(
+    Std140Test_F16,
+    ArrayArrayMatUniform_LoadColumn_ConstOuterArrayIndex_ConstInnerArrayIndex_VariableColumnIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x3<f16>, 3>, 4>;
+
+fn f() {
+  let I = 1;
+  let l = a[3][2][I];
+}
+)";
+    // Only the column index is variable: expect a load_a_3_2_p0() switch helper.
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x3_f16, 3u>, 4u>;
+
+fn load_a_3_2_p0(p0 : u32) -> vec3<f16> {
+  switch(p0) {
+    case 0u: {
+      return a[3u][2u].col0;
+    }
+    case 1u: {
+      return a[3u][2u].col1;
+    }
+    default: {
+      return vec3<f16>();
+    }
+  }
+}
+
+fn f() {
+  let I = 1;
+  let l = load_a_3_2_p0(u32(I));
+}
+)";
+    // Both constant array indices are baked into the helper body as a[3u][2u].
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(
+    Std140Test_F16,
+    ArrayArrayMatUniform_LoadColumn_ConstOuterArrayIndex_VariableInnerArrayIndex_ConstColumnIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x3<f16>, 3>, 4>;
+
+fn f() {
+  let I = 1;
+  let l = a[3][I][1];
+}
+)";
+    // Only the inner array index is variable: expect a[3u][I].col1 with no helper emitted.
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x3_f16, 3u>, 4u>;
+
+fn f() {
+  let I = 1;
+  let l = a[3u][I].col1;
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(
+    Std140Test_F16,
+    ArrayArrayMatUniform_LoadColumn_ConstOuterArrayIndex_VariableInnerArrayIndex_VariableColumnIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x3<f16>, 3>, 4>;
+
+fn f() {
+  let I = 1;
+  let J = 2;
+  let l = a[3][I][J];
+}
+)";
+    // Inner and column indices are variable: expect load_a_3_p0_p1(), switching on column p1.
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x3_f16, 3u>, 4u>;
+
+fn load_a_3_p0_p1(p0 : u32, p1 : u32) -> vec3<f16> {
+  switch(p1) {
+    case 0u: {
+      return a[3u][p0].col0;
+    }
+    case 1u: {
+      return a[3u][p0].col1;
+    }
+    default: {
+      return vec3<f16>();
+    }
+  }
+}
+
+fn f() {
+  let I = 1;
+  let J = 2;
+  let l = load_a_3_p0_p1(u32(I), u32(J));
+}
+)";
+    // The inner array index travels as p0 into a[3u][p0]; the outer index 3u is hard-coded.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(
+    Std140Test_F16,
+    ArrayArrayMatUniform_LoadColumn_VariableOuterArrayIndex_ConstInnerArrayIndex_ConstColumnIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x3<f16>, 3>, 4>;
+
+fn f() {
+  let I = 1;
+  let l = a[I][2][1];
+}
+)";
+    // Only the outer array index is variable: expect a[I][2u].col1 with no helper emitted.
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x3_f16, 3u>, 4u>;
+
+fn f() {
+  let I = 1;
+  let l = a[I][2u].col1;
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(
+    Std140Test_F16,
+    ArrayArrayMatUniform_LoadColumn_VariableOuterArrayIndex_ConstInnerArrayIndex_VariableColumnIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x3<f16>, 3>, 4>;
+
+fn f() {
+  let I = 1;
+  let J = 2;
+  let l = a[I][2][J];
+}
+)";
+    // Outer and column indices are variable: expect load_a_p0_2_p1(), switching on column p1.
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x3_f16, 3u>, 4u>;
+
+fn load_a_p0_2_p1(p0 : u32, p1 : u32) -> vec3<f16> {
+  switch(p1) {
+    case 0u: {
+      return a[p0][2u].col0;
+    }
+    case 1u: {
+      return a[p0][2u].col1;
+    }
+    default: {
+      return vec3<f16>();
+    }
+  }
+}
+
+fn f() {
+  let I = 1;
+  let J = 2;
+  let l = load_a_p0_2_p1(u32(I), u32(J));
+}
+)";
+    // The outer array index travels as p0 into a[p0][2u]; the inner index 2u is hard-coded.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(
+    Std140Test_F16,
+    ArrayArrayMatUniform_LoadColumn_VariableOuterArrayIndex_VariableInnerArrayIndex_ConstColumnIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x3<f16>, 3>, 4>;
+
+fn f() {
+  let I = 1;
+  let J = 2;
+  let l = a[I][J][1];
+}
+)";
+    // Both array indices are variable, column constant: expect a[I][J].col1, no helper.
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x3_f16, 3u>, 4u>;
+
+fn f() {
+  let I = 1;
+  let J = 2;
+  let l = a[I][J].col1;
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(
+    Std140Test_F16,
+    ArrayArrayMatUniform_LoadColumn_VariableOuterArrayIndex_VariableInnerArrayIndex_VariableColumnIndex_Mat2x3F16) {
+    auto* src = R"(
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x3<f16>, 3>, 4>;
+
+fn f() {
+  let I = 0;
+  let J = 1;
+  let K = 2;
+  let l = a[I][J][K];
+}
+)";
+    // All three indices are variable: expect load_a_p0_p1_p2(), switching on column p2 only.
+    auto* expect = R"(
+enable f16;
+
+struct mat2x3_f16 {
+  col0 : vec3<f16>,
+  col1 : vec3<f16>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x3_f16, 3u>, 4u>;
+
+fn load_a_p0_p1_p2(p0 : u32, p1 : u32, p2 : u32) -> vec3<f16> {
+  switch(p2) {
+    case 0u: {
+      return a[p0][p1].col0;
+    }
+    case 1u: {
+      return a[p0][p1].col1;
+    }
+    default: {
+      return vec3<f16>();
+    }
+  }
+}
+
+fn f() {
+  let I = 0;
+  let J = 1;
+  let K = 2;
+  let l = load_a_p0_p1_p2(u32(I), u32(J), u32(K));
+}
+)";
+    // Both array indices travel as p0/p1 into a[p0][p1]; each is cast to u32 at the call site.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+}  // namespace
+}  // namespace tint::transform

diff --git a/src/tint/transform/std140_f32_test.cc b/src/tint/transform/std140_f32_test.cc
new file mode 100644
index 0000000..b0bd467
--- /dev/null
+++ b/src/tint/transform/std140_f32_test.cc

@@ -0,0 +1,3359 @@
+// Copyright 2022 The Tint Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/tint/transform/std140.h"
+
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "src/tint/transform/test_helper.h"
+#include "src/tint/utils/string.h"
+
+namespace tint::transform {
+namespace {
+
+using Std140Test_F32 = TransformTest;  // Fixture named by the Std140Test_F32 TEST_F cases.
+
+TEST_F(Std140Test_F32, StructMatricesUniform) {
+    auto* src = R"(
+struct S2x2F32 {
+  m : mat2x2<f32>,
+}
+struct S3x2F32 {
+  m : mat3x2<f32>,
+}
+struct S4x2F32 {
+  m : mat4x2<f32>,
+}
+struct S2x3F32 {
+  m : mat2x3<f32>,
+}
+struct S3x3F32 {
+  m : mat3x3<f32>,
+}
+struct S4x3F32 {
+  m : mat4x3<f32>,
+}
+struct S2x4F32 {
+  m : mat2x4<f32>,
+}
+struct S3x4F32 {
+  m : mat3x4<f32>,
+}
+struct S4x4F32 {
+  m : mat4x4<f32>,
+}
+
+@group(2) @binding(2) var<uniform> s2x2f32 : S2x2F32;
+@group(3) @binding(2) var<uniform> s3x2f32 : S3x2F32;
+@group(4) @binding(2) var<uniform> s4x2f32 : S4x2F32;
+@group(2) @binding(3) var<uniform> s2x3f32 : S2x3F32;
+@group(3) @binding(3) var<uniform> s3x3f32 : S3x3F32;
+@group(4) @binding(3) var<uniform> s4x3f32 : S4x3F32;
+@group(2) @binding(4) var<uniform> s2x4f32 : S2x4F32;
+@group(3) @binding(4) var<uniform> s3x4f32 : S3x4F32;
+@group(4) @binding(4) var<uniform> s4x4f32 : S4x4F32;
+)";
+    // Only structs whose matrix has vec2 columns (mat2x2, mat3x2, mat4x2) get a *_std140 twin.
+    auto* expect = R"(
+struct S2x2F32 {
+  m : mat2x2<f32>,
+}
+
+struct S2x2F32_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+}
+
+struct S3x2F32 {
+  m : mat3x2<f32>,
+}
+
+struct S3x2F32_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  m_2 : vec2<f32>,
+}
+
+struct S4x2F32 {
+  m : mat4x2<f32>,
+}
+
+struct S4x2F32_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  m_2 : vec2<f32>,
+  m_3 : vec2<f32>,
+}
+
+struct S2x3F32 {
+  m : mat2x3<f32>,
+}
+
+struct S3x3F32 {
+  m : mat3x3<f32>,
+}
+
+struct S4x3F32 {
+  m : mat4x3<f32>,
+}
+
+struct S2x4F32 {
+  m : mat2x4<f32>,
+}
+
+struct S3x4F32 {
+  m : mat3x4<f32>,
+}
+
+struct S4x4F32 {
+  m : mat4x4<f32>,
+}
+
+@group(2) @binding(2) var<uniform> s2x2f32 : S2x2F32_std140;
+
+@group(3) @binding(2) var<uniform> s3x2f32 : S3x2F32_std140;
+
+@group(4) @binding(2) var<uniform> s4x2f32 : S4x2F32_std140;
+
+@group(2) @binding(3) var<uniform> s2x3f32 : S2x3F32;
+
+@group(3) @binding(3) var<uniform> s3x3f32 : S3x3F32;
+
+@group(4) @binding(3) var<uniform> s4x3f32 : S4x3F32;
+
+@group(2) @binding(4) var<uniform> s2x4f32 : S2x4F32;
+
+@group(3) @binding(4) var<uniform> s3x4f32 : S3x4F32;
+
+@group(4) @binding(4) var<uniform> s4x4f32 : S4x4F32;
+)";
+    // The mat*x3 and mat*x4 structs, and the variables using them, must come out unchanged.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+// In the following tests we only test `mat2x2<f32>` for matrices used as array element types and
+// `mat3x2<f32>` otherwise, and set every constant column index to 1, row index to 0, inner array
+// index to 2, and outer array index to 3. For exhaustive tests, i.e. tests on all matrix shapes
+// and different valid constant indices, please refer to std140_exhaustive_test.cc.
+
+TEST_F(Std140Test_F32, SingleStructMatUniform_Mat3x2F32) {
+    auto* src = R"(
+struct S {
+  m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+)";
+    // No functions here: expect only the S_std140 declaration and the retyped variable.
+    auto* expect = R"(
+struct S {
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+)";
+    // The original struct S is still emitted alongside S_std140.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, CustomAlign_Mat3x2F32) {
+    auto* src = R"(
+struct S {
+  before : i32,
+  @align(128)
+  m : mat3x2<f32>,
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+)";
+    // The matrix's @align(128) should move onto the first column m_0, printed as @align(128i).
+    auto* expect = R"(
+struct S {
+  before : i32,
+  @align(128)
+  m : mat3x2<f32>,
+  after : i32,
+}
+
+struct S_std140 {
+  before : i32,
+  @align(128i)
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  m_2 : vec2<f32>,
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+)";
+    // Members `before` and `after` are copied into S_std140 as they are.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, CustomSizeMat_Mat3x2F32) {
+    auto* src = R"(
+struct S {
+  before : i32,
+  @size(128)
+  m : mat3x2<f32>,
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+)";
+    // The matrix's @size(128) should become @size(112) on the last column, m_2.
+    auto* expect = R"(
+struct S {
+  before : i32,
+  @size(128)
+  m : mat3x2<f32>,
+  after : i32,
+}
+
+struct S_std140 {
+  before : i32,
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  @size(112)
+  m_2 : vec2<f32>,
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+)";
+    // NOTE(review): 112 looks like 128 minus two 8-byte vec2<f32> columns - confirm.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, CustomAlignAndSize_Mat3x2F32) {
+    auto* src = R"(
+struct S {
+  before : i32,
+  @align(128) @size(128)
+  m : mat3x2<f32>,
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+)";
+    // Expect @align(128i) on the first column m_0 and @size(112) on the last column m_2.
+    auto* expect = R"(
+struct S {
+  before : i32,
+  @align(128) @size(128)
+  m : mat3x2<f32>,
+  after : i32,
+}
+
+struct S_std140 {
+  before : i32,
+  @align(128i)
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  @size(112)
+  m_2 : vec2<f32>,
+  after : i32,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+)";
+    // The middle column m_1 carries neither attribute.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, MatrixUsageInForLoop_Mat3x2F32) {
+    auto* src = R"(
+struct S {
+  m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  for(var i = u32(s.m[0][0]); (i < u32(s.m[i][1])); i += u32(s.m[1][i])) {
+  }
+}
+)";
+    // Each for-loop clause indexes s.m differently; only the variable column needs a helper.
+    auto* expect = R"(
+struct S {
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn load_s_m_p0_1(p0 : u32) -> f32 {
+  switch(p0) {
+    case 0u: {
+      return s.m_0[1u];
+    }
+    case 1u: {
+      return s.m_1[1u];
+    }
+    case 2u: {
+      return s.m_2[1u];
+    }
+    default: {
+      return f32();
+    }
+  }
+}
+
+fn f() {
+  for(var i = u32(s.m_0[0u]); (i < u32(load_s_m_p0_1(u32(i)))); i += u32(s.m_1[i])) {
+  }
+}
+)";
+    // s.m[i][1] uses load_s_m_p0_1(); s.m[0][0] and s.m[1][i] become direct m_0/m_1 accesses.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, MatUniform_LoadMatrix_Mat3x2F32) {
+    auto* src = R"(
+@group(0) @binding(0) var<uniform> m : mat3x2<f32>;
+
+fn f() {
+  let l = m;
+}
+)";
+    // `src`: a mat3x2<f32> declared directly as a uniform variable and loaded whole.
+    auto* expect = R"(
+struct mat3x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+  col2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> m : mat3x2_f32;
+
+fn conv_mat3x2_f32(val : mat3x2_f32) -> mat3x2<f32> {
+  return mat3x2<f32>(val.col0, val.col1, val.col2);
+}
+
+fn f() {
+  let l = conv_mat3x2_f32(m);
+}
+)";
+    // `expect`: `m` is retyped to struct mat3x2_f32; the load goes through conv_mat3x2_f32().
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, MatUniform_LoadColumn_ConstIndex_Mat3x2F32) {
+    auto* src = R"(
+@group(0) @binding(0) var<uniform> a : mat3x2<f32>;
+
+fn f() {
+  let l = a[1];
+}
+)";
+    // `src`: loads column 1 of a uniform mat3x2<f32> using a constant index.
+    auto* expect = R"(
+struct mat3x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+  col2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : mat3x2_f32;
+
+fn f() {
+  let l = a.col1;
+}
+)";
+    // `expect`: the access becomes the member read a.col1; no helper function is emitted.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, MatUniform_LoadColumn_VariableIndex_Mat3x2F32) {
+    auto* src = R"(
+@group(0) @binding(0) var<uniform> a : mat3x2<f32>;
+
+fn f() {
+  let I = 1;
+  let l = a[I];
+}
+)";
+    // `src`: loads a column of a uniform mat3x2<f32> using the runtime index `I`.
+    auto* expect = R"(
+struct mat3x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+  col2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : mat3x2_f32;
+
+fn load_a_p0(p0 : u32) -> vec2<f32> {
+  switch(p0) {
+    case 0u: {
+      return a.col0;
+    }
+    case 1u: {
+      return a.col1;
+    }
+    case 2u: {
+      return a.col2;
+    }
+    default: {
+      return vec2<f32>();
+    }
+  }
+}
+
+fn f() {
+  let I = 1;
+  let l = load_a_p0(u32(I));
+}
+)";
+    // `expect`: load_a_p0() switches over the index; its default case returns vec2<f32>().
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, MatUniform_LoadColumnSwizzle_ConstIndex_Mat3x2F32) {
+    auto* src = R"(
+@group(0) @binding(0) var<uniform> a : mat3x2<f32>;
+
+fn f() {
+  let l = a[1].yx;
+}
+)";
+    // `src`: swizzles (.yx) column 1 of a uniform mat3x2<f32>, selected by a constant index.
+    auto* expect = R"(
+struct mat3x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+  col2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : mat3x2_f32;
+
+fn f() {
+  let l = a.col1.yx;
+}
+)";
+    // `expect`: the access becomes a.col1.yx; no helper function is emitted.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, MatUniform_LoadColumnSwizzle_VariableIndex_Mat3x2F32) {
+    auto* src = R"(
+@group(0) @binding(0) var<uniform> a : mat3x2<f32>;
+
+fn f() {
+  let I = 1;
+  let l = a[I].yx;
+}
+)";
+    // `src`: swizzles (.yx) a column of a uniform mat3x2<f32> selected by the runtime index `I`.
+    auto* expect = R"(
+struct mat3x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+  col2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : mat3x2_f32;
+
+fn load_a_p0_yx(p0 : u32) -> vec2<f32> {
+  switch(p0) {
+    case 0u: {
+      return a.col0.yx;
+    }
+    case 1u: {
+      return a.col1.yx;
+    }
+    case 2u: {
+      return a.col2.yx;
+    }
+    default: {
+      return vec2<f32>();
+    }
+  }
+}
+
+fn f() {
+  let I = 1;
+  let l = load_a_p0_yx(u32(I));
+}
+)";
+    // `expect`: load_a_p0_yx() switches over the index and applies .yx inside each case.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, MatUniform_LoadScalar_ConstColumnIndex_ConstRowIndex_Mat3x2F32) {
+    auto* src = R"(
+@group(0) @binding(0) var<uniform> a : mat3x2<f32>;
+
+fn f() {
+  let l = a[1][0];
+}
+)";
+    // `src`: reads element a[1][0] of a uniform mat3x2<f32> with constant column and row.
+    auto* expect = R"(
+struct mat3x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+  col2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : mat3x2_f32;
+
+fn f() {
+  let l = a.col1[0u];
+}
+)";
+    // `expect`: the access becomes a.col1[0u]; no helper function is emitted.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, MatUniform_LoadScalar_VariableColumnIndex_ConstRowIndex_Mat3x2F32) {
+    auto* src = R"(
+@group(0) @binding(0) var<uniform> a : mat3x2<f32>;
+
+fn f() {
+  let I = 0;
+  let l = a[I][0];
+}
+)";
+    // `src`: reads a[I][0]: runtime column index `I`, constant row 0.
+    auto* expect = R"(
+struct mat3x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+  col2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : mat3x2_f32;
+
+fn load_a_p0_0(p0 : u32) -> f32 {
+  switch(p0) {
+    case 0u: {
+      return a.col0[0u];
+    }
+    case 1u: {
+      return a.col1[0u];
+    }
+    case 2u: {
+      return a.col2[0u];
+    }
+    default: {
+      return f32();
+    }
+  }
+}
+
+fn f() {
+  let I = 0;
+  let l = load_a_p0_0(u32(I));
+}
+)";
+    // `expect`: load_a_p0_0() switches over the column and reads element [0u] in each case.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, MatUniform_LoadScalar_ConstColumnIndex_VariableRowIndex_Mat3x2F32) {
+    auto* src = R"(
+@group(0) @binding(0) var<uniform> a : mat3x2<f32>;
+
+fn f() {
+  let I = 0;
+  let l = a[1][I];
+}
+)";
+    // `src`: reads a[1][I]: constant column 1, runtime row index `I`.
+    auto* expect = R"(
+struct mat3x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+  col2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : mat3x2_f32;
+
+fn f() {
+  let I = 0;
+  let l = a.col1[I];
+}
+)";
+    // `expect`: the access becomes a.col1[I]; the row index is kept and no helper appears.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, MatUniform_LoadScalar_VariableColumnIndex_VariableRowIndex_Mat3x2F32) {
+    auto* src = R"(
+@group(0) @binding(0) var<uniform> a : mat3x2<f32>;
+
+fn f() {
+  let I = 0;
+  let l = a[I][I];
+}
+)";
+    // `src`: reads a[I][I]: both the column and the row index are the runtime value `I`.
+    auto* expect = R"(
+struct mat3x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+  col2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : mat3x2_f32;
+
+fn load_a_p0_p1(p0 : u32, p1 : u32) -> f32 {
+  switch(p0) {
+    case 0u: {
+      return a.col0[p1];
+    }
+    case 1u: {
+      return a.col1[p1];
+    }
+    case 2u: {
+      return a.col2[p1];
+    }
+    default: {
+      return f32();
+    }
+  }
+}
+
+fn f() {
+  let I = 0;
+  let l = load_a_p0_p1(u32(I), u32(I));
+}
+)";
+    // `expect`: load_a_p0_p1() switches over column p0 and indexes the chosen column with p1.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, StructMatUniform_NameCollision_Mat3x2F32) {
+    auto* src = R"(
+struct S {
+  m_1 : i32,
+  m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+)";
+    // `src`: struct S already has a member named m_1 next to the mat3x2<f32> member `m`.
+    auto* expect = R"(
+struct S {
+  m_1 : i32,
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  m_1 : i32,
+  m__0 : vec2<f32>,
+  m__1 : vec2<f32>,
+  m__2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+)";
+    // `expect`: the columns are named m__0..m__2, so none of them clashes with member m_1.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, StructMatUniform_LoadStruct_Mat3x2F32) {
+    auto* src = R"(
+struct S {
+  m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let l = s;
+}
+)";
+    // `src`: loads the whole uniform struct S, which contains a mat3x2<f32> member.
+    auto* expect = R"(
+struct S {
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn conv_S(val : S_std140) -> S {
+  return S(mat3x2<f32>(val.m_0, val.m_1, val.m_2));
+}
+
+fn f() {
+  let l = conv_S(s);
+}
+)";
+    // `expect`: the load goes through conv_S(), which rebuilds S from the columns of S_std140.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, StructMatUniform_LoadMatrix_Mat3x2F32) {
+    auto* src = R"(
+struct S {
+  m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let l = s.m;
+}
+)";
+    // `src`: loads only the matrix member s.m from the uniform struct.
+    auto* expect = R"(
+struct S {
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn load_s_m() -> mat3x2<f32> {
+  let s = &(s);
+  return mat3x2<f32>((*(s)).m_0, (*(s)).m_1, (*(s)).m_2);
+}
+
+fn f() {
+  let l = load_s_m();
+}
+)";
+    // `expect`: load_s_m() reassembles the mat3x2<f32> from m_0..m_2 through a pointer to `s`.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, StructMatUniform_LoadColumn_ConstIndex_Mat3x2F32) {
+    auto* src = R"(
+struct S {
+  m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let l = s.m[1];
+}
+)";
+    // `src`: loads column s.m[1] with a constant index.
+    auto* expect = R"(
+struct S {
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn f() {
+  let l = s.m_1;
+}
+)";
+    // `expect`: the access becomes the member read s.m_1; no helper function is emitted.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, StructMatUniform_LoadColumn_VariableIndex_Mat3x2F32) {
+    auto* src = R"(
+struct S {
+  m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let I = 0;
+  let l = s.m[I];
+}
+)";
+    // `src`: loads column s.m[I] with the runtime index `I`.
+    auto* expect = R"(
+struct S {
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn load_s_m_p0(p0 : u32) -> vec2<f32> {
+  switch(p0) {
+    case 0u: {
+      return s.m_0;
+    }
+    case 1u: {
+      return s.m_1;
+    }
+    case 2u: {
+      return s.m_2;
+    }
+    default: {
+      return vec2<f32>();
+    }
+  }
+}
+
+fn f() {
+  let I = 0;
+  let l = load_s_m_p0(u32(I));
+}
+)";
+    // `expect`: load_s_m_p0() switches over the index; its default case returns vec2<f32>().
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, StructMatUniform_LoadScalar_ConstColumnIndex_ConstRowIndex_Mat3x2F32) {
+    auto* src = R"(
+struct S {
+  m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let l = s.m[1][0];
+}
+)";
+    // `src`: reads s.m[1][0] with constant column and row indices.
+    auto* expect = R"(
+struct S {
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn f() {
+  let l = s.m_1[0u];
+}
+)";
+    // `expect`: the access becomes s.m_1[0u]; no helper function is emitted.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, StructMatUniform_LoadScalar_VariableColumnIndex_ConstRowIndex_Mat3x2F32) {
+    auto* src = R"(
+struct S {
+  m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let I = 0;
+  let l = s.m[I][0];
+}
+)";
+    // `src`: reads s.m[I][0]: runtime column index `I`, constant row 0.
+    auto* expect = R"(
+struct S {
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn load_s_m_p0_0(p0 : u32) -> f32 {
+  switch(p0) {
+    case 0u: {
+      return s.m_0[0u];
+    }
+    case 1u: {
+      return s.m_1[0u];
+    }
+    case 2u: {
+      return s.m_2[0u];
+    }
+    default: {
+      return f32();
+    }
+  }
+}
+
+fn f() {
+  let I = 0;
+  let l = load_s_m_p0_0(u32(I));
+}
+)";
+    // `expect`: load_s_m_p0_0() switches over the column and reads element [0u] in each case.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, StructMatUniform_LoadScalar_ConstColumnIndex_VariableRowIndex_Mat3x2F32) {
+    auto* src = R"(
+struct S {
+  m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let I = 0;
+  let l = s.m[1][I];
+}
+)";
+    // `src`: reads s.m[1][I]: constant column 1, runtime row index `I`.
+    auto* expect = R"(
+struct S {
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn f() {
+  let I = 0;
+  let l = s.m_1[I];
+}
+)";
+    // `expect`: the access becomes s.m_1[I]; no helper function is emitted.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, StructMatUniform_LoadScalar_VariableColumnIndex_VariableRowIndex_Mat3x2F32) {
+    auto* src = R"(
+struct S {
+  m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let I = 0;
+  let l = s.m[I][I];
+}
+)";
+    // `src`: reads s.m[I][I]: both the column and the row index are the runtime value `I`.
+    auto* expect = R"(
+struct S {
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn load_s_m_p0_p1(p0 : u32, p1 : u32) -> f32 {
+  switch(p0) {
+    case 0u: {
+      return s.m_0[p1];
+    }
+    case 1u: {
+      return s.m_1[p1];
+    }
+    case 2u: {
+      return s.m_2[p1];
+    }
+    default: {
+      return f32();
+    }
+  }
+}
+
+fn f() {
+  let I = 0;
+  let l = load_s_m_p0_p1(u32(I), u32(I));
+}
+)";
+    // `expect`: load_s_m_p0_p1() switches over column p0 and indexes the chosen column with p1.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, ArrayStructMatUniform_LoadArray_Mat3x2F32) {
+    auto* src = R"(
+struct S {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S, 3>;
+
+fn f() {
+  let l = a;
+}
+)";
+    // `src`: loads a whole uniform array<S, 3>; S holds a mat3x2<f32> with @size(64).
+    auto* expect = R"(
+struct S {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  @size(48)
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S_std140, 3u>;
+
+fn conv_S(val : S_std140) -> S {
+  return S(mat3x2<f32>(val.m_0, val.m_1, val.m_2));
+}
+
+fn conv_arr3_S(val : array<S_std140, 3u>) -> array<S, 3u> {
+  var arr : array<S, 3u>;
+  for(var i : u32; (i < 3u); i = (i + 1)) {
+    arr[i] = conv_S(val[i]);
+  }
+  return arr;
+}
+
+fn f() {
+  let l = conv_arr3_S(a);
+}
+)";
+    // `expect`: conv_arr3_S() converts element by element via conv_S(); m_2 carries @size(48).
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, ArrayStructMatUniform_LoadStruct_ConstIndex_Mat3x2F32) {
+    auto* src = R"(
+struct S {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S, 3>;
+
+fn f() {
+  let l = a[2];
+}
+)";
+    // `src`: loads element a[2] of the uniform array<S, 3> with a constant index.
+    auto* expect = R"(
+struct S {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  @size(48)
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S_std140, 3u>;
+
+fn conv_S(val : S_std140) -> S {
+  return S(mat3x2<f32>(val.m_0, val.m_1, val.m_2));
+}
+
+fn f() {
+  let l = conv_S(a[2u]);
+}
+)";
+    // `expect`: the load becomes conv_S(a[2u]); only the struct conversion helper is emitted.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, ArrayStructMatUniform_LoadStruct_VariableIndex_Mat3x2F32) {
+    auto* src = R"(
+struct S {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S, 3>;
+
+fn f() {
+  let I = 1;
+  let l = a[I];
+}
+)";
+    // `src`: loads element a[I] of the uniform array<S, 3> with the runtime index `I`.
+    auto* expect = R"(
+struct S {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  @size(48)
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S_std140, 3u>;
+
+fn conv_S(val : S_std140) -> S {
+  return S(mat3x2<f32>(val.m_0, val.m_1, val.m_2));
+}
+
+fn f() {
+  let I = 1;
+  let l = conv_S(a[I]);
+}
+)";
+    // `expect`: the load becomes conv_S(a[I]); the array index expression is kept as written.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, ArrayStructMatUniform_LoadMatrix_ConstArrayIndex_Mat3x2F32) {
+    auto* src = R"(
+struct S {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S, 3>;
+
+fn f() {
+  let l = a[2].m;
+}
+)";
+    // `src`: loads the matrix member a[2].m with a constant array index.
+    auto* expect = R"(
+struct S {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  @size(48)
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S_std140, 3u>;
+
+fn load_a_2_m() -> mat3x2<f32> {
+  let s = &(a[2u]);
+  return mat3x2<f32>((*(s)).m_0, (*(s)).m_1, (*(s)).m_2);
+}
+
+fn f() {
+  let l = load_a_2_m();
+}
+)";
+    // `expect`: load_a_2_m() takes no parameters and rebuilds the matrix from a[2u]'s columns.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, ArrayStructMatUniform_LoadMatrix_VariableArrayIndex_Mat3x2F32) {
+    auto* src = R"(
+struct S {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S, 3>;
+
+fn f() {
+  let I = 1;
+  let l = a[I].m;
+}
+)";
+    // `src`: loads the matrix member a[I].m with the runtime array index `I`.
+    auto* expect = R"(
+struct S {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  @size(48)
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S_std140, 3u>;
+
+fn load_a_p0_m(p0 : u32) -> mat3x2<f32> {
+  let s = &(a[p0]);
+  return mat3x2<f32>((*(s)).m_0, (*(s)).m_1, (*(s)).m_2);
+}
+
+fn f() {
+  let I = 1;
+  let l = load_a_p0_m(u32(I));
+}
+)";
+    // `expect`: load_a_p0_m(p0) rebuilds the matrix from a[p0]; the call site passes u32(I).
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32,
+       ArrayStructMatUniform_LoadColumn_ConstArrayIndex_ConstColumnIndex_Mat3x2F32) {
+    auto* src = R"(
+struct S {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S, 3>;
+
+fn f() {
+  let l = a[2].m[1];
+}
+)";
+    // `src`: loads column a[2].m[1]; array and column indices are both constants.
+    auto* expect = R"(
+struct S {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  @size(48)
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S_std140, 3u>;
+
+fn f() {
+  let l = a[2u].m_1;
+}
+)";
+    // `expect`: the access becomes a[2u].m_1; no helper function is emitted.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32,
+       ArrayStructMatUniform_LoadColumn_VariableArrayIndex_ConstColumnIndex_Mat3x2F32) {
+    auto* src = R"(
+struct S {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S, 3>;
+
+fn f() {
+  let I = 1;
+  let l = a[I].m[1];
+}
+)";
+    // `src`: loads column a[I].m[1]: runtime array index `I`, constant column 1.
+    auto* expect = R"(
+struct S {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  @size(48)
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S_std140, 3u>;
+
+fn f() {
+  let I = 1;
+  let l = a[I].m_1;
+}
+)";
+    // `expect`: the access becomes a[I].m_1; no helper function is emitted.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32,
+       ArrayStructMatUniform_LoadColumn_ConstArrayIndex_VariableColumnIndex_Mat3x2F32) {
+    auto* src = R"(
+struct S {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S, 3>;
+
+fn f() {
+  let I = 1;
+  let l = a[2].m[I];
+}
+)";
+    // `src`: loads column a[2].m[I]: constant array index 2, runtime column index `I`.
+    auto* expect = R"(
+struct S {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  @size(48)
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S_std140, 3u>;
+
+fn load_a_2_m_p0(p0 : u32) -> vec2<f32> {
+  switch(p0) {
+    case 0u: {
+      return a[2u].m_0;
+    }
+    case 1u: {
+      return a[2u].m_1;
+    }
+    case 2u: {
+      return a[2u].m_2;
+    }
+    default: {
+      return vec2<f32>();
+    }
+  }
+}
+
+fn f() {
+  let I = 1;
+  let l = load_a_2_m_p0(u32(I));
+}
+)";
+    // `expect`: load_a_2_m_p0() switches over the column index with a[2u] fixed in every case.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32,
+       ArrayStructMatUniform_LoadColumn_VariableArrayIndex_VariableColumnIndex_Mat3x2F32) {
+    auto* src = R"(
+struct S {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S, 3>;
+
+fn f() {
+  let I = 1;
+  let l = a[I].m[I];
+}
+)";
+    // `src`: loads column a[I].m[I]: array and column indices are both the runtime value `I`.
+    auto* expect = R"(
+struct S {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  @size(48)
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<S_std140, 3u>;
+
+fn load_a_p0_m_p1(p0 : u32, p1 : u32) -> vec2<f32> {
+  switch(p1) {
+    case 0u: {
+      return a[p0].m_0;
+    }
+    case 1u: {
+      return a[p0].m_1;
+    }
+    case 2u: {
+      return a[p0].m_2;
+    }
+    default: {
+      return vec2<f32>();
+    }
+  }
+}
+
+fn f() {
+  let I = 1;
+  let l = load_a_p0_m_p1(u32(I), u32(I));
+}
+)";
+    // `expect`: load_a_p0_m_p1() switches over column p1 while indexing the array with p0.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, ArrayStructArrayStructMatUniform_Loads_Mat3x2F32) {
+    auto* src = R"(
+struct Inner {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+fn f() {
+  let I = 1;
+  let J = 2;
+  let K = 0;
+  let l_a : array<Outer, 4> = a;
+  let l_a_1 : Outer = a[1];
+  let l_a_I : Outer = a[I];
+  let l_a_2_a : array<Inner, 4> = a[2].a;
+  let l_a_I_a : array<Inner, 4> = a[I].a;
+  let l_a_3_a_1 : Inner = a[3].a[1];
+  let l_a_3_a_I : Inner = a[3].a[I];
+  let l_a_I_a_1 : Inner = a[I].a[1];
+  let l_a_I_a_J : Inner = a[I].a[J];
+  let l_a_0_a_2_m : mat3x2<f32> = a[0].a[2].m;
+  let l_a_0_a_I_m : mat3x2<f32> = a[0].a[I].m;
+  let l_a_I_a_2_m : mat3x2<f32> = a[I].a[2].m;
+  let l_a_I_a_J_m : mat3x2<f32> = a[I].a[J].m;
+  let l_a_1_a_3_m_0 : vec2<f32> = a[1].a[3].m[0];
+  let l_a_I_a_J_m_K : vec2<f32> = a[I].a[J].m[K];
+  let l_a_2_a_0_m_1_0 : f32 = a[2].a[0].m[1][0];
+  let l_a_I_a_J_m_K_I : f32 = a[I].a[J].m[K][I];
+}
+)";
+    // `src`: array<Outer, 4> holding array<Inner, 4>; f() loads every level, const and runtime.
+    auto* expect = R"(
+struct Inner {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+struct Inner_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  @size(48)
+  m_2 : vec2<f32>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+struct Outer_std140 {
+  a : array<Inner_std140, 4u>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer_std140, 4u>;
+
+fn conv_Inner(val : Inner_std140) -> Inner {
+  return Inner(mat3x2<f32>(val.m_0, val.m_1, val.m_2));
+}
+
+fn conv_arr4_Inner(val : array<Inner_std140, 4u>) -> array<Inner, 4u> {
+  var arr : array<Inner, 4u>;
+  for(var i : u32; (i < 4u); i = (i + 1)) {
+    arr[i] = conv_Inner(val[i]);
+  }
+  return arr;
+}
+
+fn conv_Outer(val : Outer_std140) -> Outer {
+  return Outer(conv_arr4_Inner(val.a));
+}
+
+fn conv_arr4_Outer(val : array<Outer_std140, 4u>) -> array<Outer, 4u> {
+  var arr : array<Outer, 4u>;
+  for(var i : u32; (i < 4u); i = (i + 1)) {
+    arr[i] = conv_Outer(val[i]);
+  }
+  return arr;
+}
+
+fn load_a_0_a_2_m() -> mat3x2<f32> {
+  let s = &(a[0u].a[2u]);
+  return mat3x2<f32>((*(s)).m_0, (*(s)).m_1, (*(s)).m_2);
+}
+
+fn load_a_0_a_p0_m(p0 : u32) -> mat3x2<f32> {
+  let s = &(a[0u].a[p0]);
+  return mat3x2<f32>((*(s)).m_0, (*(s)).m_1, (*(s)).m_2);
+}
+
+fn load_a_p0_a_2_m(p0 : u32) -> mat3x2<f32> {
+  let s = &(a[p0].a[2u]);
+  return mat3x2<f32>((*(s)).m_0, (*(s)).m_1, (*(s)).m_2);
+}
+
+fn load_a_p0_a_p1_m(p0 : u32, p1 : u32) -> mat3x2<f32> {
+  let s = &(a[p0].a[p1]);
+  return mat3x2<f32>((*(s)).m_0, (*(s)).m_1, (*(s)).m_2);
+}
+
+fn load_a_p0_a_p1_m_p2(p0 : u32, p1 : u32, p2 : u32) -> vec2<f32> {
+  switch(p2) {
+    case 0u: {
+      return a[p0].a[p1].m_0;
+    }
+    case 1u: {
+      return a[p0].a[p1].m_1;
+    }
+    case 2u: {
+      return a[p0].a[p1].m_2;
+    }
+    default: {
+      return vec2<f32>();
+    }
+  }
+}
+
+fn load_a_p0_a_p1_m_p2_p3(p0 : u32, p1 : u32, p2 : u32, p3 : u32) -> f32 {
+  switch(p2) {
+    case 0u: {
+      return a[p0].a[p1].m_0[p3];
+    }
+    case 1u: {
+      return a[p0].a[p1].m_1[p3];
+    }
+    case 2u: {
+      return a[p0].a[p1].m_2[p3];
+    }
+    default: {
+      return f32();
+    }
+  }
+}
+
+fn f() {
+  let I = 1;
+  let J = 2;
+  let K = 0;
+  let l_a : array<Outer, 4> = conv_arr4_Outer(a);
+  let l_a_1 : Outer = conv_Outer(a[1u]);
+  let l_a_I : Outer = conv_Outer(a[I]);
+  let l_a_2_a : array<Inner, 4> = conv_arr4_Inner(a[2u].a);
+  let l_a_I_a : array<Inner, 4> = conv_arr4_Inner(a[I].a);
+  let l_a_3_a_1 : Inner = conv_Inner(a[3u].a[1u]);
+  let l_a_3_a_I : Inner = conv_Inner(a[3u].a[I]);
+  let l_a_I_a_1 : Inner = conv_Inner(a[I].a[1u]);
+  let l_a_I_a_J : Inner = conv_Inner(a[I].a[J]);
+  let l_a_0_a_2_m : mat3x2<f32> = load_a_0_a_2_m();
+  let l_a_0_a_I_m : mat3x2<f32> = load_a_0_a_p0_m(u32(I));
+  let l_a_I_a_2_m : mat3x2<f32> = load_a_p0_a_2_m(u32(I));
+  let l_a_I_a_J_m : mat3x2<f32> = load_a_p0_a_p1_m(u32(I), u32(J));
+  let l_a_1_a_3_m_0 : vec2<f32> = a[1u].a[3u].m_0;
+  let l_a_I_a_J_m_K : vec2<f32> = load_a_p0_a_p1_m_p2(u32(I), u32(J), u32(K));
+  let l_a_2_a_0_m_1_0 : f32 = a[2u].a[0u].m_1[0u];
+  let l_a_I_a_J_m_K_I : f32 = load_a_p0_a_p1_m_p2_p3(u32(I), u32(J), u32(K), u32(I));
+}
+)";
+    // `expect`: conv_*() for structs/arrays, load_*() for matrices; constant columns read inline.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, ArrayStructArrayStructMatUniform_LoadsViaPtrs_Mat3x2F32) {
+    auto* src = R"(
+struct Inner {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
+
+fn f() {
+  let I = 1;
+  let J = 2;
+  let K = 0;
+  let p_a = &(a);
+  let p_a_3 = &((*(p_a))[3]);
+  let p_a_I = &((*(p_a))[I]);
+  let p_a_3_a = &((*(p_a_3)).a);
+  let p_a_I_a = &((*(p_a_I)).a);
+  let p_a_3_a_2 = &((*(p_a_3_a))[2]);
+  let p_a_3_a_I = &((*(p_a_3_a))[I]);
+  let p_a_I_a_2 = &((*(p_a_I_a))[2]);
+  let p_a_I_a_J = &((*(p_a_I_a))[J]);
+  let p_a_3_a_2_m = &((*(p_a_3_a_2)).m);
+  let p_a_3_a_I_m = &((*(p_a_3_a_I)).m);
+  let p_a_I_a_2_m = &((*(p_a_I_a_2)).m);
+  let p_a_I_a_J_m = &((*(p_a_I_a_J)).m);
+  let p_a_3_a_2_m_1 = &((*(p_a_3_a_2_m))[1]);
+  let p_a_I_a_J_m_K = &((*(p_a_I_a_J_m))[K]);
+  let l_a : array<Outer, 4> = *(p_a);
+  let l_a_3 : Outer = *(p_a_3);
+  let l_a_I : Outer = *(p_a_I);
+  let l_a_3_a : array<Inner, 4> = *(p_a_3_a);
+  let l_a_I_a : array<Inner, 4> = *(p_a_I_a);
+  let l_a_3_a_2 : Inner = *(p_a_3_a_2);
+  let l_a_3_a_I : Inner = *(p_a_3_a_I);
+  let l_a_I_a_2 : Inner = *(p_a_I_a_2);
+  let l_a_I_a_J : Inner = *(p_a_I_a_J);
+  let l_a_3_a_2_m : mat3x2<f32> = *(p_a_3_a_2_m);
+  let l_a_3_a_I_m : mat3x2<f32> = *(p_a_3_a_I_m);
+  let l_a_I_a_2_m : mat3x2<f32> = *(p_a_I_a_2_m);
+  let l_a_I_a_J_m : mat3x2<f32> = *(p_a_I_a_J_m);
+  let l_a_3_a_2_m_1 : vec2<f32> = *(p_a_3_a_2_m_1);
+  let l_a_I_a_J_m_K : vec2<f32> = *(p_a_I_a_J_m_K);
+  let l_a_3_a_2_m_1_0 : f32 = (*(p_a_3_a_2_m_1))[0];
+  let l_a_I_a_J_m_K_I : f32 = (*(p_a_I_a_J_m_K))[I];
+}
+)";
+    // `src`: array<Outer, 4> holding array<Inner, 4>; all accesses go through pointer `let`s (p_*).
+    auto* expect = R"(
+struct Inner {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+struct Inner_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  @size(48)
+  m_2 : vec2<f32>,
+}
+
+struct Outer {
+  a : array<Inner, 4>,
+}
+
+struct Outer_std140 {
+  a : array<Inner_std140, 4u>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<Outer_std140, 4u>;
+
+fn conv_Inner(val : Inner_std140) -> Inner {
+  return Inner(mat3x2<f32>(val.m_0, val.m_1, val.m_2));
+}
+
+fn conv_arr4_Inner(val : array<Inner_std140, 4u>) -> array<Inner, 4u> {
+  var arr : array<Inner, 4u>;
+  for(var i : u32; (i < 4u); i = (i + 1)) {
+    arr[i] = conv_Inner(val[i]);
+  }
+  return arr;
+}
+
+fn conv_Outer(val : Outer_std140) -> Outer {
+  return Outer(conv_arr4_Inner(val.a));
+}
+
+fn conv_arr4_Outer(val : array<Outer_std140, 4u>) -> array<Outer, 4u> {
+  var arr : array<Outer, 4u>;
+  for(var i : u32; (i < 4u); i = (i + 1)) {
+    arr[i] = conv_Outer(val[i]);
+  }
+  return arr;
+}
+
+fn load_a_3_a_2_m() -> mat3x2<f32> {
+  let s = &(a[3u].a[2u]);
+  return mat3x2<f32>((*(s)).m_0, (*(s)).m_1, (*(s)).m_2);
+}
+
+fn load_a_3_a_p0_m(p0 : u32) -> mat3x2<f32> {
+  let s = &(a[3u].a[p0]);
+  return mat3x2<f32>((*(s)).m_0, (*(s)).m_1, (*(s)).m_2);
+}
+
+fn load_a_p0_a_2_m(p0 : u32) -> mat3x2<f32> {
+  let s = &(a[p0].a[2u]);
+  return mat3x2<f32>((*(s)).m_0, (*(s)).m_1, (*(s)).m_2);
+}
+
+fn load_a_p0_a_p1_m(p0 : u32, p1 : u32) -> mat3x2<f32> {
+  let s = &(a[p0].a[p1]);
+  return mat3x2<f32>((*(s)).m_0, (*(s)).m_1, (*(s)).m_2);
+}
+
+fn load_a_p0_a_p1_m_p2(p0 : u32, p1 : u32, p2 : u32) -> vec2<f32> {
+  switch(p2) {
+    case 0u: {
+      return a[p0].a[p1].m_0;
+    }
+    case 1u: {
+      return a[p0].a[p1].m_1;
+    }
+    case 2u: {
+      return a[p0].a[p1].m_2;
+    }
+    default: {
+      return vec2<f32>();
+    }
+  }
+}
+
+fn load_a_p0_a_p1_m_p2_p3(p0 : u32, p1 : u32, p2 : u32, p3 : u32) -> f32 {
+  switch(p2) {
+    case 0u: {
+      return a[p0].a[p1].m_0[p3];
+    }
+    case 1u: {
+      return a[p0].a[p1].m_1[p3];
+    }
+    case 2u: {
+      return a[p0].a[p1].m_2[p3];
+    }
+    default: {
+      return f32();
+    }
+  }
+}
+
+fn f() {
+  let I = 1;
+  let J = 2;
+  let K = 0;
+  let p_a = conv_arr4_Outer(a);
+  let p_a_3 = conv_Outer(a[3u]);
+  let p_a_I = conv_Outer(a[I]);
+  let p_a_3_a = conv_arr4_Inner(a[3u].a);
+  let p_a_I_a = conv_arr4_Inner(a[I].a);
+  let p_a_3_a_2 = conv_Inner(a[3u].a[2u]);
+  let p_a_3_a_I = conv_Inner(a[3u].a[I]);
+  let p_a_I_a_2 = conv_Inner(a[I].a[2u]);
+  let p_a_I_a_J = conv_Inner(a[I].a[J]);
+  let p_a_3_a_2_m = load_a_3_a_2_m();
+  let p_a_3_a_I_m = load_a_3_a_p0_m(u32(I));
+  let p_a_I_a_2_m = load_a_p0_a_2_m(u32(I));
+  let p_a_I_a_J_m = load_a_p0_a_p1_m(u32(I), u32(J));
+  let p_a_3_a_2_m_1 = a[3u].a[2u].m_1;
+  let p_a_I_a_J_m_K = load_a_p0_a_p1_m_p2(u32(I), u32(J), u32(K));
+  let l_a : array<Outer, 4> = conv_arr4_Outer(a);
+  let l_a_3 : Outer = conv_Outer(a[3u]);
+  let l_a_I : Outer = conv_Outer(a[I]);
+  let l_a_3_a : array<Inner, 4> = conv_arr4_Inner(a[3u].a);
+  let l_a_I_a : array<Inner, 4> = conv_arr4_Inner(a[I].a);
+  let l_a_3_a_2 : Inner = conv_Inner(a[3u].a[2u]);
+  let l_a_3_a_I : Inner = conv_Inner(a[3u].a[I]);
+  let l_a_I_a_2 : Inner = conv_Inner(a[I].a[2u]);
+  let l_a_I_a_J : Inner = conv_Inner(a[I].a[J]);
+  let l_a_3_a_2_m : mat3x2<f32> = load_a_3_a_2_m();
+  let l_a_3_a_I_m : mat3x2<f32> = load_a_3_a_p0_m(u32(I));
+  let l_a_I_a_2_m : mat3x2<f32> = load_a_p0_a_2_m(u32(I));
+  let l_a_I_a_J_m : mat3x2<f32> = load_a_p0_a_p1_m(u32(I), u32(J));
+  let l_a_3_a_2_m_1 : vec2<f32> = a[3u].a[2u].m_1;
+  let l_a_I_a_J_m_K : vec2<f32> = load_a_p0_a_p1_m_p2(u32(I), u32(J), u32(K));
+  let l_a_3_a_2_m_1_0 : f32 = a[3u].a[2u].m_1[0u];
+  let l_a_I_a_J_m_K_I : f32 = load_a_p0_a_p1_m_p2_p3(u32(I), u32(J), u32(K), u32(I));
+}
+)";
+    // `expect`: each p_* `let` is initialised with the converted/loaded value; l_* repeat them.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, ArrayStructMatUniform_CopyArray_UniformToStorage_Mat3x2F32) {
+    auto* src = R"(
+struct S {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
+
+fn f() {
+  s = u;
+}
+)";
+    // `src`: assigns the uniform array `u` to the read_write storage array `s`.
+    auto* expect = R"(
+struct S {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  @size(48)
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S_std140, 4u>;
+
+@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
+
+fn conv_S(val : S_std140) -> S {
+  return S(mat3x2<f32>(val.m_0, val.m_1, val.m_2));
+}
+
+fn conv_arr4_S(val : array<S_std140, 4u>) -> array<S, 4u> {
+  var arr : array<S, 4u>;
+  for(var i : u32; (i < 4u); i = (i + 1)) {
+    arr[i] = conv_S(val[i]);
+  }
+  return arr;
+}
+
+fn f() {
+  s = conv_arr4_S(u);
+}
+)";
+    // `expect`: only `u` is retyped to S_std140; the assignment becomes s = conv_arr4_S(u).
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, ArrayStructMatUniform_CopyStruct_UniformToWorkgroup_Mat3x2F32) {
+    auto* src = R"(
+struct S {
+  v : vec4<i32>,
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+var<workgroup> w : array<S, 4>;
+
+fn f() {
+  w[0] = u[1];
+}
+)";
+    // `src`: copies struct u[1] from the uniform array into workgroup element w[0].
+    auto* expect = R"(
+struct S {
+  v : vec4<i32>,
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  v : vec4<i32>,
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  @size(48)
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S_std140, 4u>;
+
+var<workgroup> w : array<S, 4>;
+
+fn conv_S(val : S_std140) -> S {
+  return S(val.v, mat3x2<f32>(val.m_0, val.m_1, val.m_2));
+}
+
+fn f() {
+  w[0] = conv_S(u[1u]);
+}
+)";
+    // `expect`: conv_S() passes val.v through and rebuilds the matrix; `w` stays array<S, 4>.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, ArrayStructMatUniform_CopyMatrix_UniformToPrivate_Mat3x2F32) {
+    auto* src = R"(
+struct S {
+  v : vec4<i32>,
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 3>;
+
+var<private> p : array<S, 4>;
+
+fn f() {
+  p[2].m = u[1].m;
+}
+)";
+    // `src`: copies matrix u[1].m from the uniform array into the private element p[2].m.
+    auto* expect = R"(
+struct S {
+  v : vec4<i32>,
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  v : vec4<i32>,
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  @size(48)
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S_std140, 3u>;
+
+var<private> p : array<S, 4>;
+
+fn load_u_1_m() -> mat3x2<f32> {
+  let s = &(u[1u]);
+  return mat3x2<f32>((*(s)).m_0, (*(s)).m_1, (*(s)).m_2);
+}
+
+fn f() {
+  p[2].m = load_u_1_m();
+}
+)";
+    // `expect`: the right-hand side becomes load_u_1_m(); the private-side p[2].m is untouched.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, ArrayStructMatUniform_CopyColumn_UniformToStorage_Mat3x2F32) {
+    auto* src = R"(
+struct S {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 3>;
+
+@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
+
+fn f() {
+  s[3].m[1] = u[2].m[0];
+}
+)";
+    // `src`: copies column u[2].m[0] from the uniform array into storage column s[3].m[1].
+    auto* expect = R"(
+struct S {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  @size(48)
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S_std140, 3u>;
+
+@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
+
+fn f() {
+  s[3].m[1] = u[2u].m_0;
+}
+)";
+    // `expect`: the right-hand side becomes u[2u].m_0; the storage-side s[3].m[1] is untouched.
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, ArrayStructMatUniform_CopyColumnSwizzle_UniformToWorkgroup_Mat3x2F32) {
+    auto* src = R"(
+struct S {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 4>;
+
+var<workgroup> w : array<S, 4>;
+
+fn f() {
+  w[3].m[1] = u[2].m[0].yx.yx;
+}
+)";
+
+    auto* expect = R"(
+struct S {
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  @size(48)
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S_std140, 4u>;
+
+var<workgroup> w : array<S, 4>;
+
+fn f() {
+  w[3].m[1] = u[2u].m_0.yx.yx;
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, ArrayStructMatUniform_CopyScalar_UniformToPrivate_Mat3x2F32) {
+    auto* src = R"(
+struct S {
+  v : vec4<i32>,
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S, 3>;
+
+var<private> p : array<S, 4>;
+
+fn f() {
+  p[3].m[1].x = u[2].m[0].y;
+}
+)";
+
+    auto* expect = R"(
+struct S {
+  v : vec4<i32>,
+  @size(64)
+  m : mat3x2<f32>,
+}
+
+struct S_std140 {
+  v : vec4<i32>,
+  m_0 : vec2<f32>,
+  m_1 : vec2<f32>,
+  @size(48)
+  m_2 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> u : array<S_std140, 3u>;
+
+var<private> p : array<S, 4>;
+
+fn f() {
+  p[3].m[1].x = u[2u].m_0[1u];
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, ArrayMatUniform_LoadArray_Mat2x2F32) {
+    auto* src = R"(
+@group(0) @binding(0) var<uniform> a : array<mat2x2<f32>, 3>;
+
+fn f() {
+  let l = a;
+}
+)";
+
+    auto* expect = R"(
+struct mat2x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<mat2x2_f32, 3u>;
+
+fn conv_mat2x2_f32(val : mat2x2_f32) -> mat2x2<f32> {
+  return mat2x2<f32>(val.col0, val.col1);
+}
+
+fn conv_arr3_mat2x2_f32(val : array<mat2x2_f32, 3u>) -> array<mat2x2<f32>, 3u> {
+  var arr : array<mat2x2<f32>, 3u>;
+  for(var i : u32; (i < 3u); i = (i + 1)) {
+    arr[i] = conv_mat2x2_f32(val[i]);
+  }
+  return arr;
+}
+
+fn f() {
+  let l = conv_arr3_mat2x2_f32(a);
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, ArrayMatUniform_LoadMatrix_ConstArrayIndex_Mat2x2F32) {
+    auto* src = R"(
+@group(0) @binding(0) var<uniform> a : array<mat2x2<f32>, 3>;
+
+fn f() {
+  let l = a[2];
+}
+)";
+
+    auto* expect = R"(
+struct mat2x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<mat2x2_f32, 3u>;
+
+fn conv_mat2x2_f32(val : mat2x2_f32) -> mat2x2<f32> {
+  return mat2x2<f32>(val.col0, val.col1);
+}
+
+fn f() {
+  let l = conv_mat2x2_f32(a[2u]);
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, ArrayMatUniform_LoadMatrix_VariableArrayIndex_Mat2x2F32) {
+    auto* src = R"(
+@group(0) @binding(0) var<uniform> a : array<mat2x2<f32>, 3>;
+
+fn f() {
+  let I = 1;
+  let l = a[I];
+}
+)";
+
+    auto* expect = R"(
+struct mat2x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<mat2x2_f32, 3u>;
+
+fn conv_mat2x2_f32(val : mat2x2_f32) -> mat2x2<f32> {
+  return mat2x2<f32>(val.col0, val.col1);
+}
+
+fn f() {
+  let I = 1;
+  let l = conv_mat2x2_f32(a[I]);
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, ArrayMatUniform_LoadColumn_ConstArrayIndex_ConstColumnIndex_Mat2x2F32) {
+    auto* src = R"(
+@group(0) @binding(0) var<uniform> a : array<mat2x2<f32>, 3>;
+
+fn f() {
+  let l = a[2][1];
+}
+)";
+
+    auto* expect = R"(
+struct mat2x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<mat2x2_f32, 3u>;
+
+fn f() {
+  let l = a[2u].col1;
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, ArrayMatUniform_LoadColumn_VariableArrayIndex_ConstColumnIndex_Mat2x2F32) {
+    auto* src = R"(
+@group(0) @binding(0) var<uniform> a : array<mat2x2<f32>, 3>;
+
+fn f() {
+  let I = 1;
+  let l = a[I][1];
+}
+)";
+
+    auto* expect = R"(
+struct mat2x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<mat2x2_f32, 3u>;
+
+fn f() {
+  let I = 1;
+  let l = a[I].col1;
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, ArrayMatUniform_LoadColumn_ConstArrayIndex_VariableColumnIndex_Mat2x2F32) {
+    auto* src = R"(
+@group(0) @binding(0) var<uniform> a : array<mat2x2<f32>, 3>;
+
+fn f() {
+  let I = 1;
+  let l = a[2][I];
+}
+)";
+
+    auto* expect = R"(
+struct mat2x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<mat2x2_f32, 3u>;
+
+fn load_a_2_p0(p0 : u32) -> vec2<f32> {
+  switch(p0) {
+    case 0u: {
+      return a[2u].col0;
+    }
+    case 1u: {
+      return a[2u].col1;
+    }
+    default: {
+      return vec2<f32>();
+    }
+  }
+}
+
+fn f() {
+  let I = 1;
+  let l = load_a_2_p0(u32(I));
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32,
+       ArrayMatUniform_LoadColumn_VariableArrayIndex_VariableColumnIndex_Mat2x2F32) {
+    auto* src = R"(
+@group(0) @binding(0) var<uniform> a : array<mat2x2<f32>, 3>;
+
+fn f() {
+  let I = 1;
+  let l = a[I][I];
+}
+)";
+
+    auto* expect = R"(
+struct mat2x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<mat2x2_f32, 3u>;
+
+fn load_a_p0_p1(p0 : u32, p1 : u32) -> vec2<f32> {
+  switch(p1) {
+    case 0u: {
+      return a[p0].col0;
+    }
+    case 1u: {
+      return a[p0].col1;
+    }
+    default: {
+      return vec2<f32>();
+    }
+  }
+}
+
+fn f() {
+  let I = 1;
+  let l = load_a_p0_p1(u32(I), u32(I));
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, StructArrayMatUniform_LoadStruct_Mat2x2F32) {
+    auto* src = R"(
+struct S {
+  a : array<mat2x2<f32>, 3>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let l = s;
+}
+)";
+
+    auto* expect = R"(
+struct mat2x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+}
+
+struct S {
+  a : array<mat2x2<f32>, 3>,
+}
+
+struct S_std140 {
+  a : array<mat2x2_f32, 3u>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn conv_mat2x2_f32(val : mat2x2_f32) -> mat2x2<f32> {
+  return mat2x2<f32>(val.col0, val.col1);
+}
+
+fn conv_arr3_mat2x2_f32(val : array<mat2x2_f32, 3u>) -> array<mat2x2<f32>, 3u> {
+  var arr : array<mat2x2<f32>, 3u>;
+  for(var i : u32; (i < 3u); i = (i + 1)) {
+    arr[i] = conv_mat2x2_f32(val[i]);
+  }
+  return arr;
+}
+
+fn conv_S(val : S_std140) -> S {
+  return S(conv_arr3_mat2x2_f32(val.a));
+}
+
+fn f() {
+  let l = conv_S(s);
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, StructArrayMatUniform_LoadArray_Mat2x2F32) {
+    auto* src = R"(
+struct S {
+  a : array<mat2x2<f32>, 3>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let l = s.a;
+}
+)";
+
+    auto* expect = R"(
+struct mat2x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+}
+
+struct S {
+  a : array<mat2x2<f32>, 3>,
+}
+
+struct S_std140 {
+  a : array<mat2x2_f32, 3u>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn conv_mat2x2_f32(val : mat2x2_f32) -> mat2x2<f32> {
+  return mat2x2<f32>(val.col0, val.col1);
+}
+
+fn conv_arr3_mat2x2_f32(val : array<mat2x2_f32, 3u>) -> array<mat2x2<f32>, 3u> {
+  var arr : array<mat2x2<f32>, 3u>;
+  for(var i : u32; (i < 3u); i = (i + 1)) {
+    arr[i] = conv_mat2x2_f32(val[i]);
+  }
+  return arr;
+}
+
+fn f() {
+  let l = conv_arr3_mat2x2_f32(s.a);
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, StructArrayMatUniform_LoadMatrix_ConstArrayIndex_Mat2x2F32) {
+    auto* src = R"(
+struct S {
+  a : array<mat2x2<f32>, 3>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let l = s.a[2];
+}
+)";
+
+    auto* expect = R"(
+struct mat2x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+}
+
+struct S {
+  a : array<mat2x2<f32>, 3>,
+}
+
+struct S_std140 {
+  a : array<mat2x2_f32, 3u>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn conv_mat2x2_f32(val : mat2x2_f32) -> mat2x2<f32> {
+  return mat2x2<f32>(val.col0, val.col1);
+}
+
+fn f() {
+  let l = conv_mat2x2_f32(s.a[2u]);
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, StructArrayMatUniform_LoadMatrix_VariableArrayIndex_Mat2x2F32) {
+    auto* src = R"(
+struct S {
+  a : array<mat2x2<f32>, 3>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let I = 1;
+  let l = s.a[I];
+}
+)";
+
+    auto* expect = R"(
+struct mat2x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+}
+
+struct S {
+  a : array<mat2x2<f32>, 3>,
+}
+
+struct S_std140 {
+  a : array<mat2x2_f32, 3u>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn conv_mat2x2_f32(val : mat2x2_f32) -> mat2x2<f32> {
+  return mat2x2<f32>(val.col0, val.col1);
+}
+
+fn f() {
+  let I = 1;
+  let l = conv_mat2x2_f32(s.a[I]);
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32,
+       StructArrayMatUniform_LoadColumn_ConstArrayIndex_ConstColumnIndex_Mat2x2F32) {
+    auto* src = R"(
+struct S {
+  a : array<mat2x2<f32>, 3>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let l = s.a[2][1];
+}
+)";
+
+    auto* expect = R"(
+struct mat2x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+}
+
+struct S {
+  a : array<mat2x2<f32>, 3>,
+}
+
+struct S_std140 {
+  a : array<mat2x2_f32, 3u>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn f() {
+  let l = s.a[2u].col1;
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32,
+       StructArrayMatUniform_LoadColumn_VariableArrayIndex_ConstColumnIndex_Mat2x2F32) {
+    auto* src = R"(
+struct S {
+  a : array<mat2x2<f32>, 3>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let I = 1;
+  let l = s.a[I][1];
+}
+)";
+
+    auto* expect = R"(
+struct mat2x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+}
+
+struct S {
+  a : array<mat2x2<f32>, 3>,
+}
+
+struct S_std140 {
+  a : array<mat2x2_f32, 3u>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn f() {
+  let I = 1;
+  let l = s.a[I].col1;
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32,
+       StructArrayMatUniform_LoadColumn_ConstArrayIndex_VariableColumnIndex_Mat2x2F32) {
+    auto* src = R"(
+struct S {
+  a : array<mat2x2<f32>, 3>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let I = 1;
+  let l = s.a[2][I];
+}
+)";
+
+    auto* expect = R"(
+struct mat2x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+}
+
+struct S {
+  a : array<mat2x2<f32>, 3>,
+}
+
+struct S_std140 {
+  a : array<mat2x2_f32, 3u>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn load_s_a_2_p0(p0 : u32) -> vec2<f32> {
+  switch(p0) {
+    case 0u: {
+      return s.a[2u].col0;
+    }
+    case 1u: {
+      return s.a[2u].col1;
+    }
+    default: {
+      return vec2<f32>();
+    }
+  }
+}
+
+fn f() {
+  let I = 1;
+  let l = load_s_a_2_p0(u32(I));
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32,
+       StructArrayMatUniform_LoadColumn_VariableArrayIndex_VariableColumnIndex_Mat2x2F32) {
+    auto* src = R"(
+struct S {
+  a : array<mat2x2<f32>, 3>,
+}
+
+@group(0) @binding(0) var<uniform> s : S;
+
+fn f() {
+  let I = 1;
+  let l = s.a[I][I];
+}
+)";
+
+    auto* expect = R"(
+struct mat2x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+}
+
+struct S {
+  a : array<mat2x2<f32>, 3>,
+}
+
+struct S_std140 {
+  a : array<mat2x2_f32, 3u>,
+}
+
+@group(0) @binding(0) var<uniform> s : S_std140;
+
+fn load_s_a_p0_p1(p0 : u32, p1 : u32) -> vec2<f32> {
+  switch(p1) {
+    case 0u: {
+      return s.a[p0].col0;
+    }
+    case 1u: {
+      return s.a[p0].col1;
+    }
+    default: {
+      return vec2<f32>();
+    }
+  }
+}
+
+fn f() {
+  let I = 1;
+  let l = load_s_a_p0_p1(u32(I), u32(I));
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, ArrayArrayMatUniform_LoadArrays_Mat2x2F32) {
+    auto* src = R"(
+@group(0) @binding(0) var<uniform> a : array<array<mat2x2<f32>, 3>, 4>;
+
+fn f() {
+  let l = a;
+}
+)";
+
+    auto* expect = R"(
+struct mat2x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x2_f32, 3u>, 4u>;
+
+fn conv_mat2x2_f32(val : mat2x2_f32) -> mat2x2<f32> {
+  return mat2x2<f32>(val.col0, val.col1);
+}
+
+fn conv_arr3_mat2x2_f32(val : array<mat2x2_f32, 3u>) -> array<mat2x2<f32>, 3u> {
+  var arr : array<mat2x2<f32>, 3u>;
+  for(var i : u32; (i < 3u); i = (i + 1)) {
+    arr[i] = conv_mat2x2_f32(val[i]);
+  }
+  return arr;
+}
+
+fn conv_arr4_arr3_mat2x2_f32(val : array<array<mat2x2_f32, 3u>, 4u>) -> array<array<mat2x2<f32>, 3u>, 4u> {
+  var arr : array<array<mat2x2<f32>, 3u>, 4u>;
+  for(var i : u32; (i < 4u); i = (i + 1)) {
+    arr[i] = conv_arr3_mat2x2_f32(val[i]);
+  }
+  return arr;
+}
+
+fn f() {
+  let l = conv_arr4_arr3_mat2x2_f32(a);
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, ArrayArrayMatUniform_LoadArray_ConstOuterArrayIndex_Mat2x2F32) {
+    auto* src = R"(
+@group(0) @binding(0) var<uniform> a : array<array<mat2x2<f32>, 3>, 4>;
+
+fn f() {
+  let l = a[3];
+}
+)";
+
+    auto* expect = R"(
+struct mat2x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x2_f32, 3u>, 4u>;
+
+fn conv_mat2x2_f32(val : mat2x2_f32) -> mat2x2<f32> {
+  return mat2x2<f32>(val.col0, val.col1);
+}
+
+fn conv_arr3_mat2x2_f32(val : array<mat2x2_f32, 3u>) -> array<mat2x2<f32>, 3u> {
+  var arr : array<mat2x2<f32>, 3u>;
+  for(var i : u32; (i < 3u); i = (i + 1)) {
+    arr[i] = conv_mat2x2_f32(val[i]);
+  }
+  return arr;
+}
+
+fn f() {
+  let l = conv_arr3_mat2x2_f32(a[3u]);
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32, ArrayArrayMatUniform_LoadArray_VariableOuterArrayIndex_Mat2x2F32) {
+    auto* src = R"(
+@group(0) @binding(0) var<uniform> a : array<array<mat2x2<f32>, 3>, 4>;
+
+fn f() {
+  let I = 1;
+  let l = a[I];
+}
+)";
+
+    auto* expect = R"(
+struct mat2x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x2_f32, 3u>, 4u>;
+
+fn conv_mat2x2_f32(val : mat2x2_f32) -> mat2x2<f32> {
+  return mat2x2<f32>(val.col0, val.col1);
+}
+
+fn conv_arr3_mat2x2_f32(val : array<mat2x2_f32, 3u>) -> array<mat2x2<f32>, 3u> {
+  var arr : array<mat2x2<f32>, 3u>;
+  for(var i : u32; (i < 3u); i = (i + 1)) {
+    arr[i] = conv_mat2x2_f32(val[i]);
+  }
+  return arr;
+}
+
+fn f() {
+  let I = 1;
+  let l = conv_arr3_mat2x2_f32(a[I]);
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32,
+       ArrayArrayMatUniform_LoadMatrix_ConstOuterArrayIndex_ConstInnerArrayIndex_Mat2x2F32) {
+    auto* src = R"(
+@group(0) @binding(0) var<uniform> a : array<array<mat2x2<f32>, 3>, 4>;
+
+fn f() {
+  let l = a[3][2];
+}
+)";
+
+    auto* expect = R"(
+struct mat2x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x2_f32, 3u>, 4u>;
+
+fn conv_mat2x2_f32(val : mat2x2_f32) -> mat2x2<f32> {
+  return mat2x2<f32>(val.col0, val.col1);
+}
+
+fn f() {
+  let l = conv_mat2x2_f32(a[3u][2u]);
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32,
+       ArrayArrayMatUniform_LoadMatrix_ConstOuterArrayIndex_VariableInnerArrayIndex_Mat2x2F32) {
+    auto* src = R"(
+@group(0) @binding(0) var<uniform> a : array<array<mat2x2<f32>, 3>, 4>;
+
+fn f() {
+  let I = 1;
+  let l = a[3][I];
+}
+)";
+
+    auto* expect = R"(
+struct mat2x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x2_f32, 3u>, 4u>;
+
+fn conv_mat2x2_f32(val : mat2x2_f32) -> mat2x2<f32> {
+  return mat2x2<f32>(val.col0, val.col1);
+}
+
+fn f() {
+  let I = 1;
+  let l = conv_mat2x2_f32(a[3u][I]);
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32,
+       ArrayArrayMatUniform_LoadMatrix_VariableOuterArrayIndex_ConstInnerArrayIndex_Mat2x2F32) {
+    auto* src = R"(
+@group(0) @binding(0) var<uniform> a : array<array<mat2x2<f32>, 3>, 4>;
+
+fn f() {
+  let I = 1;
+  let l = a[I][2];
+}
+)";
+
+    auto* expect = R"(
+struct mat2x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x2_f32, 3u>, 4u>;
+
+fn conv_mat2x2_f32(val : mat2x2_f32) -> mat2x2<f32> {
+  return mat2x2<f32>(val.col0, val.col1);
+}
+
+fn f() {
+  let I = 1;
+  let l = conv_mat2x2_f32(a[I][2u]);
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(Std140Test_F32,
+       ArrayArrayMatUniform_LoadMatrix_VariableOuterArrayIndex_VariableInnerArrayIndex_Mat2x2F32) {
+    auto* src = R"(
+@group(0) @binding(0) var<uniform> a : array<array<mat2x2<f32>, 3>, 4>;
+
+fn f() {
+  let I = 1;
+  let l = a[I][I];
+}
+)";
+
+    auto* expect = R"(
+struct mat2x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x2_f32, 3u>, 4u>;
+
+fn conv_mat2x2_f32(val : mat2x2_f32) -> mat2x2<f32> {
+  return mat2x2<f32>(val.col0, val.col1);
+}
+
+fn f() {
+  let I = 1;
+  let l = conv_mat2x2_f32(a[I][I]);
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(
+    Std140Test_F32,
+    ArrayArrayMatUniform_LoadColumn_ConstOuterArrayIndex_ConstInnerArrayIndex_ConstColumnIndex_Mat2x2F32) {
+    auto* src = R"(
+@group(0) @binding(0) var<uniform> a : array<array<mat2x2<f32>, 3>, 4>;
+
+fn f() {
+  let l = a[3][2][1];
+}
+)";
+
+    auto* expect = R"(
+struct mat2x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x2_f32, 3u>, 4u>;
+
+fn f() {
+  let l = a[3u][2u].col1;
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(
+    Std140Test_F32,
+    ArrayArrayMatUniform_LoadColumn_ConstOuterArrayIndex_ConstInnerArrayIndex_VariableColumnIndex_Mat2x2F32) {
+    auto* src = R"(
+@group(0) @binding(0) var<uniform> a : array<array<mat2x2<f32>, 3>, 4>;
+
+fn f() {
+  let I = 1;
+  let l = a[3][2][I];
+}
+)";
+
+    auto* expect = R"(
+struct mat2x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x2_f32, 3u>, 4u>;
+
+fn load_a_3_2_p0(p0 : u32) -> vec2<f32> {
+  switch(p0) {
+    case 0u: {
+      return a[3u][2u].col0;
+    }
+    case 1u: {
+      return a[3u][2u].col1;
+    }
+    default: {
+      return vec2<f32>();
+    }
+  }
+}
+
+fn f() {
+  let I = 1;
+  let l = load_a_3_2_p0(u32(I));
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(
+    Std140Test_F32,
+    ArrayArrayMatUniform_LoadColumn_ConstOuterArrayIndex_VariableInnerArrayIndex_ConstColumnIndex_Mat2x2F32) {
+    auto* src = R"(
+@group(0) @binding(0) var<uniform> a : array<array<mat2x2<f32>, 3>, 4>;
+
+fn f() {
+  let I = 1;
+  let l = a[3][I][1];
+}
+)";
+
+    auto* expect = R"(
+struct mat2x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x2_f32, 3u>, 4u>;
+
+fn f() {
+  let I = 1;
+  let l = a[3u][I].col1;
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(
+    Std140Test_F32,
+    ArrayArrayMatUniform_LoadColumn_ConstOuterArrayIndex_VariableInnerArrayIndex_VariableColumnIndex_Mat2x2F32) {
+    auto* src = R"(
+@group(0) @binding(0) var<uniform> a : array<array<mat2x2<f32>, 3>, 4>;
+
+fn f() {
+  let I = 1;
+  let J = 2;
+  let l = a[3][I][J];
+}
+)";
+
+    auto* expect = R"(
+struct mat2x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x2_f32, 3u>, 4u>;
+
+fn load_a_3_p0_p1(p0 : u32, p1 : u32) -> vec2<f32> {
+  switch(p1) {
+    case 0u: {
+      return a[3u][p0].col0;
+    }
+    case 1u: {
+      return a[3u][p0].col1;
+    }
+    default: {
+      return vec2<f32>();
+    }
+  }
+}
+
+fn f() {
+  let I = 1;
+  let J = 2;
+  let l = load_a_3_p0_p1(u32(I), u32(J));
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(
+    Std140Test_F32,
+    ArrayArrayMatUniform_LoadColumn_VariableOuterArrayIndex_ConstInnerArrayIndex_ConstColumnIndex_Mat2x2F32) {
+    auto* src = R"(
+@group(0) @binding(0) var<uniform> a : array<array<mat2x2<f32>, 3>, 4>;
+
+fn f() {
+  let I = 1;
+  let l = a[I][2][1];
+}
+)";
+
+    auto* expect = R"(
+struct mat2x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x2_f32, 3u>, 4u>;
+
+fn f() {
+  let I = 1;
+  let l = a[I][2u].col1;
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(
+    Std140Test_F32,
+    ArrayArrayMatUniform_LoadColumn_VariableOuterArrayIndex_ConstInnerArrayIndex_VariableColumnIndex_Mat2x2F32) {
+    auto* src = R"(
+@group(0) @binding(0) var<uniform> a : array<array<mat2x2<f32>, 3>, 4>;
+
+fn f() {
+  let I = 1;
+  let J = 2;
+  let l = a[I][2][J];
+}
+)";
+
+    auto* expect = R"(
+struct mat2x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x2_f32, 3u>, 4u>;
+
+fn load_a_p0_2_p1(p0 : u32, p1 : u32) -> vec2<f32> {
+  switch(p1) {
+    case 0u: {
+      return a[p0][2u].col0;
+    }
+    case 1u: {
+      return a[p0][2u].col1;
+    }
+    default: {
+      return vec2<f32>();
+    }
+  }
+}
+
+fn f() {
+  let I = 1;
+  let J = 2;
+  let l = load_a_p0_2_p1(u32(I), u32(J));
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(
+    Std140Test_F32,
+    ArrayArrayMatUniform_LoadColumn_VariableOuterArrayIndex_VariableInnerArrayIndex_ConstColumnIndex_Mat2x2F32) {
+    auto* src = R"(
+@group(0) @binding(0) var<uniform> a : array<array<mat2x2<f32>, 3>, 4>;
+
+fn f() {
+  let I = 1;
+  let J = 2;
+  let l = a[I][J][1];
+}
+)";
+
+    auto* expect = R"(
+struct mat2x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x2_f32, 3u>, 4u>;
+
+fn f() {
+  let I = 1;
+  let J = 2;
+  let l = a[I][J].col1;
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+TEST_F(
+    Std140Test_F32,
+    ArrayArrayMatUniform_LoadColumn_VariableOuterArrayIndex_VariableInnerArrayIndex_VariableColumnIndex_Mat2x2F32) {
+    auto* src = R"(
+@group(0) @binding(0) var<uniform> a : array<array<mat2x2<f32>, 3>, 4>;
+
+fn f() {
+  let I = 0;
+  let J = 1;
+  let K = 2;
+  let l = a[I][J][K];
+}
+)";
+
+    auto* expect = R"(
+struct mat2x2_f32 {
+  col0 : vec2<f32>,
+  col1 : vec2<f32>,
+}
+
+@group(0) @binding(0) var<uniform> a : array<array<mat2x2_f32, 3u>, 4u>;
+
+fn load_a_p0_p1_p2(p0 : u32, p1 : u32, p2 : u32) -> vec2<f32> {
+  switch(p2) {
+    case 0u: {
+      return a[p0][p1].col0;
+    }
+    case 1u: {
+      return a[p0][p1].col1;
+    }
+    default: {
+      return vec2<f32>();
+    }
+  }
+}
+
+fn f() {
+  let I = 0;
+  let J = 1;
+  let K = 2;
+  let l = load_a_p0_p1_p2(u32(I), u32(J), u32(K));
+}
+)";
+
+    auto got = Run<Std140>(src);
+
+    EXPECT_EQ(expect, str(got));
+}
+
+}  // namespace
+}  // namespace tint::transform

diff --git a/src/tint/transform/std140_test.cc b/src/tint/transform/std140_test.cc
index 1ec2e09..73221bc 100644
--- a/src/tint/transform/std140_test.cc
+++ b/src/tint/transform/std140_test.cc

@@ -21,6 +21,12 @@
 #include "src/tint/transform/test_helper.h"
 #include "src/tint/utils/string.h"
 
+// This file contains the should-run tests and a trivial empty-module test for the Std140
+// transform. For readable tests of the transform's output, please refer to std140_f32_test.cc for
+// f32 matrices and std140_f16_test.cc for f16 matrices. For exhaustive tests that run the Std140
+// transform on all shapes of both f32 and f16 matrices, looping over all valid literal indices
+// when required, please refer to std140_exhaustive_test.cc.
+
 namespace tint::transform {
 namespace {
 
@@ -96,6 +102,8 @@
 
 TEST_P(Std140TestShouldRun, StructStorage) {
     std::string src = R"(
+enable f16;
+
 struct S {
   m : ${mat},
 }
@@ -110,6 +118,8 @@
 
 TEST_P(Std140TestShouldRun, StructUniform) {
     std::string src = R"(
+enable f16;
+
 struct S {
   m : ${mat},
 }
@@ -124,6 +134,8 @@
 
 TEST_P(Std140TestShouldRun, ArrayStorage) {
     std::string src = R"(
+enable f16;
+
 @group(0) @binding(0) var<storage> s : array<${mat}, 2>;
 )";
 
@@ -141,6 +153,8 @@
     }
 
     std::string src = R"(
+enable f16;
+
 @group(0) @binding(0) var<uniform> s : array<${mat}, 2>;
 )";
 
@@ -161,6 +175,15 @@
                              {4, 2, MatrixType::f32},
                              {4, 3, MatrixType::f32},
                              {4, 4, MatrixType::f32},
+                             {2, 2, MatrixType::f16},
+                             {2, 3, MatrixType::f16},
+                             {2, 4, MatrixType::f16},
+                             {3, 2, MatrixType::f16},
+                             {3, 3, MatrixType::f16},
+                             {3, 4, MatrixType::f16},
+                             {4, 2, MatrixType::f16},
+                             {4, 3, MatrixType::f16},
+                             {4, 4, MatrixType::f16},
                          }));
 
 TEST_F(Std140Test, EmptyModule) {
@@ -173,3336 +196,5 @@
     EXPECT_EQ(expect, str(got));
 }
 
-using Std140Test_F32 = Std140Test;
-
-TEST_F(Std140Test_F32, StructMatricesUniform) {
-    auto* src = R"(
-struct S2x2F32 {
-  m : mat2x2<f32>,
-}
-struct S3x2F32 {
-  m : mat3x2<f32>,
-}
-struct S4x2F32 {
-  m : mat4x2<f32>,
-}
-struct S2x3F32 {
-  m : mat2x3<f32>,
-}
-struct S3x3F32 {
-  m : mat3x3<f32>,
-}
-struct S4x3F32 {
-  m : mat4x3<f32>,
-}
-struct S2x4F32 {
-  m : mat2x4<f32>,
-}
-struct S3x4F32 {
-  m : mat3x4<f32>,
-}
-struct S4x4F32 {
-  m : mat4x4<f32>,
-}
-
-@group(2) @binding(2) var<uniform> s2x2f32 : S2x2F32;
-@group(3) @binding(2) var<uniform> s3x2f32 : S3x2F32;
-@group(4) @binding(2) var<uniform> s4x2f32 : S4x2F32;
-@group(2) @binding(3) var<uniform> s2x3f32 : S2x3F32;
-@group(3) @binding(3) var<uniform> s3x3f32 : S3x3F32;
-@group(4) @binding(3) var<uniform> s4x3f32 : S4x3F32;
-@group(2) @binding(4) var<uniform> s2x4f32 : S2x4F32;
-@group(3) @binding(4) var<uniform> s3x4f32 : S3x4F32;
-@group(4) @binding(4) var<uniform> s4x4f32 : S4x4F32;
-)";
-
-    auto* expect = R"(
-struct S2x2F32 {
-  m : mat2x2<f32>,
-}
-
-struct S2x2F32_std140 {
-  m_0 : vec2<f32>,
-  m_1 : vec2<f32>,
-}
-
-struct S3x2F32 {
-  m : mat3x2<f32>,
-}
-
-struct S3x2F32_std140 {
-  m_0 : vec2<f32>,
-  m_1 : vec2<f32>,
-  m_2 : vec2<f32>,
-}
-
-struct S4x2F32 {
-  m : mat4x2<f32>,
-}
-
-struct S4x2F32_std140 {
-  m_0 : vec2<f32>,
-  m_1 : vec2<f32>,
-  m_2 : vec2<f32>,
-  m_3 : vec2<f32>,
-}
-
-struct S2x3F32 {
-  m : mat2x3<f32>,
-}
-
-struct S3x3F32 {
-  m : mat3x3<f32>,
-}
-
-struct S4x3F32 {
-  m : mat4x3<f32>,
-}
-
-struct S2x4F32 {
-  m : mat2x4<f32>,
-}
-
-struct S3x4F32 {
-  m : mat3x4<f32>,
-}
-
-struct S4x4F32 {
-  m : mat4x4<f32>,
-}
-
-@group(2) @binding(2) var<uniform> s2x2f32 : S2x2F32_std140;
-
-@group(3) @binding(2) var<uniform> s3x2f32 : S3x2F32_std140;
-
-@group(4) @binding(2) var<uniform> s4x2f32 : S4x2F32_std140;
-
-@group(2) @binding(3) var<uniform> s2x3f32 : S2x3F32;
-
-@group(3) @binding(3) var<uniform> s3x3f32 : S3x3F32;
-
-@group(4) @binding(3) var<uniform> s4x3f32 : S4x3F32;
-
-@group(2) @binding(4) var<uniform> s2x4f32 : S2x4F32;
-
-@group(3) @binding(4) var<uniform> s3x4f32 : S3x4F32;
-
-@group(4) @binding(4) var<uniform> s4x4f32 : S4x4F32;
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-// In the following tests we only test `mat2x2<f32>` for matrix used as array element type and
-// `mat3x2<f32>` otherwise, and set all constant column index to 1, row index 0, inner array index
-// 2, and outer array index 3. For exhaustive tests, i.e. tests on all matrix shape and different
-// valid constant index, please refer to std140_exhaustive_test.cc
-
-TEST_F(Std140Test_F32, SingleStructMatUniform_Mat3x2F32) {
-    auto* src = R"(
-struct S {
-  m : mat3x2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> s : S;
-)";
-
-    auto* expect = R"(
-struct S {
-  m : mat3x2<f32>,
-}
-
-struct S_std140 {
-  m_0 : vec2<f32>,
-  m_1 : vec2<f32>,
-  m_2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> s : S_std140;
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, CustomAlign_Mat3x2F32) {
-    auto* src = R"(
-struct S {
-  before : i32,
-  @align(128)
-  m : mat3x2<f32>,
-  after : i32,
-}
-
-@group(0) @binding(0) var<uniform> s : S;
-)";
-
-    auto* expect = R"(
-struct S {
-  before : i32,
-  @align(128)
-  m : mat3x2<f32>,
-  after : i32,
-}
-
-struct S_std140 {
-  before : i32,
-  @align(128i)
-  m_0 : vec2<f32>,
-  m_1 : vec2<f32>,
-  m_2 : vec2<f32>,
-  after : i32,
-}
-
-@group(0) @binding(0) var<uniform> s : S_std140;
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, CustomSizeMat_Mat3x2F32) {
-    auto* src = R"(
-struct S {
-  before : i32,
-  @size(128)
-  m : mat3x2<f32>,
-  after : i32,
-}
-
-@group(0) @binding(0) var<uniform> s : S;
-)";
-
-    auto* expect = R"(
-struct S {
-  before : i32,
-  @size(128)
-  m : mat3x2<f32>,
-  after : i32,
-}
-
-struct S_std140 {
-  before : i32,
-  m_0 : vec2<f32>,
-  m_1 : vec2<f32>,
-  @size(112)
-  m_2 : vec2<f32>,
-  after : i32,
-}
-
-@group(0) @binding(0) var<uniform> s : S_std140;
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, CustomAlignAndSize_Mat3x2F32) {
-    auto* src = R"(
-struct S {
-  before : i32,
-  @align(128) @size(128)
-  m : mat3x2<f32>,
-  after : i32,
-}
-
-@group(0) @binding(0) var<uniform> s : S;
-)";
-
-    auto* expect = R"(
-struct S {
-  before : i32,
-  @align(128) @size(128)
-  m : mat3x2<f32>,
-  after : i32,
-}
-
-struct S_std140 {
-  before : i32,
-  @align(128i)
-  m_0 : vec2<f32>,
-  m_1 : vec2<f32>,
-  @size(112)
-  m_2 : vec2<f32>,
-  after : i32,
-}
-
-@group(0) @binding(0) var<uniform> s : S_std140;
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, MatrixUsageInForLoop_Mat3x2F32) {
-    auto* src = R"(
-struct S {
-  m : mat3x2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> s : S;
-
-fn f() {
-  for(var i = u32(s.m[0][0]); (i < u32(s.m[i][1])); i += u32(s.m[1][i])) {
-  }
-}
-)";
-
-    auto* expect = R"(
-struct S {
-  m : mat3x2<f32>,
-}
-
-struct S_std140 {
-  m_0 : vec2<f32>,
-  m_1 : vec2<f32>,
-  m_2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> s : S_std140;
-
-fn load_s_m_p0_1(p0 : u32) -> f32 {
-  switch(p0) {
-    case 0u: {
-      return s.m_0[1u];
-    }
-    case 1u: {
-      return s.m_1[1u];
-    }
-    case 2u: {
-      return s.m_2[1u];
-    }
-    default: {
-      return f32();
-    }
-  }
-}
-
-fn f() {
-  for(var i = u32(s.m_0[0u]); (i < u32(load_s_m_p0_1(u32(i)))); i += u32(s.m_1[i])) {
-  }
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, MatUniform_LoadMatrix_Mat3x2F32) {
-    auto* src = R"(
-@group(0) @binding(0) var<uniform> m : mat3x2<f32>;
-
-fn f() {
-  let l = m;
-}
-)";
-
-    auto* expect = R"(
-struct mat3x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-  col2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> m : mat3x2_f32;
-
-fn conv_mat3x2_f32(val : mat3x2_f32) -> mat3x2<f32> {
-  return mat3x2<f32>(val.col0, val.col1, val.col2);
-}
-
-fn f() {
-  let l = conv_mat3x2_f32(m);
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, MatUniform_LoadColumn_ConstIndex_Mat3x2F32) {
-    auto* src = R"(
-@group(0) @binding(0) var<uniform> a : mat3x2<f32>;
-
-fn f() {
-  let l = a[1];
-}
-)";
-
-    auto* expect = R"(
-struct mat3x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-  col2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : mat3x2_f32;
-
-fn f() {
-  let l = a.col1;
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, MatUniform_LoadColumn_VariableIndex_Mat3x2F32) {
-    auto* src = R"(
-@group(0) @binding(0) var<uniform> a : mat3x2<f32>;
-
-fn f() {
-  let I = 1;
-  let l = a[I];
-}
-)";
-
-    auto* expect = R"(
-struct mat3x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-  col2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : mat3x2_f32;
-
-fn load_a_p0(p0 : u32) -> vec2<f32> {
-  switch(p0) {
-    case 0u: {
-      return a.col0;
-    }
-    case 1u: {
-      return a.col1;
-    }
-    case 2u: {
-      return a.col2;
-    }
-    default: {
-      return vec2<f32>();
-    }
-  }
-}
-
-fn f() {
-  let I = 1;
-  let l = load_a_p0(u32(I));
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, MatUniform_LoadColumnSwizzle_ConstIndex_Mat3x2F32) {
-    auto* src = R"(
-@group(0) @binding(0) var<uniform> a : mat3x2<f32>;
-
-fn f() {
-  let l = a[1].yx;
-}
-)";
-
-    auto* expect = R"(
-struct mat3x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-  col2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : mat3x2_f32;
-
-fn f() {
-  let l = a.col1.yx;
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, MatUniform_LoadColumnSwizzle_VariableIndex_Mat3x2F32) {
-    auto* src = R"(
-@group(0) @binding(0) var<uniform> a : mat3x2<f32>;
-
-fn f() {
-  let I = 1;
-  let l = a[I].yx;
-}
-)";
-
-    auto* expect = R"(
-struct mat3x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-  col2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : mat3x2_f32;
-
-fn load_a_p0_yx(p0 : u32) -> vec2<f32> {
-  switch(p0) {
-    case 0u: {
-      return a.col0.yx;
-    }
-    case 1u: {
-      return a.col1.yx;
-    }
-    case 2u: {
-      return a.col2.yx;
-    }
-    default: {
-      return vec2<f32>();
-    }
-  }
-}
-
-fn f() {
-  let I = 1;
-  let l = load_a_p0_yx(u32(I));
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, MatUniform_LoadScalar_ConstColumnIndex_ConstRowIndex_Mat3x2F32) {
-    auto* src = R"(
-@group(0) @binding(0) var<uniform> a : mat3x2<f32>;
-
-fn f() {
-  let l = a[1][0];
-}
-)";
-
-    auto* expect = R"(
-struct mat3x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-  col2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : mat3x2_f32;
-
-fn f() {
-  let l = a.col1[0u];
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, MatUniform_LoadScalar_VariableColumnIndex_ConstRowIndex_Mat3x2F32) {
-    auto* src = R"(
-@group(0) @binding(0) var<uniform> a : mat3x2<f32>;
-
-fn f() {
-  let I = 0;
-  let l = a[I][0];
-}
-)";
-
-    auto* expect = R"(
-struct mat3x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-  col2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : mat3x2_f32;
-
-fn load_a_p0_0(p0 : u32) -> f32 {
-  switch(p0) {
-    case 0u: {
-      return a.col0[0u];
-    }
-    case 1u: {
-      return a.col1[0u];
-    }
-    case 2u: {
-      return a.col2[0u];
-    }
-    default: {
-      return f32();
-    }
-  }
-}
-
-fn f() {
-  let I = 0;
-  let l = load_a_p0_0(u32(I));
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, MatUniform_LoadScalar_ConstColumnIndex_VariableRowIndex_Mat3x2F32) {
-    auto* src = R"(
-@group(0) @binding(0) var<uniform> a : mat3x2<f32>;
-
-fn f() {
-  let I = 0;
-  let l = a[1][I];
-}
-)";
-
-    auto* expect = R"(
-struct mat3x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-  col2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : mat3x2_f32;
-
-fn f() {
-  let I = 0;
-  let l = a.col1[I];
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, MatUniform_LoadScalar_VariableColumnIndex_VariableRowIndex_Mat3x2F32) {
-    auto* src = R"(
-@group(0) @binding(0) var<uniform> a : mat3x2<f32>;
-
-fn f() {
-  let I = 0;
-  let l = a[I][I];
-}
-)";
-
-    auto* expect = R"(
-struct mat3x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-  col2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : mat3x2_f32;
-
-fn load_a_p0_p1(p0 : u32, p1 : u32) -> f32 {
-  switch(p0) {
-    case 0u: {
-      return a.col0[p1];
-    }
-    case 1u: {
-      return a.col1[p1];
-    }
-    case 2u: {
-      return a.col2[p1];
-    }
-    default: {
-      return f32();
-    }
-  }
-}
-
-fn f() {
-  let I = 0;
-  let l = load_a_p0_p1(u32(I), u32(I));
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, StructMatUniform_NameCollision_Mat3x2F32) {
-    auto* src = R"(
-struct S {
-  m_1 : i32,
-  m : mat3x2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> s : S;
-)";
-
-    auto* expect = R"(
-struct S {
-  m_1 : i32,
-  m : mat3x2<f32>,
-}
-
-struct S_std140 {
-  m_1 : i32,
-  m__0 : vec2<f32>,
-  m__1 : vec2<f32>,
-  m__2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> s : S_std140;
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, StructMatUniform_LoadStruct_Mat3x2F32) {
-    auto* src = R"(
-struct S {
-  m : mat3x2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> s : S;
-
-fn f() {
-  let l = s;
-}
-)";
-
-    auto* expect = R"(
-struct S {
-  m : mat3x2<f32>,
-}
-
-struct S_std140 {
-  m_0 : vec2<f32>,
-  m_1 : vec2<f32>,
-  m_2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> s : S_std140;
-
-fn conv_S(val : S_std140) -> S {
-  return S(mat3x2<f32>(val.m_0, val.m_1, val.m_2));
-}
-
-fn f() {
-  let l = conv_S(s);
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, StructMatUniform_LoadMatrix_Mat3x2F32) {
-    auto* src = R"(
-struct S {
-  m : mat3x2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> s : S;
-
-fn f() {
-  let l = s.m;
-}
-)";
-
-    auto* expect = R"(
-struct S {
-  m : mat3x2<f32>,
-}
-
-struct S_std140 {
-  m_0 : vec2<f32>,
-  m_1 : vec2<f32>,
-  m_2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> s : S_std140;
-
-fn load_s_m() -> mat3x2<f32> {
-  let s = &(s);
-  return mat3x2<f32>((*(s)).m_0, (*(s)).m_1, (*(s)).m_2);
-}
-
-fn f() {
-  let l = load_s_m();
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, StructMatUniform_LoadColumn_ConstIndex_Mat3x2F32) {
-    auto* src = R"(
-struct S {
-  m : mat3x2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> s : S;
-
-fn f() {
-  let l = s.m[1];
-}
-)";
-
-    auto* expect = R"(
-struct S {
-  m : mat3x2<f32>,
-}
-
-struct S_std140 {
-  m_0 : vec2<f32>,
-  m_1 : vec2<f32>,
-  m_2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> s : S_std140;
-
-fn f() {
-  let l = s.m_1;
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, StructMatUniform_LoadColumn_VariableIndex_Mat3x2F32) {
-    auto* src = R"(
-struct S {
-  m : mat3x2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> s : S;
-
-fn f() {
-  let I = 0;
-  let l = s.m[I];
-}
-)";
-
-    auto* expect = R"(
-struct S {
-  m : mat3x2<f32>,
-}
-
-struct S_std140 {
-  m_0 : vec2<f32>,
-  m_1 : vec2<f32>,
-  m_2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> s : S_std140;
-
-fn load_s_m_p0(p0 : u32) -> vec2<f32> {
-  switch(p0) {
-    case 0u: {
-      return s.m_0;
-    }
-    case 1u: {
-      return s.m_1;
-    }
-    case 2u: {
-      return s.m_2;
-    }
-    default: {
-      return vec2<f32>();
-    }
-  }
-}
-
-fn f() {
-  let I = 0;
-  let l = load_s_m_p0(u32(I));
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, StructMatUniform_LoadScalar_ConstColumnIndex_ConstRowIndex_Mat3x2F32) {
-    auto* src = R"(
-struct S {
-  m : mat3x2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> s : S;
-
-fn f() {
-  let l = s.m[1][0];
-}
-)";
-
-    auto* expect = R"(
-struct S {
-  m : mat3x2<f32>,
-}
-
-struct S_std140 {
-  m_0 : vec2<f32>,
-  m_1 : vec2<f32>,
-  m_2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> s : S_std140;
-
-fn f() {
-  let l = s.m_1[0u];
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, StructMatUniform_LoadScalar_VariableColumnIndex_ConstRowIndex_Mat3x2F32) {
-    auto* src = R"(
-struct S {
-  m : mat3x2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> s : S;
-
-fn f() {
-  let I = 0;
-  let l = s.m[I][0];
-}
-)";
-
-    auto* expect = R"(
-struct S {
-  m : mat3x2<f32>,
-}
-
-struct S_std140 {
-  m_0 : vec2<f32>,
-  m_1 : vec2<f32>,
-  m_2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> s : S_std140;
-
-fn load_s_m_p0_0(p0 : u32) -> f32 {
-  switch(p0) {
-    case 0u: {
-      return s.m_0[0u];
-    }
-    case 1u: {
-      return s.m_1[0u];
-    }
-    case 2u: {
-      return s.m_2[0u];
-    }
-    default: {
-      return f32();
-    }
-  }
-}
-
-fn f() {
-  let I = 0;
-  let l = load_s_m_p0_0(u32(I));
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, StructMatUniform_LoadScalar_ConstColumnIndex_VariableRowIndex_Mat3x2F32) {
-    auto* src = R"(
-struct S {
-  m : mat3x2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> s : S;
-
-fn f() {
-  let I = 0;
-  let l = s.m[1][I];
-}
-)";
-
-    auto* expect = R"(
-struct S {
-  m : mat3x2<f32>,
-}
-
-struct S_std140 {
-  m_0 : vec2<f32>,
-  m_1 : vec2<f32>,
-  m_2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> s : S_std140;
-
-fn f() {
-  let I = 0;
-  let l = s.m_1[I];
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, StructMatUniform_LoadScalar_VariableColumnIndex_VariableRowIndex_Mat3x2F32) {
-    auto* src = R"(
-struct S {
-  m : mat3x2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> s : S;
-
-fn f() {
-  let I = 0;
-  let l = s.m[I][I];
-}
-)";
-
-    auto* expect = R"(
-struct S {
-  m : mat3x2<f32>,
-}
-
-struct S_std140 {
-  m_0 : vec2<f32>,
-  m_1 : vec2<f32>,
-  m_2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> s : S_std140;
-
-fn load_s_m_p0_p1(p0 : u32, p1 : u32) -> f32 {
-  switch(p0) {
-    case 0u: {
-      return s.m_0[p1];
-    }
-    case 1u: {
-      return s.m_1[p1];
-    }
-    case 2u: {
-      return s.m_2[p1];
-    }
-    default: {
-      return f32();
-    }
-  }
-}
-
-fn f() {
-  let I = 0;
-  let l = load_s_m_p0_p1(u32(I), u32(I));
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, ArrayStructMatUniform_LoadArray_Mat3x2F32) {
-    auto* src = R"(
-struct S {
-  @size(64)
-  m : mat3x2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<S, 3>;
-
-fn f() {
-  let l = a;
-}
-)";
-
-    auto* expect = R"(
-struct S {
-  @size(64)
-  m : mat3x2<f32>,
-}
-
-struct S_std140 {
-  m_0 : vec2<f32>,
-  m_1 : vec2<f32>,
-  @size(48)
-  m_2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<S_std140, 3u>;
-
-fn conv_S(val : S_std140) -> S {
-  return S(mat3x2<f32>(val.m_0, val.m_1, val.m_2));
-}
-
-fn conv_arr3_S(val : array<S_std140, 3u>) -> array<S, 3u> {
-  var arr : array<S, 3u>;
-  for(var i : u32; (i < 3u); i = (i + 1)) {
-    arr[i] = conv_S(val[i]);
-  }
-  return arr;
-}
-
-fn f() {
-  let l = conv_arr3_S(a);
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, ArrayStructMatUniform_LoadStruct_ConstIndex_Mat3x2F32) {
-    auto* src = R"(
-struct S {
-  @size(64)
-  m : mat3x2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<S, 3>;
-
-fn f() {
-  let l = a[2];
-}
-)";
-
-    auto* expect = R"(
-struct S {
-  @size(64)
-  m : mat3x2<f32>,
-}
-
-struct S_std140 {
-  m_0 : vec2<f32>,
-  m_1 : vec2<f32>,
-  @size(48)
-  m_2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<S_std140, 3u>;
-
-fn conv_S(val : S_std140) -> S {
-  return S(mat3x2<f32>(val.m_0, val.m_1, val.m_2));
-}
-
-fn f() {
-  let l = conv_S(a[2u]);
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, ArrayStructMatUniform_LoadStruct_VariableIndex_Mat3x2F32) {
-    auto* src = R"(
-struct S {
-  @size(64)
-  m : mat3x2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<S, 3>;
-
-fn f() {
-  let I = 1;
-  let l = a[I];
-}
-)";
-
-    auto* expect = R"(
-struct S {
-  @size(64)
-  m : mat3x2<f32>,
-}
-
-struct S_std140 {
-  m_0 : vec2<f32>,
-  m_1 : vec2<f32>,
-  @size(48)
-  m_2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<S_std140, 3u>;
-
-fn conv_S(val : S_std140) -> S {
-  return S(mat3x2<f32>(val.m_0, val.m_1, val.m_2));
-}
-
-fn f() {
-  let I = 1;
-  let l = conv_S(a[I]);
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, ArrayStructMatUniform_LoadMatrix_ConstArrayIndex_Mat3x2F32) {
-    auto* src = R"(
-struct S {
-  @size(64)
-  m : mat3x2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<S, 3>;
-
-fn f() {
-  let l = a[2].m;
-}
-)";
-
-    auto* expect = R"(
-struct S {
-  @size(64)
-  m : mat3x2<f32>,
-}
-
-struct S_std140 {
-  m_0 : vec2<f32>,
-  m_1 : vec2<f32>,
-  @size(48)
-  m_2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<S_std140, 3u>;
-
-fn load_a_2_m() -> mat3x2<f32> {
-  let s = &(a[2u]);
-  return mat3x2<f32>((*(s)).m_0, (*(s)).m_1, (*(s)).m_2);
-}
-
-fn f() {
-  let l = load_a_2_m();
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, ArrayStructMatUniform_LoadMatrix_VariableArrayIndex_Mat3x2F32) {
-    auto* src = R"(
-struct S {
-  @size(64)
-  m : mat3x2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<S, 3>;
-
-fn f() {
-  let I = 1;
-  let l = a[I].m;
-}
-)";
-
-    auto* expect = R"(
-struct S {
-  @size(64)
-  m : mat3x2<f32>,
-}
-
-struct S_std140 {
-  m_0 : vec2<f32>,
-  m_1 : vec2<f32>,
-  @size(48)
-  m_2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<S_std140, 3u>;
-
-fn load_a_p0_m(p0 : u32) -> mat3x2<f32> {
-  let s = &(a[p0]);
-  return mat3x2<f32>((*(s)).m_0, (*(s)).m_1, (*(s)).m_2);
-}
-
-fn f() {
-  let I = 1;
-  let l = load_a_p0_m(u32(I));
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32,
-       ArrayStructMatUniform_LoadColumn_ConstArrayIndex_ConstColumnIndex_Mat3x2F32) {
-    auto* src = R"(
-struct S {
-  @size(64)
-  m : mat3x2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<S, 3>;
-
-fn f() {
-  let l = a[2].m[1];
-}
-)";
-
-    auto* expect = R"(
-struct S {
-  @size(64)
-  m : mat3x2<f32>,
-}
-
-struct S_std140 {
-  m_0 : vec2<f32>,
-  m_1 : vec2<f32>,
-  @size(48)
-  m_2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<S_std140, 3u>;
-
-fn f() {
-  let l = a[2u].m_1;
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32,
-       ArrayStructMatUniform_LoadColumn_VariableArrayIndex_ConstColumnIndex_Mat3x2F32) {
-    auto* src = R"(
-struct S {
-  @size(64)
-  m : mat3x2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<S, 3>;
-
-fn f() {
-  let I = 1;
-  let l = a[I].m[1];
-}
-)";
-
-    auto* expect = R"(
-struct S {
-  @size(64)
-  m : mat3x2<f32>,
-}
-
-struct S_std140 {
-  m_0 : vec2<f32>,
-  m_1 : vec2<f32>,
-  @size(48)
-  m_2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<S_std140, 3u>;
-
-fn f() {
-  let I = 1;
-  let l = a[I].m_1;
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32,
-       ArrayStructMatUniform_LoadColumn_ConstArrayIndex_VariableColumnIndex_Mat3x2F32) {
-    auto* src = R"(
-struct S {
-  @size(64)
-  m : mat3x2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<S, 3>;
-
-fn f() {
-  let I = 1;
-  let l = a[2].m[I];
-}
-)";
-
-    auto* expect = R"(
-struct S {
-  @size(64)
-  m : mat3x2<f32>,
-}
-
-struct S_std140 {
-  m_0 : vec2<f32>,
-  m_1 : vec2<f32>,
-  @size(48)
-  m_2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<S_std140, 3u>;
-
-fn load_a_2_m_p0(p0 : u32) -> vec2<f32> {
-  switch(p0) {
-    case 0u: {
-      return a[2u].m_0;
-    }
-    case 1u: {
-      return a[2u].m_1;
-    }
-    case 2u: {
-      return a[2u].m_2;
-    }
-    default: {
-      return vec2<f32>();
-    }
-  }
-}
-
-fn f() {
-  let I = 1;
-  let l = load_a_2_m_p0(u32(I));
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32,
-       ArrayStructMatUniform_LoadColumn_VariableArrayIndex_VariableColumnIndex_Mat3x2F32) {
-    auto* src = R"(
-struct S {
-  @size(64)
-  m : mat3x2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<S, 3>;
-
-fn f() {
-  let I = 1;
-  let l = a[I].m[I];
-}
-)";
-
-    auto* expect = R"(
-struct S {
-  @size(64)
-  m : mat3x2<f32>,
-}
-
-struct S_std140 {
-  m_0 : vec2<f32>,
-  m_1 : vec2<f32>,
-  @size(48)
-  m_2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<S_std140, 3u>;
-
-fn load_a_p0_m_p1(p0 : u32, p1 : u32) -> vec2<f32> {
-  switch(p1) {
-    case 0u: {
-      return a[p0].m_0;
-    }
-    case 1u: {
-      return a[p0].m_1;
-    }
-    case 2u: {
-      return a[p0].m_2;
-    }
-    default: {
-      return vec2<f32>();
-    }
-  }
-}
-
-fn f() {
-  let I = 1;
-  let l = load_a_p0_m_p1(u32(I), u32(I));
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, ArrayStructArrayStructMatUniform_Loads_Mat3x2F32) {
-    auto* src = R"(
-struct Inner {
-  @size(64)
-  m : mat3x2<f32>,
-}
-
-struct Outer {
-  a : array<Inner, 4>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
-
-fn f() {
-  let I = 1;
-  let J = 2;
-  let K = 0;
-  let l_a : array<Outer, 4> = a;
-  let l_a_1 : Outer = a[1];
-  let l_a_I : Outer = a[I];
-  let l_a_2_a : array<Inner, 4> = a[2].a;
-  let l_a_I_a : array<Inner, 4> = a[I].a;
-  let l_a_3_a_1 : Inner = a[3].a[1];
-  let l_a_3_a_I : Inner = a[3].a[I];
-  let l_a_I_a_1 : Inner = a[I].a[1];
-  let l_a_I_a_J : Inner = a[I].a[J];
-  let l_a_0_a_2_m : mat3x2<f32> = a[0].a[2].m;
-  let l_a_0_a_I_m : mat3x2<f32> = a[0].a[I].m;
-  let l_a_I_a_2_m : mat3x2<f32> = a[I].a[2].m;
-  let l_a_I_a_J_m : mat3x2<f32> = a[I].a[J].m;
-  let l_a_1_a_3_m_0 : vec2<f32> = a[1].a[3].m[0];
-  let l_a_I_a_J_m_K : vec2<f32> = a[I].a[J].m[K];
-  let l_a_2_a_0_m_1_0 : f32 = a[2].a[0].m[1][0];
-  let l_a_I_a_J_m_K_I : f32 = a[I].a[J].m[K][I];
-}
-)";
-
-    auto* expect = R"(
-struct Inner {
-  @size(64)
-  m : mat3x2<f32>,
-}
-
-struct Inner_std140 {
-  m_0 : vec2<f32>,
-  m_1 : vec2<f32>,
-  @size(48)
-  m_2 : vec2<f32>,
-}
-
-struct Outer {
-  a : array<Inner, 4>,
-}
-
-struct Outer_std140 {
-  a : array<Inner_std140, 4u>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<Outer_std140, 4u>;
-
-fn conv_Inner(val : Inner_std140) -> Inner {
-  return Inner(mat3x2<f32>(val.m_0, val.m_1, val.m_2));
-}
-
-fn conv_arr4_Inner(val : array<Inner_std140, 4u>) -> array<Inner, 4u> {
-  var arr : array<Inner, 4u>;
-  for(var i : u32; (i < 4u); i = (i + 1)) {
-    arr[i] = conv_Inner(val[i]);
-  }
-  return arr;
-}
-
-fn conv_Outer(val : Outer_std140) -> Outer {
-  return Outer(conv_arr4_Inner(val.a));
-}
-
-fn conv_arr4_Outer(val : array<Outer_std140, 4u>) -> array<Outer, 4u> {
-  var arr : array<Outer, 4u>;
-  for(var i : u32; (i < 4u); i = (i + 1)) {
-    arr[i] = conv_Outer(val[i]);
-  }
-  return arr;
-}
-
-fn load_a_0_a_2_m() -> mat3x2<f32> {
-  let s = &(a[0u].a[2u]);
-  return mat3x2<f32>((*(s)).m_0, (*(s)).m_1, (*(s)).m_2);
-}
-
-fn load_a_0_a_p0_m(p0 : u32) -> mat3x2<f32> {
-  let s = &(a[0u].a[p0]);
-  return mat3x2<f32>((*(s)).m_0, (*(s)).m_1, (*(s)).m_2);
-}
-
-fn load_a_p0_a_2_m(p0 : u32) -> mat3x2<f32> {
-  let s = &(a[p0].a[2u]);
-  return mat3x2<f32>((*(s)).m_0, (*(s)).m_1, (*(s)).m_2);
-}
-
-fn load_a_p0_a_p1_m(p0 : u32, p1 : u32) -> mat3x2<f32> {
-  let s = &(a[p0].a[p1]);
-  return mat3x2<f32>((*(s)).m_0, (*(s)).m_1, (*(s)).m_2);
-}
-
-fn load_a_p0_a_p1_m_p2(p0 : u32, p1 : u32, p2 : u32) -> vec2<f32> {
-  switch(p2) {
-    case 0u: {
-      return a[p0].a[p1].m_0;
-    }
-    case 1u: {
-      return a[p0].a[p1].m_1;
-    }
-    case 2u: {
-      return a[p0].a[p1].m_2;
-    }
-    default: {
-      return vec2<f32>();
-    }
-  }
-}
-
-fn load_a_p0_a_p1_m_p2_p3(p0 : u32, p1 : u32, p2 : u32, p3 : u32) -> f32 {
-  switch(p2) {
-    case 0u: {
-      return a[p0].a[p1].m_0[p3];
-    }
-    case 1u: {
-      return a[p0].a[p1].m_1[p3];
-    }
-    case 2u: {
-      return a[p0].a[p1].m_2[p3];
-    }
-    default: {
-      return f32();
-    }
-  }
-}
-
-fn f() {
-  let I = 1;
-  let J = 2;
-  let K = 0;
-  let l_a : array<Outer, 4> = conv_arr4_Outer(a);
-  let l_a_1 : Outer = conv_Outer(a[1u]);
-  let l_a_I : Outer = conv_Outer(a[I]);
-  let l_a_2_a : array<Inner, 4> = conv_arr4_Inner(a[2u].a);
-  let l_a_I_a : array<Inner, 4> = conv_arr4_Inner(a[I].a);
-  let l_a_3_a_1 : Inner = conv_Inner(a[3u].a[1u]);
-  let l_a_3_a_I : Inner = conv_Inner(a[3u].a[I]);
-  let l_a_I_a_1 : Inner = conv_Inner(a[I].a[1u]);
-  let l_a_I_a_J : Inner = conv_Inner(a[I].a[J]);
-  let l_a_0_a_2_m : mat3x2<f32> = load_a_0_a_2_m();
-  let l_a_0_a_I_m : mat3x2<f32> = load_a_0_a_p0_m(u32(I));
-  let l_a_I_a_2_m : mat3x2<f32> = load_a_p0_a_2_m(u32(I));
-  let l_a_I_a_J_m : mat3x2<f32> = load_a_p0_a_p1_m(u32(I), u32(J));
-  let l_a_1_a_3_m_0 : vec2<f32> = a[1u].a[3u].m_0;
-  let l_a_I_a_J_m_K : vec2<f32> = load_a_p0_a_p1_m_p2(u32(I), u32(J), u32(K));
-  let l_a_2_a_0_m_1_0 : f32 = a[2u].a[0u].m_1[0u];
-  let l_a_I_a_J_m_K_I : f32 = load_a_p0_a_p1_m_p2_p3(u32(I), u32(J), u32(K), u32(I));
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, ArrayStructArrayStructMatUniform_LoadsViaPtrs_Mat3x2F32) {
-    auto* src = R"(
-struct Inner {
-  @size(64)
-  m : mat3x2<f32>,
-}
-
-struct Outer {
-  a : array<Inner, 4>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<Outer, 4>;
-
-fn f() {
-  let I = 1;
-  let J = 2;
-  let K = 0;
-  let p_a = &(a);
-  let p_a_3 = &((*(p_a))[3]);
-  let p_a_I = &((*(p_a))[I]);
-  let p_a_3_a = &((*(p_a_3)).a);
-  let p_a_I_a = &((*(p_a_I)).a);
-  let p_a_3_a_2 = &((*(p_a_3_a))[2]);
-  let p_a_3_a_I = &((*(p_a_3_a))[I]);
-  let p_a_I_a_2 = &((*(p_a_I_a))[2]);
-  let p_a_I_a_J = &((*(p_a_I_a))[J]);
-  let p_a_3_a_2_m = &((*(p_a_3_a_2)).m);
-  let p_a_3_a_I_m = &((*(p_a_3_a_I)).m);
-  let p_a_I_a_2_m = &((*(p_a_I_a_2)).m);
-  let p_a_I_a_J_m = &((*(p_a_I_a_J)).m);
-  let p_a_3_a_2_m_1 = &((*(p_a_3_a_2_m))[1]);
-  let p_a_I_a_J_m_K = &((*(p_a_I_a_J_m))[K]);
-  let l_a : array<Outer, 4> = *(p_a);
-  let l_a_3 : Outer = *(p_a_3);
-  let l_a_I : Outer = *(p_a_I);
-  let l_a_3_a : array<Inner, 4> = *(p_a_3_a);
-  let l_a_I_a : array<Inner, 4> = *(p_a_I_a);
-  let l_a_3_a_2 : Inner = *(p_a_3_a_2);
-  let l_a_3_a_I : Inner = *(p_a_3_a_I);
-  let l_a_I_a_2 : Inner = *(p_a_I_a_2);
-  let l_a_I_a_J : Inner = *(p_a_I_a_J);
-  let l_a_3_a_2_m : mat3x2<f32> = *(p_a_3_a_2_m);
-  let l_a_3_a_I_m : mat3x2<f32> = *(p_a_3_a_I_m);
-  let l_a_I_a_2_m : mat3x2<f32> = *(p_a_I_a_2_m);
-  let l_a_I_a_J_m : mat3x2<f32> = *(p_a_I_a_J_m);
-  let l_a_3_a_2_m_1 : vec2<f32> = *(p_a_3_a_2_m_1);
-  let l_a_I_a_J_m_K : vec2<f32> = *(p_a_I_a_J_m_K);
-  let l_a_2_a_0_m_1_0 : f32 = (*(p_a_3_a_2_m_1))[0];
-  let l_a_I_a_J_m_K_I : f32 = (*(p_a_I_a_J_m_K))[I];
-}
-)";
-
-    auto* expect = R"(
-struct Inner {
-  @size(64)
-  m : mat3x2<f32>,
-}
-
-struct Inner_std140 {
-  m_0 : vec2<f32>,
-  m_1 : vec2<f32>,
-  @size(48)
-  m_2 : vec2<f32>,
-}
-
-struct Outer {
-  a : array<Inner, 4>,
-}
-
-struct Outer_std140 {
-  a : array<Inner_std140, 4u>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<Outer_std140, 4u>;
-
-fn conv_Inner(val : Inner_std140) -> Inner {
-  return Inner(mat3x2<f32>(val.m_0, val.m_1, val.m_2));
-}
-
-fn conv_arr4_Inner(val : array<Inner_std140, 4u>) -> array<Inner, 4u> {
-  var arr : array<Inner, 4u>;
-  for(var i : u32; (i < 4u); i = (i + 1)) {
-    arr[i] = conv_Inner(val[i]);
-  }
-  return arr;
-}
-
-fn conv_Outer(val : Outer_std140) -> Outer {
-  return Outer(conv_arr4_Inner(val.a));
-}
-
-fn conv_arr4_Outer(val : array<Outer_std140, 4u>) -> array<Outer, 4u> {
-  var arr : array<Outer, 4u>;
-  for(var i : u32; (i < 4u); i = (i + 1)) {
-    arr[i] = conv_Outer(val[i]);
-  }
-  return arr;
-}
-
-fn load_a_3_a_2_m() -> mat3x2<f32> {
-  let s = &(a[3u].a[2u]);
-  return mat3x2<f32>((*(s)).m_0, (*(s)).m_1, (*(s)).m_2);
-}
-
-fn load_a_3_a_p0_m(p0 : u32) -> mat3x2<f32> {
-  let s = &(a[3u].a[p0]);
-  return mat3x2<f32>((*(s)).m_0, (*(s)).m_1, (*(s)).m_2);
-}
-
-fn load_a_p0_a_2_m(p0 : u32) -> mat3x2<f32> {
-  let s = &(a[p0].a[2u]);
-  return mat3x2<f32>((*(s)).m_0, (*(s)).m_1, (*(s)).m_2);
-}
-
-fn load_a_p0_a_p1_m(p0 : u32, p1 : u32) -> mat3x2<f32> {
-  let s = &(a[p0].a[p1]);
-  return mat3x2<f32>((*(s)).m_0, (*(s)).m_1, (*(s)).m_2);
-}
-
-fn load_a_p0_a_p1_m_p2(p0 : u32, p1 : u32, p2 : u32) -> vec2<f32> {
-  switch(p2) {
-    case 0u: {
-      return a[p0].a[p1].m_0;
-    }
-    case 1u: {
-      return a[p0].a[p1].m_1;
-    }
-    case 2u: {
-      return a[p0].a[p1].m_2;
-    }
-    default: {
-      return vec2<f32>();
-    }
-  }
-}
-
-fn load_a_p0_a_p1_m_p2_p3(p0 : u32, p1 : u32, p2 : u32, p3 : u32) -> f32 {
-  switch(p2) {
-    case 0u: {
-      return a[p0].a[p1].m_0[p3];
-    }
-    case 1u: {
-      return a[p0].a[p1].m_1[p3];
-    }
-    case 2u: {
-      return a[p0].a[p1].m_2[p3];
-    }
-    default: {
-      return f32();
-    }
-  }
-}
-
-fn f() {
-  let I = 1;
-  let J = 2;
-  let K = 0;
-  let p_a = conv_arr4_Outer(a);
-  let p_a_3 = conv_Outer(a[3u]);
-  let p_a_I = conv_Outer(a[I]);
-  let p_a_3_a = conv_arr4_Inner(a[3u].a);
-  let p_a_I_a = conv_arr4_Inner(a[I].a);
-  let p_a_3_a_2 = conv_Inner(a[3u].a[2u]);
-  let p_a_3_a_I = conv_Inner(a[3u].a[I]);
-  let p_a_I_a_2 = conv_Inner(a[I].a[2u]);
-  let p_a_I_a_J = conv_Inner(a[I].a[J]);
-  let p_a_3_a_2_m = load_a_3_a_2_m();
-  let p_a_3_a_I_m = load_a_3_a_p0_m(u32(I));
-  let p_a_I_a_2_m = load_a_p0_a_2_m(u32(I));
-  let p_a_I_a_J_m = load_a_p0_a_p1_m(u32(I), u32(J));
-  let p_a_3_a_2_m_1 = a[3u].a[2u].m_1;
-  let p_a_I_a_J_m_K = load_a_p0_a_p1_m_p2(u32(I), u32(J), u32(K));
-  let l_a : array<Outer, 4> = conv_arr4_Outer(a);
-  let l_a_3 : Outer = conv_Outer(a[3u]);
-  let l_a_I : Outer = conv_Outer(a[I]);
-  let l_a_3_a : array<Inner, 4> = conv_arr4_Inner(a[3u].a);
-  let l_a_I_a : array<Inner, 4> = conv_arr4_Inner(a[I].a);
-  let l_a_3_a_2 : Inner = conv_Inner(a[3u].a[2u]);
-  let l_a_3_a_I : Inner = conv_Inner(a[3u].a[I]);
-  let l_a_I_a_2 : Inner = conv_Inner(a[I].a[2u]);
-  let l_a_I_a_J : Inner = conv_Inner(a[I].a[J]);
-  let l_a_3_a_2_m : mat3x2<f32> = load_a_3_a_2_m();
-  let l_a_3_a_I_m : mat3x2<f32> = load_a_3_a_p0_m(u32(I));
-  let l_a_I_a_2_m : mat3x2<f32> = load_a_p0_a_2_m(u32(I));
-  let l_a_I_a_J_m : mat3x2<f32> = load_a_p0_a_p1_m(u32(I), u32(J));
-  let l_a_3_a_2_m_1 : vec2<f32> = a[3u].a[2u].m_1;
-  let l_a_I_a_J_m_K : vec2<f32> = load_a_p0_a_p1_m_p2(u32(I), u32(J), u32(K));
-  let l_a_2_a_0_m_1_0 : f32 = a[3u].a[2u].m_1[0u];
-  let l_a_I_a_J_m_K_I : f32 = load_a_p0_a_p1_m_p2_p3(u32(I), u32(J), u32(K), u32(I));
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, ArrayStructMatUniform_CopyArray_UniformToStorage_Mat3x2F32) {
-    auto* src = R"(
-struct S {
-  @size(64)
-  m : mat3x2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> u : array<S, 4>;
-
-@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
-
-fn f() {
-  s = u;
-}
-)";
-
-    auto* expect = R"(
-struct S {
-  @size(64)
-  m : mat3x2<f32>,
-}
-
-struct S_std140 {
-  m_0 : vec2<f32>,
-  m_1 : vec2<f32>,
-  @size(48)
-  m_2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> u : array<S_std140, 4u>;
-
-@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
-
-fn conv_S(val : S_std140) -> S {
-  return S(mat3x2<f32>(val.m_0, val.m_1, val.m_2));
-}
-
-fn conv_arr4_S(val : array<S_std140, 4u>) -> array<S, 4u> {
-  var arr : array<S, 4u>;
-  for(var i : u32; (i < 4u); i = (i + 1)) {
-    arr[i] = conv_S(val[i]);
-  }
-  return arr;
-}
-
-fn f() {
-  s = conv_arr4_S(u);
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, ArrayStructMatUniform_CopyStruct_UniformToWorkgroup_Mat3x2F32) {
-    auto* src = R"(
-struct S {
-  v : vec4<i32>,
-  @size(64)
-  m : mat3x2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> u : array<S, 4>;
-
-var<workgroup> w : array<S, 4>;
-
-fn f() {
-  w[0] = u[1];
-}
-)";
-
-    auto* expect = R"(
-struct S {
-  v : vec4<i32>,
-  @size(64)
-  m : mat3x2<f32>,
-}
-
-struct S_std140 {
-  v : vec4<i32>,
-  m_0 : vec2<f32>,
-  m_1 : vec2<f32>,
-  @size(48)
-  m_2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> u : array<S_std140, 4u>;
-
-var<workgroup> w : array<S, 4>;
-
-fn conv_S(val : S_std140) -> S {
-  return S(val.v, mat3x2<f32>(val.m_0, val.m_1, val.m_2));
-}
-
-fn f() {
-  w[0] = conv_S(u[1u]);
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, ArrayStructMatUniform_CopyMatrix_UniformToPrivate_Mat3x2F32) {
-    auto* src = R"(
-struct S {
-  v : vec4<i32>,
-  @size(64)
-  m : mat3x2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> u : array<S, 3>;
-
-var<private> p : array<S, 4>;
-
-fn f() {
-  p[2].m = u[1].m;
-}
-)";
-
-    auto* expect = R"(
-struct S {
-  v : vec4<i32>,
-  @size(64)
-  m : mat3x2<f32>,
-}
-
-struct S_std140 {
-  v : vec4<i32>,
-  m_0 : vec2<f32>,
-  m_1 : vec2<f32>,
-  @size(48)
-  m_2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> u : array<S_std140, 3u>;
-
-var<private> p : array<S, 4>;
-
-fn load_u_1_m() -> mat3x2<f32> {
-  let s = &(u[1u]);
-  return mat3x2<f32>((*(s)).m_0, (*(s)).m_1, (*(s)).m_2);
-}
-
-fn f() {
-  p[2].m = load_u_1_m();
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, ArrayStructMatUniform_CopyColumn_UniformToStorage_Mat3x2F32) {
-    auto* src = R"(
-struct S {
-  @size(64)
-  m : mat3x2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> u : array<S, 3>;
-
-@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
-
-fn f() {
-  s[3].m[1] = u[2].m[0];
-}
-)";
-
-    auto* expect = R"(
-struct S {
-  @size(64)
-  m : mat3x2<f32>,
-}
-
-struct S_std140 {
-  m_0 : vec2<f32>,
-  m_1 : vec2<f32>,
-  @size(48)
-  m_2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> u : array<S_std140, 3u>;
-
-@group(0) @binding(1) var<storage, read_write> s : array<S, 4>;
-
-fn f() {
-  s[3].m[1] = u[2u].m_0;
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, ArrayStructMatUniform_CopyColumnSwizzle_UniformToWorkgroup_Mat3x2F32) {
-    auto* src = R"(
-struct S {
-  @size(64)
-  m : mat3x2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> u : array<S, 4>;
-
-var<workgroup> w : array<S, 4>;
-
-fn f() {
-  w[3].m[1] = u[2].m[0].yx.yx;
-}
-)";
-
-    auto* expect = R"(
-struct S {
-  @size(64)
-  m : mat3x2<f32>,
-}
-
-struct S_std140 {
-  m_0 : vec2<f32>,
-  m_1 : vec2<f32>,
-  @size(48)
-  m_2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> u : array<S_std140, 4u>;
-
-var<workgroup> w : array<S, 4>;
-
-fn f() {
-  w[3].m[1] = u[2u].m_0.yx.yx;
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, ArrayStructMatUniform_CopyScalar_UniformToPrivate_Mat3x2F32) {
-    auto* src = R"(
-struct S {
-  v : vec4<i32>,
-  @size(64)
-  m : mat3x2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> u : array<S, 3>;
-
-var<private> p : array<S, 4>;
-
-fn f() {
-  p[3].m[1].x = u[2].m[0].y;
-}
-)";
-
-    auto* expect = R"(
-struct S {
-  v : vec4<i32>,
-  @size(64)
-  m : mat3x2<f32>,
-}
-
-struct S_std140 {
-  v : vec4<i32>,
-  m_0 : vec2<f32>,
-  m_1 : vec2<f32>,
-  @size(48)
-  m_2 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> u : array<S_std140, 3u>;
-
-var<private> p : array<S, 4>;
-
-fn f() {
-  p[3].m[1].x = u[2u].m_0[1u];
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, ArrayMatUniform_LoadArray_Mat2x2F32) {
-    auto* src = R"(
-@group(0) @binding(0) var<uniform> a : array<mat2x2<f32>, 3>;
-
-fn f() {
-  let l = a;
-}
-)";
-
-    auto* expect = R"(
-struct mat2x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<mat2x2_f32, 3u>;
-
-fn conv_mat2x2_f32(val : mat2x2_f32) -> mat2x2<f32> {
-  return mat2x2<f32>(val.col0, val.col1);
-}
-
-fn conv_arr3_mat2x2_f32(val : array<mat2x2_f32, 3u>) -> array<mat2x2<f32>, 3u> {
-  var arr : array<mat2x2<f32>, 3u>;
-  for(var i : u32; (i < 3u); i = (i + 1)) {
-    arr[i] = conv_mat2x2_f32(val[i]);
-  }
-  return arr;
-}
-
-fn f() {
-  let l = conv_arr3_mat2x2_f32(a);
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, ArrayMatUniform_LoadMatrix_ConstArrayIndex_Mat2x2F32) {
-    auto* src = R"(
-@group(0) @binding(0) var<uniform> a : array<mat2x2<f32>, 3>;
-
-fn f() {
-  let l = a[2];
-}
-)";
-
-    auto* expect = R"(
-struct mat2x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<mat2x2_f32, 3u>;
-
-fn conv_mat2x2_f32(val : mat2x2_f32) -> mat2x2<f32> {
-  return mat2x2<f32>(val.col0, val.col1);
-}
-
-fn f() {
-  let l = conv_mat2x2_f32(a[2u]);
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, ArrayMatUniform_LoadMatrix_VariableArrayIndex_Mat2x2F32) {
-    auto* src = R"(
-@group(0) @binding(0) var<uniform> a : array<mat2x2<f32>, 3>;
-
-fn f() {
-  let I = 1;
-  let l = a[I];
-}
-)";
-
-    auto* expect = R"(
-struct mat2x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<mat2x2_f32, 3u>;
-
-fn conv_mat2x2_f32(val : mat2x2_f32) -> mat2x2<f32> {
-  return mat2x2<f32>(val.col0, val.col1);
-}
-
-fn f() {
-  let I = 1;
-  let l = conv_mat2x2_f32(a[I]);
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, ArrayMatUniform_LoadColumn_ConstArrayIndex_ConstColumnIndex_Mat2x2F32) {
-    auto* src = R"(
-@group(0) @binding(0) var<uniform> a : array<mat2x2<f32>, 3>;
-
-fn f() {
-  let l = a[2][1];
-}
-)";
-
-    auto* expect = R"(
-struct mat2x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<mat2x2_f32, 3u>;
-
-fn f() {
-  let l = a[2u].col1;
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, ArrayMatUniform_LoadColumn_VariableArrayIndex_ConstColumnIndex_Mat2x2F32) {
-    auto* src = R"(
-@group(0) @binding(0) var<uniform> a : array<mat2x2<f32>, 3>;
-
-fn f() {
-  let I = 1;
-  let l = a[I][1];
-}
-)";
-
-    auto* expect = R"(
-struct mat2x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<mat2x2_f32, 3u>;
-
-fn f() {
-  let I = 1;
-  let l = a[I].col1;
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, ArrayMatUniform_LoadColumn_ConstArrayIndex_VariableColumnIndex_Mat2x2F32) {
-    auto* src = R"(
-@group(0) @binding(0) var<uniform> a : array<mat2x2<f32>, 3>;
-
-fn f() {
-  let I = 1;
-  let l = a[2][I];
-}
-)";
-
-    auto* expect = R"(
-struct mat2x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<mat2x2_f32, 3u>;
-
-fn load_a_2_p0(p0 : u32) -> vec2<f32> {
-  switch(p0) {
-    case 0u: {
-      return a[2u].col0;
-    }
-    case 1u: {
-      return a[2u].col1;
-    }
-    default: {
-      return vec2<f32>();
-    }
-  }
-}
-
-fn f() {
-  let I = 1;
-  let l = load_a_2_p0(u32(I));
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32,
-       ArrayMatUniform_LoadColumn_VariableArrayIndex_VariableColumnIndex_Mat2x2F32) {
-    auto* src = R"(
-@group(0) @binding(0) var<uniform> a : array<mat2x2<f32>, 3>;
-
-fn f() {
-  let I = 1;
-  let l = a[I][I];
-}
-)";
-
-    auto* expect = R"(
-struct mat2x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<mat2x2_f32, 3u>;
-
-fn load_a_p0_p1(p0 : u32, p1 : u32) -> vec2<f32> {
-  switch(p1) {
-    case 0u: {
-      return a[p0].col0;
-    }
-    case 1u: {
-      return a[p0].col1;
-    }
-    default: {
-      return vec2<f32>();
-    }
-  }
-}
-
-fn f() {
-  let I = 1;
-  let l = load_a_p0_p1(u32(I), u32(I));
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, StructArrayMatUniform_LoadStruct_Mat2x2F32) {
-    auto* src = R"(
-struct S {
-  a : array<mat2x2<f32>, 3>,
-}
-
-@group(0) @binding(0) var<uniform> s : S;
-
-fn f() {
-  let l = s;
-}
-)";
-
-    auto* expect = R"(
-struct mat2x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-}
-
-struct S {
-  a : array<mat2x2<f32>, 3>,
-}
-
-struct S_std140 {
-  a : array<mat2x2_f32, 3u>,
-}
-
-@group(0) @binding(0) var<uniform> s : S_std140;
-
-fn conv_mat2x2_f32(val : mat2x2_f32) -> mat2x2<f32> {
-  return mat2x2<f32>(val.col0, val.col1);
-}
-
-fn conv_arr3_mat2x2_f32(val : array<mat2x2_f32, 3u>) -> array<mat2x2<f32>, 3u> {
-  var arr : array<mat2x2<f32>, 3u>;
-  for(var i : u32; (i < 3u); i = (i + 1)) {
-    arr[i] = conv_mat2x2_f32(val[i]);
-  }
-  return arr;
-}
-
-fn conv_S(val : S_std140) -> S {
-  return S(conv_arr3_mat2x2_f32(val.a));
-}
-
-fn f() {
-  let l = conv_S(s);
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, StructArrayMatUniform_LoadArray_Mat2x2F32) {
-    auto* src = R"(
-struct S {
-  a : array<mat2x2<f32>, 3>,
-}
-
-@group(0) @binding(0) var<uniform> s : S;
-
-fn f() {
-  let l = s.a;
-}
-)";
-
-    auto* expect = R"(
-struct mat2x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-}
-
-struct S {
-  a : array<mat2x2<f32>, 3>,
-}
-
-struct S_std140 {
-  a : array<mat2x2_f32, 3u>,
-}
-
-@group(0) @binding(0) var<uniform> s : S_std140;
-
-fn conv_mat2x2_f32(val : mat2x2_f32) -> mat2x2<f32> {
-  return mat2x2<f32>(val.col0, val.col1);
-}
-
-fn conv_arr3_mat2x2_f32(val : array<mat2x2_f32, 3u>) -> array<mat2x2<f32>, 3u> {
-  var arr : array<mat2x2<f32>, 3u>;
-  for(var i : u32; (i < 3u); i = (i + 1)) {
-    arr[i] = conv_mat2x2_f32(val[i]);
-  }
-  return arr;
-}
-
-fn f() {
-  let l = conv_arr3_mat2x2_f32(s.a);
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, StructArrayMatUniform_LoadMatrix_ConstArrayIndex_Mat2x2F32) {
-    auto* src = R"(
-struct S {
-  a : array<mat2x2<f32>, 3>,
-}
-
-@group(0) @binding(0) var<uniform> s : S;
-
-fn f() {
-  let l = s.a[2];
-}
-)";
-
-    auto* expect = R"(
-struct mat2x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-}
-
-struct S {
-  a : array<mat2x2<f32>, 3>,
-}
-
-struct S_std140 {
-  a : array<mat2x2_f32, 3u>,
-}
-
-@group(0) @binding(0) var<uniform> s : S_std140;
-
-fn conv_mat2x2_f32(val : mat2x2_f32) -> mat2x2<f32> {
-  return mat2x2<f32>(val.col0, val.col1);
-}
-
-fn f() {
-  let l = conv_mat2x2_f32(s.a[2u]);
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, StructArrayMatUniform_LoadMatrix_VariableArrayIndex_Mat2x2F32) {
-    auto* src = R"(
-struct S {
-  a : array<mat2x2<f32>, 3>,
-}
-
-@group(0) @binding(0) var<uniform> s : S;
-
-fn f() {
-  let I = 1;
-  let l = s.a[I];
-}
-)";
-
-    auto* expect = R"(
-struct mat2x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-}
-
-struct S {
-  a : array<mat2x2<f32>, 3>,
-}
-
-struct S_std140 {
-  a : array<mat2x2_f32, 3u>,
-}
-
-@group(0) @binding(0) var<uniform> s : S_std140;
-
-fn conv_mat2x2_f32(val : mat2x2_f32) -> mat2x2<f32> {
-  return mat2x2<f32>(val.col0, val.col1);
-}
-
-fn f() {
-  let I = 1;
-  let l = conv_mat2x2_f32(s.a[I]);
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32,
-       StructArrayMatUniform_LoadColumn_ConstArrayIndex_ConstColumnIndex_Mat2x2F32) {
-    auto* src = R"(
-struct S {
-  a : array<mat2x2<f32>, 3>,
-}
-
-@group(0) @binding(0) var<uniform> s : S;
-
-fn f() {
-  let l = s.a[2][1];
-}
-)";
-
-    auto* expect = R"(
-struct mat2x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-}
-
-struct S {
-  a : array<mat2x2<f32>, 3>,
-}
-
-struct S_std140 {
-  a : array<mat2x2_f32, 3u>,
-}
-
-@group(0) @binding(0) var<uniform> s : S_std140;
-
-fn f() {
-  let l = s.a[2u].col1;
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32,
-       StructArrayMatUniform_LoadColumn_VariableArrayIndex_ConstColumnIndex_Mat2x2F32) {
-    auto* src = R"(
-struct S {
-  a : array<mat2x2<f32>, 3>,
-}
-
-@group(0) @binding(0) var<uniform> s : S;
-
-fn f() {
-  let I = 1;
-  let l = s.a[I][1];
-}
-)";
-
-    auto* expect = R"(
-struct mat2x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-}
-
-struct S {
-  a : array<mat2x2<f32>, 3>,
-}
-
-struct S_std140 {
-  a : array<mat2x2_f32, 3u>,
-}
-
-@group(0) @binding(0) var<uniform> s : S_std140;
-
-fn f() {
-  let I = 1;
-  let l = s.a[I].col1;
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32,
-       StructArrayMatUniform_LoadColumn_ConstArrayIndex_VariableColumnIndex_Mat2x2F32) {
-    auto* src = R"(
-struct S {
-  a : array<mat2x2<f32>, 3>,
-}
-
-@group(0) @binding(0) var<uniform> s : S;
-
-fn f() {
-  let I = 1;
-  let l = s.a[2][I];
-}
-)";
-
-    auto* expect = R"(
-struct mat2x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-}
-
-struct S {
-  a : array<mat2x2<f32>, 3>,
-}
-
-struct S_std140 {
-  a : array<mat2x2_f32, 3u>,
-}
-
-@group(0) @binding(0) var<uniform> s : S_std140;
-
-fn load_s_a_2_p0(p0 : u32) -> vec2<f32> {
-  switch(p0) {
-    case 0u: {
-      return s.a[2u].col0;
-    }
-    case 1u: {
-      return s.a[2u].col1;
-    }
-    default: {
-      return vec2<f32>();
-    }
-  }
-}
-
-fn f() {
-  let I = 1;
-  let l = load_s_a_2_p0(u32(I));
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32,
-       StructArrayMatUniform_LoadColumn_VariableArrayIndex_VariableColumnIndex_Mat2x2F32) {
-    auto* src = R"(
-struct S {
-  a : array<mat2x2<f32>, 3>,
-}
-
-@group(0) @binding(0) var<uniform> s : S;
-
-fn f() {
-  let I = 1;
-  let l = s.a[I][I];
-}
-)";
-
-    auto* expect = R"(
-struct mat2x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-}
-
-struct S {
-  a : array<mat2x2<f32>, 3>,
-}
-
-struct S_std140 {
-  a : array<mat2x2_f32, 3u>,
-}
-
-@group(0) @binding(0) var<uniform> s : S_std140;
-
-fn load_s_a_p0_p1(p0 : u32, p1 : u32) -> vec2<f32> {
-  switch(p1) {
-    case 0u: {
-      return s.a[p0].col0;
-    }
-    case 1u: {
-      return s.a[p0].col1;
-    }
-    default: {
-      return vec2<f32>();
-    }
-  }
-}
-
-fn f() {
-  let I = 1;
-  let l = load_s_a_p0_p1(u32(I), u32(I));
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, ArrayArrayMatUniform_LoadArrays_Mat2x2F32) {
-    auto* src = R"(
-@group(0) @binding(0) var<uniform> a : array<array<mat2x2<f32>, 3>, 4>;
-
-fn f() {
-  let l = a;
-}
-)";
-
-    auto* expect = R"(
-struct mat2x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<array<mat2x2_f32, 3u>, 4u>;
-
-fn conv_mat2x2_f32(val : mat2x2_f32) -> mat2x2<f32> {
-  return mat2x2<f32>(val.col0, val.col1);
-}
-
-fn conv_arr3_mat2x2_f32(val : array<mat2x2_f32, 3u>) -> array<mat2x2<f32>, 3u> {
-  var arr : array<mat2x2<f32>, 3u>;
-  for(var i : u32; (i < 3u); i = (i + 1)) {
-    arr[i] = conv_mat2x2_f32(val[i]);
-  }
-  return arr;
-}
-
-fn conv_arr4_arr3_mat2x2_f32(val : array<array<mat2x2_f32, 3u>, 4u>) -> array<array<mat2x2<f32>, 3u>, 4u> {
-  var arr : array<array<mat2x2<f32>, 3u>, 4u>;
-  for(var i : u32; (i < 4u); i = (i + 1)) {
-    arr[i] = conv_arr3_mat2x2_f32(val[i]);
-  }
-  return arr;
-}
-
-fn f() {
-  let l = conv_arr4_arr3_mat2x2_f32(a);
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, ArrayArrayMatUniform_LoadArray_ConstOuterArrayIndex_Mat2x2F32) {
-    auto* src = R"(
-@group(0) @binding(0) var<uniform> a : array<array<mat2x2<f32>, 3>, 4>;
-
-fn f() {
-  let l = a[3];
-}
-)";
-
-    auto* expect = R"(
-struct mat2x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<array<mat2x2_f32, 3u>, 4u>;
-
-fn conv_mat2x2_f32(val : mat2x2_f32) -> mat2x2<f32> {
-  return mat2x2<f32>(val.col0, val.col1);
-}
-
-fn conv_arr3_mat2x2_f32(val : array<mat2x2_f32, 3u>) -> array<mat2x2<f32>, 3u> {
-  var arr : array<mat2x2<f32>, 3u>;
-  for(var i : u32; (i < 3u); i = (i + 1)) {
-    arr[i] = conv_mat2x2_f32(val[i]);
-  }
-  return arr;
-}
-
-fn f() {
-  let l = conv_arr3_mat2x2_f32(a[3u]);
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32, ArrayArrayMatUniform_LoadArray_VariableOuterArrayIndex_Mat2x2F32) {
-    auto* src = R"(
-@group(0) @binding(0) var<uniform> a : array<array<mat2x2<f32>, 3>, 4>;
-
-fn f() {
-  let I = 1;
-  let l = a[I];
-}
-)";
-
-    auto* expect = R"(
-struct mat2x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<array<mat2x2_f32, 3u>, 4u>;
-
-fn conv_mat2x2_f32(val : mat2x2_f32) -> mat2x2<f32> {
-  return mat2x2<f32>(val.col0, val.col1);
-}
-
-fn conv_arr3_mat2x2_f32(val : array<mat2x2_f32, 3u>) -> array<mat2x2<f32>, 3u> {
-  var arr : array<mat2x2<f32>, 3u>;
-  for(var i : u32; (i < 3u); i = (i + 1)) {
-    arr[i] = conv_mat2x2_f32(val[i]);
-  }
-  return arr;
-}
-
-fn f() {
-  let I = 1;
-  let l = conv_arr3_mat2x2_f32(a[I]);
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32,
-       ArrayArrayMatUniform_LoadMatrix_ConstOuterArrayIndex_ConstInnerArrayIndex_Mat2x2F32) {
-    auto* src = R"(
-@group(0) @binding(0) var<uniform> a : array<array<mat2x2<f32>, 3>, 4>;
-
-fn f() {
-  let l = a[3][2];
-}
-)";
-
-    auto* expect = R"(
-struct mat2x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<array<mat2x2_f32, 3u>, 4u>;
-
-fn conv_mat2x2_f32(val : mat2x2_f32) -> mat2x2<f32> {
-  return mat2x2<f32>(val.col0, val.col1);
-}
-
-fn f() {
-  let l = conv_mat2x2_f32(a[3u][2u]);
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32,
-       ArrayArrayMatUniform_LoadMatrix_ConstOuterArrayIndex_VariableInnerArrayIndex_Mat2x2F32) {
-    auto* src = R"(
-@group(0) @binding(0) var<uniform> a : array<array<mat2x2<f32>, 3>, 4>;
-
-fn f() {
-  let I = 1;
-  let l = a[3][I];
-}
-)";
-
-    auto* expect = R"(
-struct mat2x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<array<mat2x2_f32, 3u>, 4u>;
-
-fn conv_mat2x2_f32(val : mat2x2_f32) -> mat2x2<f32> {
-  return mat2x2<f32>(val.col0, val.col1);
-}
-
-fn f() {
-  let I = 1;
-  let l = conv_mat2x2_f32(a[3u][I]);
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32,
-       ArrayArrayMatUniform_LoadMatrix_VariableOuterArrayIndex_ConstInnerArrayIndex_Mat2x2F32) {
-    auto* src = R"(
-@group(0) @binding(0) var<uniform> a : array<array<mat2x2<f32>, 3>, 4>;
-
-fn f() {
-  let I = 1;
-  let l = a[I][2];
-}
-)";
-
-    auto* expect = R"(
-struct mat2x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<array<mat2x2_f32, 3u>, 4u>;
-
-fn conv_mat2x2_f32(val : mat2x2_f32) -> mat2x2<f32> {
-  return mat2x2<f32>(val.col0, val.col1);
-}
-
-fn f() {
-  let I = 1;
-  let l = conv_mat2x2_f32(a[I][2u]);
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(Std140Test_F32,
-       ArrayArrayMatUniform_LoadMatrix_VariableOuterArrayIndex_VariableInnerArrayIndex_Mat2x2F32) {
-    auto* src = R"(
-@group(0) @binding(0) var<uniform> a : array<array<mat2x2<f32>, 3>, 4>;
-
-fn f() {
-  let I = 1;
-  let l = a[I][I];
-}
-)";
-
-    auto* expect = R"(
-struct mat2x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<array<mat2x2_f32, 3u>, 4u>;
-
-fn conv_mat2x2_f32(val : mat2x2_f32) -> mat2x2<f32> {
-  return mat2x2<f32>(val.col0, val.col1);
-}
-
-fn f() {
-  let I = 1;
-  let l = conv_mat2x2_f32(a[I][I]);
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(
-    Std140Test_F32,
-    ArrayArrayMatUniform_LoadColumn_ConstOuterArrayIndex_ConstInnerArrayIndex_ConstColumnIndex_Mat2x2F32) {
-    auto* src = R"(
-@group(0) @binding(0) var<uniform> a : array<array<mat2x2<f32>, 3>, 4>;
-
-fn f() {
-  let l = a[3][2][1];
-}
-)";
-
-    auto* expect = R"(
-struct mat2x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<array<mat2x2_f32, 3u>, 4u>;
-
-fn f() {
-  let l = a[3u][2u].col1;
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(
-    Std140Test_F32,
-    ArrayArrayMatUniform_LoadColumn_ConstOuterArrayIndex_ConstInnerArrayIndex_VariableColumnIndex_Mat2x2F32) {
-    auto* src = R"(
-@group(0) @binding(0) var<uniform> a : array<array<mat2x2<f32>, 3>, 4>;
-
-fn f() {
-  let I = 1;
-  let l = a[3][2][I];
-}
-)";
-
-    auto* expect = R"(
-struct mat2x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<array<mat2x2_f32, 3u>, 4u>;
-
-fn load_a_3_2_p0(p0 : u32) -> vec2<f32> {
-  switch(p0) {
-    case 0u: {
-      return a[3u][2u].col0;
-    }
-    case 1u: {
-      return a[3u][2u].col1;
-    }
-    default: {
-      return vec2<f32>();
-    }
-  }
-}
-
-fn f() {
-  let I = 1;
-  let l = load_a_3_2_p0(u32(I));
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(
-    Std140Test_F32,
-    ArrayArrayMatUniform_LoadColumn_ConstOuterArrayIndex_VariableInnerArrayIndex_ConstColumnIndex_Mat2x2F32) {
-    auto* src = R"(
-@group(0) @binding(0) var<uniform> a : array<array<mat2x2<f32>, 3>, 4>;
-
-fn f() {
-  let I = 1;
-  let l = a[3][I][1];
-}
-)";
-
-    auto* expect = R"(
-struct mat2x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<array<mat2x2_f32, 3u>, 4u>;
-
-fn f() {
-  let I = 1;
-  let l = a[3u][I].col1;
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(
-    Std140Test_F32,
-    ArrayArrayMatUniform_LoadColumn_ConstOuterArrayIndex_VariableInnerArrayIndex_VariableColumnIndex_Mat2x2F32) {
-    auto* src = R"(
-@group(0) @binding(0) var<uniform> a : array<array<mat2x2<f32>, 3>, 4>;
-
-fn f() {
-  let I = 1;
-  let J = 2;
-  let l = a[3][I][J];
-}
-)";
-
-    auto* expect = R"(
-struct mat2x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<array<mat2x2_f32, 3u>, 4u>;
-
-fn load_a_3_p0_p1(p0 : u32, p1 : u32) -> vec2<f32> {
-  switch(p1) {
-    case 0u: {
-      return a[3u][p0].col0;
-    }
-    case 1u: {
-      return a[3u][p0].col1;
-    }
-    default: {
-      return vec2<f32>();
-    }
-  }
-}
-
-fn f() {
-  let I = 1;
-  let J = 2;
-  let l = load_a_3_p0_p1(u32(I), u32(J));
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(
-    Std140Test_F32,
-    ArrayArrayMatUniform_LoadColumn_VariableOuterArrayIndex_ConstInnerArrayIndex_ConstColumnIndex_Mat2x2F32) {
-    auto* src = R"(
-@group(0) @binding(0) var<uniform> a : array<array<mat2x2<f32>, 3>, 4>;
-
-fn f() {
-  let I = 1;
-  let l = a[I][2][1];
-}
-)";
-
-    auto* expect = R"(
-struct mat2x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<array<mat2x2_f32, 3u>, 4u>;
-
-fn f() {
-  let I = 1;
-  let l = a[I][2u].col1;
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(
-    Std140Test_F32,
-    ArrayArrayMatUniform_LoadColumn_VariableOuterArrayIndex_ConstInnerArrayIndex_VariableColumnIndex_Mat2x2F32) {
-    auto* src = R"(
-@group(0) @binding(0) var<uniform> a : array<array<mat2x2<f32>, 3>, 4>;
-
-fn f() {
-  let I = 1;
-  let J = 2;
-  let l = a[I][2][J];
-}
-)";
-
-    auto* expect = R"(
-struct mat2x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<array<mat2x2_f32, 3u>, 4u>;
-
-fn load_a_p0_2_p1(p0 : u32, p1 : u32) -> vec2<f32> {
-  switch(p1) {
-    case 0u: {
-      return a[p0][2u].col0;
-    }
-    case 1u: {
-      return a[p0][2u].col1;
-    }
-    default: {
-      return vec2<f32>();
-    }
-  }
-}
-
-fn f() {
-  let I = 1;
-  let J = 2;
-  let l = load_a_p0_2_p1(u32(I), u32(J));
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(
-    Std140Test_F32,
-    ArrayArrayMatUniform_LoadColumn_VariableOuterArrayIndex_VariableInnerArrayIndex_ConstColumnIndex_Mat2x2F32) {
-    auto* src = R"(
-@group(0) @binding(0) var<uniform> a : array<array<mat2x2<f32>, 3>, 4>;
-
-fn f() {
-  let I = 1;
-  let J = 2;
-  let l = a[I][J][1];
-}
-)";
-
-    auto* expect = R"(
-struct mat2x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<array<mat2x2_f32, 3u>, 4u>;
-
-fn f() {
-  let I = 1;
-  let J = 2;
-  let l = a[I][J].col1;
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
-TEST_F(
-    Std140Test_F32,
-    ArrayArrayMatUniform_LoadColumn_VariableOuterArrayIndex_VariableInnerArrayIndex_VariableColumnIndex_Mat2x2F32) {
-    auto* src = R"(
-@group(0) @binding(0) var<uniform> a : array<array<mat2x2<f32>, 3>, 4>;
-
-fn f() {
-  let I = 0;
-  let J = 1;
-  let K = 2;
-  let l = a[I][J][K];
-}
-)";
-
-    auto* expect = R"(
-struct mat2x2_f32 {
-  col0 : vec2<f32>,
-  col1 : vec2<f32>,
-}
-
-@group(0) @binding(0) var<uniform> a : array<array<mat2x2_f32, 3u>, 4u>;
-
-fn load_a_p0_p1_p2(p0 : u32, p1 : u32, p2 : u32) -> vec2<f32> {
-  switch(p2) {
-    case 0u: {
-      return a[p0][p1].col0;
-    }
-    case 1u: {
-      return a[p0][p1].col1;
-    }
-    default: {
-      return vec2<f32>();
-    }
-  }
-}
-
-fn f() {
-  let I = 0;
-  let J = 1;
-  let K = 2;
-  let l = load_a_p0_p1_p2(u32(I), u32(J), u32(K));
-}
-)";
-
-    auto got = Run<Std140>(src);
-
-    EXPECT_EQ(expect, str(got));
-}
-
 }  // namespace
 }  // namespace tint::transform

diff --git a/src/tint/writer/hlsl/generator_impl.cc b/src/tint/writer/hlsl/generator_impl.cc
index dcad1b5..1e786da 100644
--- a/src/tint/writer/hlsl/generator_impl.cc
+++ b/src/tint/writer/hlsl/generator_impl.cc

@@ -1097,31 +1097,62 @@
     const auto& args = expr->args;
     auto* offset_arg = builder_.Sem().Get(args[1]);
 
-    uint32_t scalar_offset_value = 0;
-    std::string scalar_offset_expr;
+    // offset in bytes
+    uint32_t scalar_offset_bytes = 0;
+    // offset in uint (4 bytes)
+    uint32_t scalar_offset_index = 0;
+    // expression to calculate offset in bytes
+    std::string scalar_offset_bytes_expr;
+    // expression to calculate offset in uint, by dividing scalar_offset_bytes_expr by 4
+    std::string scalar_offset_index_expr;
+    // expression to calculate offset in uint directly, without a separate byte-offset temporary
+    std::string scalar_offset_index_unified_expr;
 
-    // If true, use scalar_offset_value, otherwise use scalar_offset_expr
+    // If true, the offset is a constant (scalar_offset_index), otherwise use the *_expr strings
     bool scalar_offset_constant = false;
 
     if (auto* val = offset_arg->ConstantValue()) {
         TINT_ASSERT(Writer, val->Type()->Is<sem::U32>());
-        scalar_offset_value = static_cast<uint32_t>(std::get<AInt>(val->Value()));
-        scalar_offset_value /= 4;  // bytes -> scalar index
+        scalar_offset_bytes = static_cast<uint32_t>(std::get<AInt>(val->Value()));
+        scalar_offset_index = scalar_offset_bytes / 4;  // bytes -> scalar index
         scalar_offset_constant = true;
     }
 
+    // If true, scalar_offset_bytes or scalar_offset_bytes_expr should be used, otherwise only use
+    // scalar_offset_index or scalar_offset_index_unified_expr. Currently only loading an f16
+    // scalar requires the offset in bytes.
+    const bool need_offset_in_bytes =
+        intrinsic->type == transform::DecomposeMemoryAccess::Intrinsic::DataType::kF16;
+
     if (!scalar_offset_constant) {
         // UBO offset not compile-time known.
         // Calculate the scalar offset into a temporary.
-        scalar_offset_expr = UniqueIdentifier("scalar_offset");
-        auto pre = line();
-        pre << "const uint " << scalar_offset_expr << " = (";
-        if (!EmitExpression(pre, args[1])) {  // offset
-            return false;
+        if (need_offset_in_bytes) {
+            scalar_offset_bytes_expr = UniqueIdentifier("scalar_offset_bytes");
+            scalar_offset_index_expr = UniqueIdentifier("scalar_offset_index");
+            {
+                auto pre = line();
+                pre << "const uint " << scalar_offset_bytes_expr << " = (";
+                if (!EmitExpression(pre, args[1])) {  // offset
+                    return false;
+                }
+                pre << ");";
+            }
+            line() << "const uint " << scalar_offset_index_expr << " = " << scalar_offset_bytes_expr
+                   << " / 4;";
+        } else {
+            scalar_offset_index_unified_expr = UniqueIdentifier("scalar_offset");
+            auto pre = line();
+            pre << "const uint " << scalar_offset_index_unified_expr << " = (";
+            if (!EmitExpression(pre, args[1])) {  // offset
+                return false;
+            }
+            pre << ") / 4;";
         }
-        pre << ") / 4;";
     }
 
+    constexpr const char swizzle[] = {'x', 'y', 'z', 'w'};
+
     using Op = transform::DecomposeMemoryAccess::Intrinsic::Op;
     using DataType = transform::DecomposeMemoryAccess::Intrinsic::DataType;
     switch (intrinsic->op) {
@@ -1132,27 +1163,28 @@
                 out << ")";
                 return result;
             };
-            auto load_scalar = [&]() {
-                if (!EmitExpression(out, args[0])) {  // buffer
+            auto load_u32_to = [&](std::ostream& target) {
+                if (!EmitExpression(target, args[0])) {  // buffer
                     return false;
                 }
                 if (scalar_offset_constant) {
-                    char swizzle[] = {'x', 'y', 'z', 'w'};
-                    out << "[" << (scalar_offset_value / 4) << "]."
-                        << swizzle[scalar_offset_value & 3];
+                    target << "[" << (scalar_offset_index / 4) << "]."
+                           << swizzle[scalar_offset_index & 3];
                 } else {
-                    out << "[" << scalar_offset_expr << " / 4][" << scalar_offset_expr << " % 4]";
+                    target << "[" << scalar_offset_index_unified_expr << " / 4]["
+                           << scalar_offset_index_unified_expr << " % 4]";
                 }
                 return true;
             };
+            auto load_u32 = [&] { return load_u32_to(out); };
             // Has a minimum alignment of 8 bytes, so is either .xy or .zw
-            auto load_vec2 = [&] {
+            auto load_vec2_u32_to = [&](std::ostream& target) {
                 if (scalar_offset_constant) {
-                    if (!EmitExpression(out, args[0])) {  // buffer
+                    if (!EmitExpression(target, args[0])) {  // buffer
                         return false;
                     }
-                    out << "[" << (scalar_offset_value / 4) << "]";
-                    out << ((scalar_offset_value & 2) == 0 ? ".xy" : ".zw");
+                    target << "[" << (scalar_offset_index / 4) << "]";
+                    target << ((scalar_offset_index & 2) == 0 ? ".xy" : ".zw");
                 } else {
                     std::string ubo_load = UniqueIdentifier("ubo_load");
                     {
@@ -1161,58 +1193,190 @@
                         if (!EmitExpression(pre, args[0])) {  // buffer
                             return false;
                         }
-                        pre << "[" << scalar_offset_expr << " / 4];";
+                        pre << "[" << scalar_offset_index_unified_expr << " / 4];";
                     }
-                    out << "((" << scalar_offset_expr << " & 2) ? " << ubo_load
-                        << ".zw : " << ubo_load << ".xy)";
+                    target << "((" << scalar_offset_index_unified_expr << " & 2) ? " << ubo_load
+                           << ".zw : " << ubo_load << ".xy)";
                 }
                 return true;
             };
+            auto load_vec2_u32 = [&] { return load_vec2_u32_to(out); };
             // vec4 has a minimum alignment of 16 bytes, easiest case
-            auto load_vec4 = [&] {
+            auto load_vec4_u32 = [&] {
                 if (!EmitExpression(out, args[0])) {  // buffer
                     return false;
                 }
                 if (scalar_offset_constant) {
-                    out << "[" << (scalar_offset_value / 4) << "]";
+                    out << "[" << (scalar_offset_index / 4) << "]";
                 } else {
-                    out << "[" << scalar_offset_expr << " / 4]";
+                    out << "[" << scalar_offset_index_unified_expr << " / 4]";
                 }
                 return true;
             };
             // vec3 has a minimum alignment of 16 bytes, so is just a .xyz swizzle
-            auto load_vec3 = [&] {
-                if (!load_vec4()) {
+            auto load_vec3_u32 = [&] {
+                if (!load_vec4_u32()) {
                     return false;
                 }
                 out << ".xyz";
                 return true;
             };
+            auto load_scalar_f16 = [&] {
+                // offset bytes = 4k,   low 16 bits:  ((buffer[index].x) & 0xFFFF)
+                // offset bytes = 4k+2, high 16 bits: ((buffer[index].x >> 16) & 0xFFFF)
+                out << "float16_t(f16tof32(((";
+                if (!EmitExpression(out, args[0])) {  // buffer
+                    return false;
+                }
+                if (scalar_offset_constant) {
+                    out << "[" << (scalar_offset_index / 4) << "]."
+                        << swizzle[scalar_offset_index & 3];
+                    // The WGSL spec ensures little-endian layout: offset 4k is the low half.
+                    if (scalar_offset_bytes % 4 == 0) {
+                        out << ") & 0xFFFF)";
+                    } else {
+                        out << " >> 16) & 0xFFFF)";
+                    }
+                } else {
+                    out << "[" << scalar_offset_index_expr << " / 4][" << scalar_offset_index_expr
+                        << " % 4] >> (" << scalar_offset_bytes_expr
+                        << " % 4 == 0 ? 0 : 16)) & 0xFFFF)";
+                }
+                out << "))";
+                return true;
+            };
+            auto load_vec2_f16 = [&] {
+                // vec2<f16> is aligned to 4 bytes
+                // Prelude code loads the vec2<f16> data as a uint:
+                //     uint ubo_load = buffer[id0][id1];
+                // Loading code converts it to vec2<f16>:
+                //     vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)),
+                //     float16_t(f16tof32(ubo_load >> 16)))
+                std::string ubo_load = UniqueIdentifier("ubo_load");
+                {
+                    auto pre = line();
+                    // Load the 4-byte f16 vector as a uint
+                    pre << "uint " << ubo_load << " = ";
+                    if (!load_u32_to(pre)) {
+                        return false;
+                    }
+                    pre << ";";
+                }
+                out << "vector<float16_t, 2>(float16_t(f16tof32(" << ubo_load
+                    << " & 0xFFFF)), float16_t(f16tof32(" << ubo_load << " >> 16)))";
+                return true;
+            };
+            auto load_vec3_f16 = [&] {
+                // vec3<f16> is aligned to 8 bytes
+                // Prelude code loads the vec3<f16> data as uint2 and converts its elements to
+                // float16_t:
+                //     uint2 ubo_load = buffer[id0].xy;
+                //     /* The low 16 bits of both uints are the x and z elements of vec3<f16> */
+                //     vector<float16_t, 2> ubo_load_xz =
+                //         vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+                //     /* The high 16 bits of the first uint are the y element of vec3<f16> */
+                //     float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+                // Loading code converts it to vec3<f16>:
+                //     vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1])
+                std::string ubo_load = UniqueIdentifier("ubo_load");
+                std::string ubo_load_xz = UniqueIdentifier(ubo_load + "_xz");
+                std::string ubo_load_y = UniqueIdentifier(ubo_load + "_y");
+                {
+                    auto pre = line();
+                    // Load the 8-byte uint2 holding the f16 vector in its lower 6 bytes
+                    pre << "uint2 " << ubo_load << " = ";
+                    if (!load_vec2_u32_to(pre)) {
+                        return false;
+                    }
+                    pre << ";";
+                }
+                {
+                    auto pre = line();
+                    pre << "vector<float16_t, 2> " << ubo_load_xz
+                        << " = vector<float16_t, 2>(f16tof32(" << ubo_load << " & 0xFFFF));";
+                }
+                {
+                    auto pre = line();
+                    pre << "float16_t " << ubo_load_y << " = f16tof32(" << ubo_load
+                        << "[0] >> 16);";
+                }
+                out << "vector<float16_t, 3>(" << ubo_load_xz << "[0], " << ubo_load_y << ", "
+                    << ubo_load_xz << "[1])";
+                return true;
+            };
+            auto load_vec4_f16 = [&] {
+                // vec4<f16> is aligned to 8 bytes
+                // Prelude code loads the vec4<f16> data as uint2 and converts its elements to
+                // float16_t:
+                //     uint2 ubo_load = buffer[id0].xy;
+                //     /* The low 16 bits of both uints are the x and z elements of vec4<f16> */
+                //     vector<float16_t, 2> ubo_load_xz =
+                //         vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+                //     /* The high 16 bits of both uints are the y and w elements of vec4<f16> */
+                //     vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >>
+                //     16));
+                // Loading code converts it to vec4<f16>:
+                //     vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1],
+                //     ubo_load_yw[1])
+                std::string ubo_load = UniqueIdentifier("ubo_load");
+                std::string ubo_load_xz = UniqueIdentifier(ubo_load + "_xz");
+                std::string ubo_load_yw = UniqueIdentifier(ubo_load + "_yw");
+                {
+                    auto pre = line();
+                    // Load the 8-byte f16 vector as a uint2
+                    pre << "uint2 " << ubo_load << " = ";
+                    if (!load_vec2_u32_to(pre)) {
+                        return false;
+                    }
+                    pre << ";";
+                }
+                {
+                    auto pre = line();
+                    pre << "vector<float16_t, 2> " << ubo_load_xz
+                        << " = vector<float16_t, 2>(f16tof32(" << ubo_load << " & 0xFFFF));";
+                }
+                {
+                    auto pre = line();
+                    pre << "vector<float16_t, 2> " << ubo_load_yw
+                        << " = vector<float16_t, 2>(f16tof32(" << ubo_load << " >> 16));";
+                }
+                out << "vector<float16_t, 4>(" << ubo_load_xz << "[0], " << ubo_load_yw << "[0], "
+                    << ubo_load_xz << "[1], " << ubo_load_yw << "[1])";
+                return true;
+            };
             switch (intrinsic->type) {
                 case DataType::kU32:
-                    return load_scalar();
+                    return load_u32();
                 case DataType::kF32:
-                    return cast("asfloat", load_scalar);
+                    return cast("asfloat", load_u32);
                 case DataType::kI32:
-                    return cast("asint", load_scalar);
+                    return cast("asint", load_u32);
+                case DataType::kF16:
+                    return load_scalar_f16();
                 case DataType::kVec2U32:
-                    return load_vec2();
+                    return load_vec2_u32();
                 case DataType::kVec2F32:
-                    return cast("asfloat", load_vec2);
+                    return cast("asfloat", load_vec2_u32);
                 case DataType::kVec2I32:
-                    return cast("asint", load_vec2);
+                    return cast("asint", load_vec2_u32);
+                case DataType::kVec2F16:
+                    return load_vec2_f16();
                 case DataType::kVec3U32:
-                    return load_vec3();
+                    return load_vec3_u32();
                 case DataType::kVec3F32:
-                    return cast("asfloat", load_vec3);
+                    return cast("asfloat", load_vec3_u32);
                 case DataType::kVec3I32:
-                    return cast("asint", load_vec3);
+                    return cast("asint", load_vec3_u32);
+                case DataType::kVec3F16:
+                    return load_vec3_f16();
                 case DataType::kVec4U32:
-                    return load_vec4();
+                    return load_vec4_u32();
                 case DataType::kVec4F32:
-                    return cast("asfloat", load_vec4);
+                    return cast("asfloat", load_vec4_u32);
                 case DataType::kVec4I32:
-                    return cast("asint", load_vec4);
+                    return cast("asint", load_vec4_u32);
+                case DataType::kVec4F16:
+                    return load_vec4_f16();
             }
             TINT_UNREACHABLE(Writer, diagnostics_)
                 << "unsupported DecomposeMemoryAccess::Intrinsic::DataType: "
@@ -1257,6 +1421,20 @@
                 }
                 return true;
             };
+            // Templated load used for f16 types; requires SM6.2 or higher and DXC.
+            // For example, for the f16 type, set the type parameter to "float16_t"
+            // to emit `buffer.Load<float16_t>(offset)`.
+            auto templated_load = [&](const char* type) {
+                if (!EmitExpression(out, args[0])) {  // buffer
+                    return false;
+                }
+                out << ".Load<" << type << ">";  // templated load
+                ScopedParen sp(out);
+                if (!EmitExpression(out, args[1])) {  // offset
+                    return false;
+                }
+                return true;
+            };
             switch (intrinsic->type) {
                 case DataType::kU32:
                     return load(nullptr, 1);
@@ -1264,24 +1442,32 @@
                     return load("asfloat", 1);
                 case DataType::kI32:
                     return load("asint", 1);
+                case DataType::kF16:
+                    return templated_load("float16_t");
                 case DataType::kVec2U32:
                     return load(nullptr, 2);
                 case DataType::kVec2F32:
                     return load("asfloat", 2);
                 case DataType::kVec2I32:
                     return load("asint", 2);
+                case DataType::kVec2F16:
+                    return templated_load("vector<float16_t, 2> ");
                 case DataType::kVec3U32:
                     return load(nullptr, 3);
                 case DataType::kVec3F32:
                     return load("asfloat", 3);
                 case DataType::kVec3I32:
                     return load("asint", 3);
+                case DataType::kVec3F16:
+                    return templated_load("vector<float16_t, 3> ");
                 case DataType::kVec4U32:
                     return load(nullptr, 4);
                 case DataType::kVec4F32:
                     return load("asfloat", 4);
                 case DataType::kVec4I32:
                     return load("asint", 4);
+                case DataType::kVec4F16:
+                    return templated_load("vector<float16_t, 4> ");
             }
             TINT_UNREACHABLE(Writer, diagnostics_)
                 << "unsupported DecomposeMemoryAccess::Intrinsic::DataType: "
@@ -1309,6 +1495,24 @@
                 }
                 return true;
             };
+            // Templated store used for f16 types; requires SM6.2 or higher and DXC.
+            // For example, for the f16 type, set the type parameter to "float16_t"
+            // to emit `buffer.Store<float16_t>(offset, value)`.
+            auto templated_store = [&](const char* type) {
+                if (!EmitExpression(out, args[0])) {  // buffer
+                    return false;
+                }
+                out << ".Store<" << type << ">";  // templated store
+                ScopedParen sp1(out);
+                if (!EmitExpression(out, args[1])) {  // offset
+                    return false;
+                }
+                out << ", ";
+                if (!EmitExpression(out, args[2])) {  // value
+                    return false;
+                }
+                return true;
+            };
             switch (intrinsic->type) {
                 case DataType::kU32:
                     return store(1);
@@ -1316,24 +1520,32 @@
                     return store(1);
                 case DataType::kI32:
                     return store(1);
+                case DataType::kF16:
+                    return templated_store("float16_t");
                 case DataType::kVec2U32:
                     return store(2);
                 case DataType::kVec2F32:
                     return store(2);
                 case DataType::kVec2I32:
                     return store(2);
+                case DataType::kVec2F16:
+                    return templated_store("vector<float16_t, 2> ");
                 case DataType::kVec3U32:
                     return store(3);
                 case DataType::kVec3F32:
                     return store(3);
                 case DataType::kVec3I32:
                     return store(3);
+                case DataType::kVec3F16:
+                    return templated_store("vector<float16_t, 3> ");
                 case DataType::kVec4U32:
                     return store(4);
                 case DataType::kVec4F32:
                     return store(4);
                 case DataType::kVec4I32:
                     return store(4);
+                case DataType::kVec4F16:
+                    return templated_store("vector<float16_t, 4> ");
             }
             TINT_UNREACHABLE(Writer, diagnostics_)
                 << "unsupported DecomposeMemoryAccess::Intrinsic::DataType: "

diff --git a/src/tint/writer/hlsl/generator_impl_member_accessor_test.cc b/src/tint/writer/hlsl/generator_impl_member_accessor_test.cc
index 1fbef9b..b3440b8 100644
--- a/src/tint/writer/hlsl/generator_impl_member_accessor_test.cc
+++ b/src/tint/writer/hlsl/generator_impl_member_accessor_test.cc

@@ -34,6 +34,9 @@
 inline const ast::Type* ty_f32(const ProgramBuilder::TypesBuilder& ty) {
     return ty.f32();
 }
+inline const ast::Type* ty_f16(const ProgramBuilder::TypesBuilder& ty) {
+    return ty.f16();
+}
 template <typename T>
 inline const ast::Type* ty_vec2(const ProgramBuilder::TypesBuilder& ty) {
     return ty.vec2<T>();
@@ -94,6 +97,14 @@
                     b.Group(1_a), b.Binding(0_a));
     }
 
+    void SetupUniformBuffer(utils::VectorRef<const ast::StructMember*> members) {
+        ProgramBuilder& b = *this;
+        auto* s = b.Structure("Data", members);
+
+        b.GlobalVar("data", b.ty.Of(s), ast::AddressSpace::kUniform, ast::Access::kUndefined,
+                    b.Group(1_a), b.Binding(1_a));
+    }
+
     void SetupFunction(utils::VectorRef<const ast::Statement*> statements) {
         ProgramBuilder& b = *this;
         utils::Vector attrs{
@@ -144,18 +155,21 @@
     return out;
 }
 
-using HlslGeneratorImplTest_MemberAccessor_StorageBufferLoad =
+using HlslGeneratorImplTest_MemberAccessor_StorageBufferLoad_ConstantOffset =
     HlslGeneratorImplTest_MemberAccessorWithParam<TypeCase>;
-TEST_P(HlslGeneratorImplTest_MemberAccessor_StorageBufferLoad, Test) {
+
+TEST_P(HlslGeneratorImplTest_MemberAccessor_StorageBufferLoad_ConstantOffset, Test) {
     // struct Data {
-    //   a : i32;
-    //   b : <type>;
+    //   a : i32,
+    //   b : <type>,
     // };
     // var<storage> data : Data;
     // data.b;
 
     auto p = GetParam();
 
+    Enable(ast::Extension::kF16);
+
     SetupStorageBuffer(utils::Vector{
         Member("a", ty.i32()),
         Member("b", p.member_type(ty)),
@@ -173,60 +187,813 @@
 
 INSTANTIATE_TEST_SUITE_P(
     HlslGeneratorImplTest_MemberAccessor,
-    HlslGeneratorImplTest_MemberAccessor_StorageBufferLoad,
+    HlslGeneratorImplTest_MemberAccessor_StorageBufferLoad_ConstantOffset,
+    testing::Values(TypeCase{ty_u32, "data.Load(4u)"},
+                    TypeCase{ty_f32, "asfloat(data.Load(4u))"},
+                    TypeCase{ty_i32, "asint(data.Load(4u))"},
+                    TypeCase{ty_f16, "data.Load<float16_t>(4u)"},
+                    TypeCase{ty_vec2<u32>, "data.Load2(8u)"},
+                    TypeCase{ty_vec2<f32>, "asfloat(data.Load2(8u))"},
+                    TypeCase{ty_vec2<i32>, "asint(data.Load2(8u))"},
+                    TypeCase{ty_vec2<f16>, "data.Load<vector<float16_t, 2> >(4u)"},
+                    TypeCase{ty_vec3<u32>, "data.Load3(16u)"},
+                    TypeCase{ty_vec3<f32>, "asfloat(data.Load3(16u))"},
+                    TypeCase{ty_vec3<i32>, "asint(data.Load3(16u))"},
+                    TypeCase{ty_vec3<f16>, "data.Load<vector<float16_t, 3> >(8u)"},
+                    TypeCase{ty_vec4<u32>, "data.Load4(16u)"},
+                    TypeCase{ty_vec4<f32>, "asfloat(data.Load4(16u))"},
+                    TypeCase{ty_vec4<i32>, "asint(data.Load4(16u))"},
+                    TypeCase{ty_vec4<f16>, "data.Load<vector<float16_t, 4> >(8u)"},
+                    TypeCase{ty_mat2x2<f32>,
+                             "return float2x2(asfloat(buffer.Load2((offset + 0u))), "
+                             "asfloat(buffer.Load2((offset + 8u))));"},
+                    TypeCase{ty_mat2x3<f32>,
+                             "return float2x3(asfloat(buffer.Load3((offset + 0u))), "
+                             "asfloat(buffer.Load3((offset + 16u))));"},
+                    TypeCase{ty_mat2x4<f32>,
+                             "return float2x4(asfloat(buffer.Load4((offset + 0u))), "
+                             "asfloat(buffer.Load4((offset + 16u))));"},
+                    TypeCase{ty_mat3x2<f32>,
+                             "return float3x2(asfloat(buffer.Load2((offset + 0u))), "
+                             "asfloat(buffer.Load2((offset + 8u))), "
+                             "asfloat(buffer.Load2((offset + 16u))));"},
+                    TypeCase{ty_mat3x3<f32>,
+                             "return float3x3(asfloat(buffer.Load3((offset + 0u))), "
+                             "asfloat(buffer.Load3((offset + 16u))), "
+                             "asfloat(buffer.Load3((offset + 32u))));"},
+                    TypeCase{ty_mat3x4<f32>,
+                             "return float3x4(asfloat(buffer.Load4((offset + 0u))), "
+                             "asfloat(buffer.Load4((offset + 16u))), "
+                             "asfloat(buffer.Load4((offset + 32u))));"},
+                    TypeCase{ty_mat4x2<f32>,
+                             "return float4x2(asfloat(buffer.Load2((offset + 0u))), "
+                             "asfloat(buffer.Load2((offset + 8u))), "
+                             "asfloat(buffer.Load2((offset + 16u))), "
+                             "asfloat(buffer.Load2((offset + 24u))));"},
+                    TypeCase{ty_mat4x3<f32>,
+                             "return float4x3(asfloat(buffer.Load3((offset + 0u))), "
+                             "asfloat(buffer.Load3((offset + 16u))), "
+                             "asfloat(buffer.Load3((offset + 32u))), "
+                             "asfloat(buffer.Load3((offset + 48u))));"},
+                    TypeCase{ty_mat4x4<f32>,
+                             "return float4x4(asfloat(buffer.Load4((offset + 0u))), "
+                             "asfloat(buffer.Load4((offset + 16u))), "
+                             "asfloat(buffer.Load4((offset + 32u))), "
+                             "asfloat(buffer.Load4((offset + 48u))));"},
+                    TypeCase{ty_mat2x2<f16>,
+                             "return matrix<float16_t, 2, 2>("
+                             "buffer.Load<vector<float16_t, 2> >((offset + 0u)), "
+                             "buffer.Load<vector<float16_t, 2> >((offset + 4u)));"},
+                    TypeCase{ty_mat2x3<f16>,
+                             "return matrix<float16_t, 2, 3>("
+                             "buffer.Load<vector<float16_t, 3> >((offset + 0u)), "
+                             "buffer.Load<vector<float16_t, 3> >((offset + 8u)));"},
+                    TypeCase{ty_mat2x4<f16>,
+                             "return matrix<float16_t, 2, 4>("
+                             "buffer.Load<vector<float16_t, 4> >((offset + 0u)), "
+                             "buffer.Load<vector<float16_t, 4> >((offset + 8u)));"},
+                    TypeCase{ty_mat3x2<f16>,
+                             "return matrix<float16_t, 3, 2>("
+                             "buffer.Load<vector<float16_t, 2> >((offset + 0u)), "
+                             "buffer.Load<vector<float16_t, 2> >((offset + 4u)), "
+                             "buffer.Load<vector<float16_t, 2> >((offset + 8u)));"},
+                    TypeCase{ty_mat3x3<f16>,
+                             "return matrix<float16_t, 3, 3>("
+                             "buffer.Load<vector<float16_t, 3> >((offset + 0u)), "
+                             "buffer.Load<vector<float16_t, 3> >((offset + 8u)), "
+                             "buffer.Load<vector<float16_t, 3> >((offset + 16u)));"},
+                    TypeCase{ty_mat3x4<f16>,
+                             "return matrix<float16_t, 3, 4>("
+                             "buffer.Load<vector<float16_t, 4> >((offset + 0u)), "
+                             "buffer.Load<vector<float16_t, 4> >((offset + 8u)), "
+                             "buffer.Load<vector<float16_t, 4> >((offset + 16u)));"},
+                    TypeCase{ty_mat4x2<f16>,
+                             "return matrix<float16_t, 4, 2>("
+                             "buffer.Load<vector<float16_t, 2> >((offset + 0u)), "
+                             "buffer.Load<vector<float16_t, 2> >((offset + 4u)), "
+                             "buffer.Load<vector<float16_t, 2> >((offset + 8u)), "
+                             "buffer.Load<vector<float16_t, 2> >((offset + 12u)));"},
+                    TypeCase{ty_mat4x3<f16>,
+                             "return matrix<float16_t, 4, 3>("
+                             "buffer.Load<vector<float16_t, 3> >((offset + 0u)), "
+                             "buffer.Load<vector<float16_t, 3> >((offset + 8u)), "
+                             "buffer.Load<vector<float16_t, 3> >((offset + 16u)), "
+                             "buffer.Load<vector<float16_t, 3> >((offset + 24u)));"},
+                    TypeCase{ty_mat4x4<f16>,
+                             "return matrix<float16_t, 4, 4>("
+                             "buffer.Load<vector<float16_t, 4> >((offset + 0u)), "
+                             "buffer.Load<vector<float16_t, 4> >((offset + 8u)), "
+                             "buffer.Load<vector<float16_t, 4> >((offset + 16u)), "
+                             "buffer.Load<vector<float16_t, 4> >((offset + 24u)));"}));
+
+using HlslGeneratorImplTest_MemberAccessor_StorageBufferLoad_DynamicOffset =
+    HlslGeneratorImplTest_MemberAccessorWithParam<TypeCase>;
+
+TEST_P(HlslGeneratorImplTest_MemberAccessor_StorageBufferLoad_DynamicOffset, Test) {
+    // struct Inner {
+    //   a : i32,
+    //   b : <type>,
+    //   c : vec4<i32>,
+    // };
+    // struct Data {
+    //  arr : array<Inner, 4i>,
+    // }
+    // var<storage> data : Data;
+    // data.arr[i].b;
+
+    auto p = GetParam();
+
+    Enable(ast::Extension::kF16);
+
+    auto* inner = Structure("Inner", utils::Vector{
+                                         Member("a", ty.i32()),
+                                         Member("b", p.member_type(ty)),
+                                         Member("c", ty.vec4(ty.i32())),
+                                     });
+
+    SetupStorageBuffer(utils::Vector{
+        Member("arr", ty.array(ty.Of(inner), 4_i)),
+    });
+
+    auto* i = Var("i", Expr(2_i));
+
+    SetupFunction(utils::Vector{
+        Decl(i),
+        Decl(Var("x", MemberAccessor(IndexAccessor(MemberAccessor("data", "arr"), i), "b"))),
+    });
+
+    GeneratorImpl& gen = SanitizeAndBuild();
+
+    ASSERT_TRUE(gen.Generate()) << gen.error();
+    EXPECT_THAT(gen.result(), HasSubstr(p.expected));
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    HlslGeneratorImplTest_MemberAccessor,
+    HlslGeneratorImplTest_MemberAccessor_StorageBufferLoad_DynamicOffset,
     testing::Values(
-        TypeCase{ty_u32, "data.Load(4u)"},
-        TypeCase{ty_f32, "asfloat(data.Load(4u))"},
-        TypeCase{ty_i32, "asint(data.Load(4u))"},
-        TypeCase{ty_vec2<u32>, "data.Load2(8u)"},
-        TypeCase{ty_vec2<f32>, "asfloat(data.Load2(8u))"},
-        TypeCase{ty_vec2<i32>, "asint(data.Load2(8u))"},
-        TypeCase{ty_vec3<u32>, "data.Load3(16u)"},
-        TypeCase{ty_vec3<f32>, "asfloat(data.Load3(16u))"},
-        TypeCase{ty_vec3<i32>, "asint(data.Load3(16u))"},
-        TypeCase{ty_vec4<u32>, "data.Load4(16u)"},
-        TypeCase{ty_vec4<f32>, "asfloat(data.Load4(16u))"},
-        TypeCase{ty_vec4<i32>, "asint(data.Load4(16u))"},
-        TypeCase{
-            ty_mat2x2<f32>,
-            R"(return float2x2(asfloat(buffer.Load2((offset + 0u))), asfloat(buffer.Load2((offset + 8u))));)"},
-        TypeCase{
-            ty_mat2x3<f32>,
-            R"(return float2x3(asfloat(buffer.Load3((offset + 0u))), asfloat(buffer.Load3((offset + 16u))));)"},
-        TypeCase{
-            ty_mat2x4<f32>,
-            R"(return float2x4(asfloat(buffer.Load4((offset + 0u))), asfloat(buffer.Load4((offset + 16u))));)"},
-        TypeCase{
-            ty_mat3x2<f32>,
-            R"(return float3x2(asfloat(buffer.Load2((offset + 0u))), asfloat(buffer.Load2((offset + 8u))), asfloat(buffer.Load2((offset + 16u))));)"},
-        TypeCase{
-            ty_mat3x3<f32>,
-            R"(return float3x3(asfloat(buffer.Load3((offset + 0u))), asfloat(buffer.Load3((offset + 16u))), asfloat(buffer.Load3((offset + 32u))));)"},
-        TypeCase{
-            ty_mat3x4<f32>,
-            R"(return float3x4(asfloat(buffer.Load4((offset + 0u))), asfloat(buffer.Load4((offset + 16u))), asfloat(buffer.Load4((offset + 32u))));)"},
-        TypeCase{
-            ty_mat4x2<f32>,
-            R"(return float4x2(asfloat(buffer.Load2((offset + 0u))), asfloat(buffer.Load2((offset + 8u))), asfloat(buffer.Load2((offset + 16u))), asfloat(buffer.Load2((offset + 24u))));)"},
-        TypeCase{
-            ty_mat4x3<f32>,
-            R"(return float4x3(asfloat(buffer.Load3((offset + 0u))), asfloat(buffer.Load3((offset + 16u))), asfloat(buffer.Load3((offset + 32u))), asfloat(buffer.Load3((offset + 48u))));)"},
-        TypeCase{
-            ty_mat4x4<f32>,
-            R"(return float4x4(asfloat(buffer.Load4((offset + 0u))), asfloat(buffer.Load4((offset + 16u))), asfloat(buffer.Load4((offset + 32u))), asfloat(buffer.Load4((offset + 48u))));)"}));
+        TypeCase{ty_u32, "data.Load(((32u * uint(i)) + 4u))"},
+        TypeCase{ty_f32, "asfloat(data.Load(((32u * uint(i)) + 4u)))"},
+        TypeCase{ty_i32, "asint(data.Load(((32u * uint(i)) + 4u)))"},
+        TypeCase{ty_f16, "data.Load<float16_t>(((32u * uint(i)) + 4u))"},
+        TypeCase{ty_vec2<u32>, "data.Load2(((32u * uint(i)) + 8u))"},
+        TypeCase{ty_vec2<f32>, "asfloat(data.Load2(((32u * uint(i)) + 8u)))"},
+        TypeCase{ty_vec2<i32>, "asint(data.Load2(((32u * uint(i)) + 8u)))"},
+        TypeCase{ty_vec2<f16>, "data.Load<vector<float16_t, 2> >(((32u * uint(i)) + 4u))"},
+        TypeCase{ty_vec3<u32>, "data.Load3(((48u * uint(i)) + 16u))"},
+        TypeCase{ty_vec3<f32>, "asfloat(data.Load3(((48u * uint(i)) + 16u)))"},
+        TypeCase{ty_vec3<i32>, "asint(data.Load3(((48u * uint(i)) + 16u)))"},
+        TypeCase{ty_vec3<f16>, "data.Load<vector<float16_t, 3> >(((32u * uint(i)) + 8u))"},
+        TypeCase{ty_vec4<u32>, "data.Load4(((48u * uint(i)) + 16u))"},
+        TypeCase{ty_vec4<f32>, "asfloat(data.Load4(((48u * uint(i)) + 16u)))"},
+        TypeCase{ty_vec4<i32>, "asint(data.Load4(((48u * uint(i)) + 16u)))"},
+        TypeCase{ty_vec4<f16>, "data.Load<vector<float16_t, 4> >(((32u * uint(i)) + 8u))"},
+        TypeCase{ty_mat2x2<f32>,
+                 "return float2x2(asfloat(buffer.Load2((offset + 0u))), "
+                 "asfloat(buffer.Load2((offset + 8u))));"},
+        TypeCase{ty_mat2x3<f32>,
+                 "return float2x3(asfloat(buffer.Load3((offset + 0u))), "
+                 "asfloat(buffer.Load3((offset + 16u))));"},
+        TypeCase{ty_mat2x4<f32>,
+                 "return float2x4(asfloat(buffer.Load4((offset + 0u))), "
+                 "asfloat(buffer.Load4((offset + 16u))));"},
+        TypeCase{ty_mat3x2<f32>,
+                 "return float3x2(asfloat(buffer.Load2((offset + 0u))), "
+                 "asfloat(buffer.Load2((offset + 8u))), "
+                 "asfloat(buffer.Load2((offset + 16u))));"},
+        TypeCase{ty_mat3x3<f32>,
+                 "return float3x3(asfloat(buffer.Load3((offset + 0u))), "
+                 "asfloat(buffer.Load3((offset + 16u))), "
+                 "asfloat(buffer.Load3((offset + 32u))));"},
+        TypeCase{ty_mat3x4<f32>,
+                 "return float3x4(asfloat(buffer.Load4((offset + 0u))), "
+                 "asfloat(buffer.Load4((offset + 16u))), "
+                 "asfloat(buffer.Load4((offset + 32u))));"},
+        TypeCase{ty_mat4x2<f32>,
+                 "return float4x2(asfloat(buffer.Load2((offset + 0u))), "
+                 "asfloat(buffer.Load2((offset + 8u))), "
+                 "asfloat(buffer.Load2((offset + 16u))), "
+                 "asfloat(buffer.Load2((offset + 24u))));"},
+        TypeCase{ty_mat4x3<f32>,
+                 "return float4x3(asfloat(buffer.Load3((offset + 0u))), "
+                 "asfloat(buffer.Load3((offset + 16u))), "
+                 "asfloat(buffer.Load3((offset + 32u))), "
+                 "asfloat(buffer.Load3((offset + 48u))));"},
+        TypeCase{ty_mat4x4<f32>,
+                 "return float4x4(asfloat(buffer.Load4((offset + 0u))), "
+                 "asfloat(buffer.Load4((offset + 16u))), "
+                 "asfloat(buffer.Load4((offset + 32u))), "
+                 "asfloat(buffer.Load4((offset + 48u))));"},
+        TypeCase{ty_mat2x2<f16>,
+                 "return matrix<float16_t, 2, 2>("
+                 "buffer.Load<vector<float16_t, 2> >((offset + 0u)), "
+                 "buffer.Load<vector<float16_t, 2> >((offset + 4u)));"},
+        TypeCase{ty_mat2x3<f16>,
+                 "return matrix<float16_t, 2, 3>("
+                 "buffer.Load<vector<float16_t, 3> >((offset + 0u)), "
+                 "buffer.Load<vector<float16_t, 3> >((offset + 8u)));"},
+        TypeCase{ty_mat2x4<f16>,
+                 "return matrix<float16_t, 2, 4>("
+                 "buffer.Load<vector<float16_t, 4> >((offset + 0u)), "
+                 "buffer.Load<vector<float16_t, 4> >((offset + 8u)));"},
+        TypeCase{ty_mat3x2<f16>,
+                 "return matrix<float16_t, 3, 2>("
+                 "buffer.Load<vector<float16_t, 2> >((offset + 0u)), "
+                 "buffer.Load<vector<float16_t, 2> >((offset + 4u)), "
+                 "buffer.Load<vector<float16_t, 2> >((offset + 8u)));"},
+        TypeCase{ty_mat3x3<f16>,
+                 "return matrix<float16_t, 3, 3>("
+                 "buffer.Load<vector<float16_t, 3> >((offset + 0u)), "
+                 "buffer.Load<vector<float16_t, 3> >((offset + 8u)), "
+                 "buffer.Load<vector<float16_t, 3> >((offset + 16u)));"},
+        TypeCase{ty_mat3x4<f16>,
+                 "return matrix<float16_t, 3, 4>("
+                 "buffer.Load<vector<float16_t, 4> >((offset + 0u)), "
+                 "buffer.Load<vector<float16_t, 4> >((offset + 8u)), "
+                 "buffer.Load<vector<float16_t, 4> >((offset + 16u)));"},
+        TypeCase{ty_mat4x2<f16>,
+                 "return matrix<float16_t, 4, 2>("
+                 "buffer.Load<vector<float16_t, 2> >((offset + 0u)), "
+                 "buffer.Load<vector<float16_t, 2> >((offset + 4u)), "
+                 "buffer.Load<vector<float16_t, 2> >((offset + 8u)), "
+                 "buffer.Load<vector<float16_t, 2> >((offset + 12u)));"},
+        TypeCase{ty_mat4x3<f16>,
+                 "return matrix<float16_t, 4, 3>("
+                 "buffer.Load<vector<float16_t, 3> >((offset + 0u)), "
+                 "buffer.Load<vector<float16_t, 3> >((offset + 8u)), "
+                 "buffer.Load<vector<float16_t, 3> >((offset + 16u)), "
+                 "buffer.Load<vector<float16_t, 3> >((offset + 24u)));"},
+        TypeCase{ty_mat4x4<f16>,
+                 "return matrix<float16_t, 4, 4>("
+                 "buffer.Load<vector<float16_t, 4> >((offset + 0u)), "
+                 "buffer.Load<vector<float16_t, 4> >((offset + 8u)), "
+                 "buffer.Load<vector<float16_t, 4> >((offset + 16u)), "
+                 "buffer.Load<vector<float16_t, 4> >((offset + 24u)));"}));
+
+// Value-parameterized fixture for the uniform-buffer load tests in which the accessed member is
+// reached through a plain member access (no runtime index). Each instantiation supplies one
+// TypeCase.
+using HlslGeneratorImplTest_MemberAccessor_UniformBufferLoad_ConstantOffset =
+    HlslGeneratorImplTest_MemberAccessorWithParam<TypeCase>;
+// Generates HLSL for a read of a uniform-buffer struct member whose type comes from the test
+// parameter, and checks that the output contains the parameter's `expected` snippet.
+TEST_P(HlslGeneratorImplTest_MemberAccessor_UniformBufferLoad_ConstantOffset, Test) {
+    // WGSL equivalent of the program built below:
+    //
+    // struct Data {
+    //   a : i32,
+    //   b : <type>,
+    // };
+    // var<uniform> data : Data;
+    // var x = data.b;
+
+    auto p = GetParam();
+
+    // Enabled unconditionally; the f16 extension is presumably required by the f16-based
+    // parameter types.
+    Enable(ast::Extension::kF16);
+
+    // `a` precedes `b`, so the member under test does not sit at the start of the buffer.
+    SetupUniformBuffer(utils::Vector{
+        Member("a", ty.i32()),
+        Member("b", p.member_type(ty)),
+    });
+
+    SetupFunction(utils::Vector{
+        Decl(Var("x", MemberAccessor("data", "b"))),
+    });
+
+    GeneratorImpl& gen = SanitizeAndBuild();
+
+    // Generation must succeed before the output is inspected; on failure the generator's error
+    // text is attached to the assertion message.
+    ASSERT_TRUE(gen.Generate()) << gen.error();
+    // Substring match: the expectation only has to appear somewhere in the generated HLSL.
+    EXPECT_THAT(gen.result(), HasSubstr(p.expected));
+}
+
+// Expected HLSL for each member type exercised by the constant-offset uniform buffer load test.
+//  - Scalars and vectors: the load is expected inline in the declaration of `x`, indexing the
+//    uint4 elements of `data` directly. f16 values are unpacked from 32-bit words with
+//    f16tof32 combined with a mask or a shift.
+//  - Matrices: the expectation is the generated `tint_symbol` helper, which builds the matrix
+//    from one vector load per `offset + N` step out of its `uint4 buffer[...]` parameter.
+// Every matrix expectation runs through the helper's closing brace so that the whole function
+// body is pinned down.
+INSTANTIATE_TEST_SUITE_P(
+    HlslGeneratorImplTest_MemberAccessor,
+    HlslGeneratorImplTest_MemberAccessor_UniformBufferLoad_ConstantOffset,
+    testing::Values(TypeCase{ty_u32, "uint x = data[0].y;"},
+                    TypeCase{ty_f32, "float x = asfloat(data[0].y);"},
+                    TypeCase{ty_i32, "int x = asint(data[0].y);"},
+                    TypeCase{ty_f16, "float16_t x = float16_t(f16tof32(((data[0].y) & 0xFFFF)));"},
+                    TypeCase{ty_vec2<u32>, "uint2 x = data[0].zw;"},
+                    TypeCase{ty_vec2<f32>, "float2 x = asfloat(data[0].zw);"},
+                    TypeCase{ty_vec2<i32>, "int2 x = asint(data[0].zw);"},
+                    TypeCase{ty_vec2<f16>, R"(uint ubo_load = data[0].y;
+  vector<float16_t, 2> x = vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16)));)"},
+                    TypeCase{ty_vec3<u32>, "uint3 x = data[1].xyz;"},
+                    TypeCase{ty_vec3<f32>, "float3 x = asfloat(data[1].xyz);"},
+                    TypeCase{ty_vec3<i32>, "int3 x = asint(data[1].xyz);"},
+                    TypeCase{ty_vec3<f16>, R"(uint2 ubo_load = data[0].zw;
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  vector<float16_t, 3> x = vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]);)"},
+                    TypeCase{ty_vec4<u32>, "uint4 x = data[1];"},
+                    TypeCase{ty_vec4<f32>, "float4 x = asfloat(data[1]);"},
+                    TypeCase{ty_vec4<i32>, "int4 x = asint(data[1]);"},
+                    TypeCase{ty_vec4<f16>,
+                             R"(uint2 ubo_load = data[0].zw;
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  vector<float16_t, 4> x = vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]);)"},
+                    // f32 matrices: expected as a `tint_symbol` helper function.
+                    TypeCase{ty_mat2x2<f32>, R"(float2x2 tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  return float2x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)));
+})"},
+                    TypeCase{ty_mat2x3<f32>, R"(float2x3 tint_symbol(uint4 buffer[3], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz));
+})"},
+                    TypeCase{ty_mat2x4<f32>, R"(float2x4 tint_symbol(uint4 buffer[3], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]));
+})"},
+                    TypeCase{ty_mat3x2<f32>, R"(float3x2 tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
+  return float3x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)));
+})"},
+                    TypeCase{ty_mat3x3<f32>, R"(float3x3 tint_symbol(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz));
+})"},
+                    TypeCase{ty_mat3x4<f32>, R"(float3x4 tint_symbol(uint4 buffer[4], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]));
+})"},
+                    TypeCase{ty_mat4x2<f32>, R"(float4x2 tint_symbol(uint4 buffer[3], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_3 / 4];
+  return float4x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)), asfloat(((scalar_offset_3 & 2) ? ubo_load_3.zw : ubo_load_3.xy)));
+})"},
+                    TypeCase{ty_mat4x3<f32>, R"(float4x3 tint_symbol(uint4 buffer[5], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
+})"},
+                    TypeCase{ty_mat4x4<f32>, R"(float4x4 tint_symbol(uint4 buffer[5], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]), asfloat(buffer[scalar_offset_3 / 4]));
+})"},
+                    // f16 matrices: same helper shape, with each vector unpacked from 32-bit
+                    // words via f16tof32.
+                    TypeCase{ty_mat2x2<f16>,
+                             R"(matrix<float16_t, 2, 2> tint_symbol(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  return matrix<float16_t, 2, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))));
+})"},
+                    TypeCase{ty_mat2x3<f16>,
+                             R"(matrix<float16_t, 2, 3> tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  return matrix<float16_t, 2, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]));
+})"},
+                    TypeCase{ty_mat2x4<f16>,
+                             R"(matrix<float16_t, 2, 4> tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  return matrix<float16_t, 2, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]));
+})"},
+                    TypeCase{ty_mat3x2<f16>,
+                             R"(matrix<float16_t, 3, 2> tint_symbol(uint4 buffer[1], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  return matrix<float16_t, 3, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))));
+})"},
+                    TypeCase{ty_mat3x3<f16>,
+                             R"(matrix<float16_t, 3, 3> tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  return matrix<float16_t, 3, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]));
+})"},
+                    TypeCase{ty_mat3x4<f16>,
+                             R"(matrix<float16_t, 3, 4> tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  return matrix<float16_t, 3, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]));
+})"},
+                    TypeCase{ty_mat4x2<f16>,
+                             R"(matrix<float16_t, 4, 2> tint_symbol(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  const uint scalar_offset_3 = ((offset + 12u)) / 4;
+  uint ubo_load_3 = buffer[scalar_offset_3 / 4][scalar_offset_3 % 4];
+  return matrix<float16_t, 4, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))));
+})"},
+                    TypeCase{ty_mat4x3<f16>,
+                             R"(matrix<float16_t, 4, 3> tint_symbol(uint4 buffer[3], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  return matrix<float16_t, 4, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]), vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));
+})"},
+                    TypeCase{ty_mat4x4<f16>,
+                             R"(matrix<float16_t, 4, 4> tint_symbol(uint4 buffer[3], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  return matrix<float16_t, 4, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]), vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]));
+})"}));
+
+// Value-parameterized fixture for the uniform-buffer load tests in which the accessed member is
+// reached through an array element selected by a variable index. Each instantiation supplies one
+// TypeCase.
+using HlslGeneratorImplTest_MemberAccessor_UniformBufferLoad_DynamicOffset =
+    HlslGeneratorImplTest_MemberAccessorWithParam<TypeCase>;
+
+// Generates HLSL for a read of a parameter-typed struct member that lives inside an array element
+// of a uniform buffer, where the element is chosen by a variable, and checks that the output
+// contains the parameter's `expected` snippet.
+TEST_P(HlslGeneratorImplTest_MemberAccessor_UniformBufferLoad_DynamicOffset, Test) {
+    // WGSL equivalent of the program built below:
+    //
+    // struct Inner {
+    //   a : i32,
+    //   b : <type>,
+    //   c : vec4<i32>,
+    // };
+    // struct Data {
+    //  arr : array<Inner, 4i>,
+    // }
+    // var<uniform> data : Data;
+    // var i = 2i;
+    // var x = data.arr[i].b;
+
+    auto p = GetParam();
+
+    // Enabled unconditionally; the f16 extension is presumably required by the f16-based
+    // parameter types.
+    Enable(ast::Extension::kF16);
+
+    auto* inner = Structure("Inner", utils::Vector{
+                                         Member("a", ty.i32()),
+                                         Member("b", p.member_type(ty)),
+                                         Member("c", ty.vec4(ty.i32())),
+                                     });
+
+    SetupUniformBuffer(utils::Vector{
+        Member("arr", ty.array(ty.Of(inner), 4_i)),
+    });
+
+    // The index is declared as a `var` rather than used as a literal, so the array access goes
+    // through the variable `i`.
+    auto* i = Var("i", Expr(2_i));
+
+    SetupFunction(utils::Vector{
+        Decl(i),
+        Decl(Var("x", MemberAccessor(IndexAccessor(MemberAccessor("data", "arr"), i), "b"))),
+    });
+
+    GeneratorImpl& gen = SanitizeAndBuild();
+
+    // Generation must succeed before the output is inspected; on failure the generator's error
+    // text is attached to the assertion message.
+    ASSERT_TRUE(gen.Generate()) << gen.error();
+    // Substring match: the expectation only has to appear somewhere in the generated HLSL.
+    EXPECT_THAT(gen.result(), HasSubstr(p.expected));
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    HlslGeneratorImplTest_MemberAccessor,
+    HlslGeneratorImplTest_MemberAccessor_UniformBufferLoad_DynamicOffset,
+    testing::Values(
+        TypeCase{ty_u32, "x = data[scalar_offset / 4][scalar_offset % 4]"},
+        TypeCase{ty_f32, "x = asfloat(data[scalar_offset / 4][scalar_offset % 4])"},
+        TypeCase{ty_i32, "x = asint(data[scalar_offset / 4][scalar_offset % 4])"},
+        TypeCase{ty_f16, R"(const uint scalar_offset_bytes = (((32u * uint(i)) + 4u));
+  const uint scalar_offset_index = scalar_offset_bytes / 4;
+  float16_t x = float16_t(f16tof32(((data[scalar_offset_index / 4][scalar_offset_index % 4] >> (scalar_offset_bytes % 4 == 0 ? 0 : 16)) & 0xFFFF)));)"},
+        TypeCase{ty_vec2<u32>, R"(uint4 ubo_load = data[scalar_offset / 4];
+  uint2 x = ((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy);)"},
+        TypeCase{ty_vec2<f32>, R"(uint4 ubo_load = data[scalar_offset / 4];
+  float2 x = asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy));)"},
+        TypeCase{ty_vec2<i32>, R"(uint4 ubo_load = data[scalar_offset / 4];
+  int2 x = asint(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy));)"},
+        TypeCase{ty_vec2<f16>, R"(const uint scalar_offset = (((32u * uint(i)) + 4u)) / 4;
+  uint ubo_load = data[scalar_offset / 4][scalar_offset % 4];
+  vector<float16_t, 2> x = vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16)));)"},
+        TypeCase{ty_vec3<u32>, "x = data[scalar_offset / 4].xyz"},
+        TypeCase{ty_vec3<f32>, "x = asfloat(data[scalar_offset / 4].xyz)"},
+        TypeCase{ty_vec3<i32>, "x = asint(data[scalar_offset / 4].xyz)"},
+        TypeCase{ty_vec3<f16>, R"(const uint scalar_offset = (((32u * uint(i)) + 8u)) / 4;
+  uint4 ubo_load_1 = data[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  vector<float16_t, 3> x = vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]);)"},
+        TypeCase{ty_vec4<u32>, "x = data[scalar_offset / 4]"},
+        TypeCase{ty_vec4<f32>, "x = asfloat(data[scalar_offset / 4])"},
+        TypeCase{ty_vec4<i32>, "x = asint(data[scalar_offset / 4])"},
+        TypeCase{ty_vec4<f16>, R"(const uint scalar_offset = (((32u * uint(i)) + 8u)) / 4;
+  uint4 ubo_load_1 = data[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  vector<float16_t, 4> x = vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]);)"},
+        TypeCase{ty_mat2x2<f32>, R"(float2x2 tint_symbol(uint4 buffer[12], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  return float2x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)));
+})"},
+        TypeCase{ty_mat2x3<f32>, R"(float2x3 tint_symbol(uint4 buffer[16], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz));
+})"},
+        TypeCase{ty_mat2x4<f32>, R"(float2x4 tint_symbol(uint4 buffer[16], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  return float2x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]));
+})"},
+        TypeCase{ty_mat3x2<f32>, R"(float3x2 tint_symbol(uint4 buffer[12], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
+  return float3x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)));
+})"},
+        TypeCase{ty_mat3x3<f32>, R"(float3x3 tint_symbol(uint4 buffer[20], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz));
+})"},
+        TypeCase{ty_mat3x4<f32>, R"(float3x4 tint_symbol(uint4 buffer[20], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  return float3x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]));
+})"},
+        TypeCase{ty_mat4x2<f32>, R"(float4x2 tint_symbol(uint4 buffer[16], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load = buffer[scalar_offset / 4];
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset_1 / 4];
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_2 = buffer[scalar_offset_2 / 4];
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_3 / 4];
+  return float4x2(asfloat(((scalar_offset & 2) ? ubo_load.zw : ubo_load.xy)), asfloat(((scalar_offset_1 & 2) ? ubo_load_1.zw : ubo_load_1.xy)), asfloat(((scalar_offset_2 & 2) ? ubo_load_2.zw : ubo_load_2.xy)), asfloat(((scalar_offset_3 & 2) ? ubo_load_3.zw : ubo_load_3.xy)));
+})"},
+        TypeCase{ty_mat4x3<f32>, R"(float4x3 tint_symbol(uint4 buffer[24], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
+})"},
+        TypeCase{ty_mat4x4<f32>, R"(float4x4 tint_symbol(uint4 buffer[24], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x4(asfloat(buffer[scalar_offset / 4]), asfloat(buffer[scalar_offset_1 / 4]), asfloat(buffer[scalar_offset_2 / 4]), asfloat(buffer[scalar_offset_3 / 4]));
+})"},
+        TypeCase{ty_mat2x2<f16>,
+                 R"(matrix<float16_t, 2, 2> tint_symbol(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  return matrix<float16_t, 2, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))));
+})"},
+        TypeCase{ty_mat2x3<f16>,
+                 R"(matrix<float16_t, 2, 3> tint_symbol(uint4 buffer[12], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  return matrix<float16_t, 2, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]));
+})"},
+        TypeCase{ty_mat2x4<f16>,
+                 R"(matrix<float16_t, 2, 4> tint_symbol(uint4 buffer[12], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  return matrix<float16_t, 2, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]));
+})"},
+        TypeCase{ty_mat3x2<f16>,
+                 R"(matrix<float16_t, 3, 2> tint_symbol(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  return matrix<float16_t, 3, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))));
+})"},
+        TypeCase{ty_mat3x3<f16>,
+                 R"(matrix<float16_t, 3, 3> tint_symbol(uint4 buffer[12], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  return matrix<float16_t, 3, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]));
+})"},
+        TypeCase{ty_mat3x4<f16>,
+                 R"(matrix<float16_t, 3, 4> tint_symbol(uint4 buffer[12], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  return matrix<float16_t, 3, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]));
+})"},
+        TypeCase{ty_mat4x2<f16>,
+                 R"(matrix<float16_t, 4, 2> tint_symbol(uint4 buffer[12], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  const uint scalar_offset_3 = ((offset + 12u)) / 4;
+  uint ubo_load_3 = buffer[scalar_offset_3 / 4][scalar_offset_3 % 4];
+  return matrix<float16_t, 4, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_3 & 0xFFFF)), float16_t(f16tof32(ubo_load_3 >> 16))));
+})"},
+        TypeCase{ty_mat4x3<f16>,
+                 R"(matrix<float16_t, 4, 3> tint_symbol(uint4 buffer[16], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  return matrix<float16_t, 4, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]), vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));
+})"},
+        TypeCase{ty_mat4x4<f16>,
+                 R"(matrix<float16_t, 4, 4> tint_symbol(uint4 buffer[16], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  vector<float16_t, 2> ubo_load_yw = vector<float16_t, 2>(f16tof32(ubo_load >> 16));
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_2_yw = vector<float16_t, 2>(f16tof32(ubo_load_2 >> 16));
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_4_yw = vector<float16_t, 2>(f16tof32(ubo_load_4 >> 16));
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  vector<float16_t, 2> ubo_load_6_yw = vector<float16_t, 2>(f16tof32(ubo_load_6 >> 16));
+  return matrix<float16_t, 4, 4>(vector<float16_t, 4>(ubo_load_xz[0], ubo_load_yw[0], ubo_load_xz[1], ubo_load_yw[1]), vector<float16_t, 4>(ubo_load_2_xz[0], ubo_load_2_yw[0], ubo_load_2_xz[1], ubo_load_2_yw[1]), vector<float16_t, 4>(ubo_load_4_xz[0], ubo_load_4_yw[0], ubo_load_4_xz[1], ubo_load_4_yw[1]), vector<float16_t, 4>(ubo_load_6_xz[0], ubo_load_6_yw[0], ubo_load_6_xz[1], ubo_load_6_yw[1]));
+})"}));
 
 using HlslGeneratorImplTest_MemberAccessor_StorageBufferStore =
     HlslGeneratorImplTest_MemberAccessorWithParam<TypeCase>;
 TEST_P(HlslGeneratorImplTest_MemberAccessor_StorageBufferStore, Test) {
     // struct Data {
-    //   a : i32;
-    //   b : <type>;
+    //   a : i32,
+    //   b : <type>,
     // };
     // var<storage> data : Data;
     // data.b = <type>();
 
     auto p = GetParam();
 
+    Enable(ast::Extension::kF16);
+
     SetupStorageBuffer(utils::Vector{
         Member("a", ty.i32()),
         Member("b", p.member_type(ty)),
@@ -243,73 +1010,123 @@
     EXPECT_THAT(gen.result(), HasSubstr(p.expected));
 }
 
-INSTANTIATE_TEST_SUITE_P(HlslGeneratorImplTest_MemberAccessor,
-                         HlslGeneratorImplTest_MemberAccessor_StorageBufferStore,
-                         testing::Values(TypeCase{ty_u32, "data.Store(4u, asuint(value))"},
-                                         TypeCase{ty_f32, "data.Store(4u, asuint(value))"},
-                                         TypeCase{ty_i32, "data.Store(4u, asuint(value))"},
-                                         TypeCase{ty_vec2<u32>, "data.Store2(8u, asuint(value))"},
-                                         TypeCase{ty_vec2<f32>, "data.Store2(8u, asuint(value))"},
-                                         TypeCase{ty_vec2<i32>, "data.Store2(8u, asuint(value))"},
-                                         TypeCase{ty_vec3<u32>, "data.Store3(16u, asuint(value))"},
-                                         TypeCase{ty_vec3<f32>, "data.Store3(16u, asuint(value))"},
-                                         TypeCase{ty_vec3<i32>, "data.Store3(16u, asuint(value))"},
-                                         TypeCase{ty_vec4<u32>, "data.Store4(16u, asuint(value))"},
-                                         TypeCase{ty_vec4<f32>, "data.Store4(16u, asuint(value))"},
-                                         TypeCase{ty_vec4<i32>, "data.Store4(16u, asuint(value))"},
-                                         TypeCase{ty_mat2x2<f32>, R"({
+INSTANTIATE_TEST_SUITE_P(
+    HlslGeneratorImplTest_MemberAccessor,
+    HlslGeneratorImplTest_MemberAccessor_StorageBufferStore,
+    testing::Values(TypeCase{ty_u32, "data.Store(4u, asuint(value))"},
+                    TypeCase{ty_f32, "data.Store(4u, asuint(value))"},
+                    TypeCase{ty_i32, "data.Store(4u, asuint(value))"},
+                    TypeCase{ty_f16, "data.Store<float16_t>(4u, value)"},
+                    TypeCase{ty_vec2<u32>, "data.Store2(8u, asuint(value))"},
+                    TypeCase{ty_vec2<f32>, "data.Store2(8u, asuint(value))"},
+                    TypeCase{ty_vec2<i32>, "data.Store2(8u, asuint(value))"},
+                    TypeCase{ty_vec2<f16>, "data.Store<vector<float16_t, 2> >(4u, value)"},
+                    TypeCase{ty_vec3<u32>, "data.Store3(16u, asuint(value))"},
+                    TypeCase{ty_vec3<f32>, "data.Store3(16u, asuint(value))"},
+                    TypeCase{ty_vec3<i32>, "data.Store3(16u, asuint(value))"},
+                    TypeCase{ty_vec3<f16>, "data.Store<vector<float16_t, 3> >(8u, value)"},
+                    TypeCase{ty_vec4<u32>, "data.Store4(16u, asuint(value))"},
+                    TypeCase{ty_vec4<f32>, "data.Store4(16u, asuint(value))"},
+                    TypeCase{ty_vec4<i32>, "data.Store4(16u, asuint(value))"},
+                    TypeCase{ty_vec4<f16>, "data.Store<vector<float16_t, 4> >(8u, value)"},
+                    TypeCase{ty_mat2x2<f32>, R"({
   buffer.Store2((offset + 0u), asuint(value[0u]));
   buffer.Store2((offset + 8u), asuint(value[1u]));
 })"},
-                                         TypeCase{ty_mat2x3<f32>, R"({
+                    TypeCase{ty_mat2x3<f32>, R"({
   buffer.Store3((offset + 0u), asuint(value[0u]));
   buffer.Store3((offset + 16u), asuint(value[1u]));
 })"},
-                                         TypeCase{ty_mat2x4<f32>, R"({
+                    TypeCase{ty_mat2x4<f32>, R"({
   buffer.Store4((offset + 0u), asuint(value[0u]));
   buffer.Store4((offset + 16u), asuint(value[1u]));
 })"},
-                                         TypeCase{ty_mat3x2<f32>, R"({
+                    TypeCase{ty_mat3x2<f32>, R"({
   buffer.Store2((offset + 0u), asuint(value[0u]));
   buffer.Store2((offset + 8u), asuint(value[1u]));
   buffer.Store2((offset + 16u), asuint(value[2u]));
 })"},
-                                         TypeCase{ty_mat3x3<f32>, R"({
+                    TypeCase{ty_mat3x3<f32>, R"({
   buffer.Store3((offset + 0u), asuint(value[0u]));
   buffer.Store3((offset + 16u), asuint(value[1u]));
   buffer.Store3((offset + 32u), asuint(value[2u]));
 })"},
-                                         TypeCase{ty_mat3x4<f32>, R"({
+                    TypeCase{ty_mat3x4<f32>, R"({
   buffer.Store4((offset + 0u), asuint(value[0u]));
   buffer.Store4((offset + 16u), asuint(value[1u]));
   buffer.Store4((offset + 32u), asuint(value[2u]));
 })"},
-                                         TypeCase{ty_mat4x2<f32>, R"({
+                    TypeCase{ty_mat4x2<f32>, R"({
   buffer.Store2((offset + 0u), asuint(value[0u]));
   buffer.Store2((offset + 8u), asuint(value[1u]));
   buffer.Store2((offset + 16u), asuint(value[2u]));
   buffer.Store2((offset + 24u), asuint(value[3u]));
 })"},
-                                         TypeCase{ty_mat4x3<f32>, R"({
+                    TypeCase{ty_mat4x3<f32>, R"({
   buffer.Store3((offset + 0u), asuint(value[0u]));
   buffer.Store3((offset + 16u), asuint(value[1u]));
   buffer.Store3((offset + 32u), asuint(value[2u]));
   buffer.Store3((offset + 48u), asuint(value[3u]));
 })"},
-                                         TypeCase{ty_mat4x4<f32>, R"({
+                    TypeCase{ty_mat4x4<f32>, R"({
   buffer.Store4((offset + 0u), asuint(value[0u]));
   buffer.Store4((offset + 16u), asuint(value[1u]));
   buffer.Store4((offset + 32u), asuint(value[2u]));
   buffer.Store4((offset + 48u), asuint(value[3u]));
+})"},
+                    TypeCase{ty_mat2x2<f16>, R"({
+  buffer.Store<vector<float16_t, 2> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 4u), value[1u]);
+})"},
+                    TypeCase{ty_mat2x3<f16>, R"({
+  buffer.Store<vector<float16_t, 3> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 8u), value[1u]);
+})"},
+                    TypeCase{ty_mat2x4<f16>, R"({
+  buffer.Store<vector<float16_t, 4> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 8u), value[1u]);
+})"},
+                    TypeCase{ty_mat3x2<f16>, R"({
+  buffer.Store<vector<float16_t, 2> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 4u), value[1u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 8u), value[2u]);
+})"},
+                    TypeCase{ty_mat3x3<f16>, R"({
+  buffer.Store<vector<float16_t, 3> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 8u), value[1u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 16u), value[2u]);
+})"},
+                    TypeCase{ty_mat3x4<f16>, R"({
+  buffer.Store<vector<float16_t, 4> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 8u), value[1u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 16u), value[2u]);
+})"},
+                    TypeCase{ty_mat4x2<f16>, R"({
+  buffer.Store<vector<float16_t, 2> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 4u), value[1u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 8u), value[2u]);
+  buffer.Store<vector<float16_t, 2> >((offset + 12u), value[3u]);
+})"},
+                    TypeCase{ty_mat4x3<f16>, R"({
+  buffer.Store<vector<float16_t, 3> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 8u), value[1u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 16u), value[2u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 24u), value[3u]);
+})"},
+                    TypeCase{ty_mat4x4<f16>, R"({
+  buffer.Store<vector<float16_t, 4> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 8u), value[1u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 16u), value[2u]);
+  buffer.Store<vector<float16_t, 4> >((offset + 24u), value[3u]);
 })"}));
 
 TEST_F(HlslGeneratorImplTest_MemberAccessor, StorageBuffer_Store_Matrix_Empty) {
     // struct Data {
-    //   z : f32;
-    //   a : mat2x3<f32>;
+    //   a : i32,
+    //   b : mat2x3<f32>,
     // };
     // var<storage> data : Data;
-    // data.a = mat2x3<f32>();
+    // data.b = mat2x3<f32>();
 
     SetupStorageBuffer(utils::Vector{
         Member("a", ty.i32()),
@@ -339,10 +1156,10 @@
     EXPECT_EQ(gen.result(), expected);
 }
 
-TEST_F(HlslGeneratorImplTest_MemberAccessor, StorageBuffer_Load_Matrix_Single_Element) {
+TEST_F(HlslGeneratorImplTest_MemberAccessor, StorageBuffer_Load_Matrix_F32_Single_Element) {
     // struct Data {
-    //   z : f32;
-    //   a : mat4x3<f32>;
+    //   z : f32,
+    //   a : mat4x3<f32>,
     // };
     // var<storage> data : Data;
     // data.a[2i][1i];
@@ -370,17 +1187,119 @@
     EXPECT_EQ(gen.result(), expected);
 }
 
-TEST_F(HlslGeneratorImplTest_MemberAccessor,
-       EmitExpression_IndexAccessor_StorageBuffer_Load_Int_FromArray) {
+TEST_F(HlslGeneratorImplTest_MemberAccessor, StorageBuffer_Load_Matrix_F16_Single_Element) {
     // struct Data {
-    //   a : array<i32, 5>;
+    //   z : f16,
+    //   a : mat4x3<f16>,
+    // };
+    // var<storage> data : Data;
+    // data.a[2i][1i];
+
+    Enable(ast::Extension::kF16);
+
+    SetupStorageBuffer(utils::Vector{
+        Member("z", ty.f16()),
+        Member("a", ty.mat4x3<f16>()),
+    });
+
+    SetupFunction(utils::Vector{
+        Decl(Var("x", IndexAccessor(IndexAccessor(MemberAccessor("data", "a"), 2_i), 1_i))),
+    });
+
+    GeneratorImpl& gen = SanitizeAndBuild();
+
+    ASSERT_TRUE(gen.Generate()) << gen.error();
+    auto* expected =
+        R"(RWByteAddressBuffer data : register(u0, space1);
+
+void main() {
+  float16_t x = data.Load<float16_t>(26u);
+  return;
+}
+)";
+    EXPECT_EQ(gen.result(), expected);
+}
+
+TEST_F(HlslGeneratorImplTest_MemberAccessor, UniformBuffer_Load_Matrix_F32_Single_Element) {
+    // struct Data {
+    //   z : f32,
+    //   a : mat4x3<f32>,
+    // };
+    // var<uniform> data : Data;
+    // data.a[2i][1i];
+
+    SetupUniformBuffer(utils::Vector{
+        Member("z", ty.f32()),
+        Member("a", ty.mat4x3<f32>()),
+    });
+
+    SetupFunction(utils::Vector{
+        Decl(Var("x", IndexAccessor(IndexAccessor(MemberAccessor("data", "a"), 2_i), 1_i))),
+    });
+
+    GeneratorImpl& gen = SanitizeAndBuild();
+
+    ASSERT_TRUE(gen.Generate()) << gen.error();
+    auto* expected =
+        R"(cbuffer cbuffer_data : register(b1, space1) {
+  uint4 data[5];
+};
+
+void main() {
+  float x = asfloat(data[3].y);
+  return;
+}
+)";
+    EXPECT_EQ(gen.result(), expected);
+}
+
+TEST_F(HlslGeneratorImplTest_MemberAccessor, UniformBuffer_Load_Matrix_F16_Single_Element) {
+    // struct Data {
+    //   z : f16,
+    //   a : mat4x3<f16>,
+    // };
+    // var<uniform> data : Data;
+    // data.a[2i][1i];
+
+    Enable(ast::Extension::kF16);
+
+    SetupUniformBuffer(utils::Vector{
+        Member("z", ty.f16()),
+        Member("a", ty.mat4x3<f16>()),
+    });
+
+    SetupFunction(utils::Vector{
+        Decl(Var("x", IndexAccessor(IndexAccessor(MemberAccessor("data", "a"), 2_i), 1_i))),
+    });
+
+    GeneratorImpl& gen = SanitizeAndBuild();
+
+    ASSERT_TRUE(gen.Generate()) << gen.error();
+    auto* expected =
+        R"(cbuffer cbuffer_data : register(b1, space1) {
+  uint4 data[3];
+};
+
+void main() {
+  float16_t x = float16_t(f16tof32(((data[1].z >> 16) & 0xFFFF)));
+  return;
+}
+)";
+    EXPECT_EQ(gen.result(), expected);
+}
+
+TEST_F(HlslGeneratorImplTest_MemberAccessor,
+       EmitExpression_IndexAccessor_StorageBuffer_Load_I32_FromArray) {
+    // struct Data {
+    //   z : f32,
+    //   a : array<i32, 5i>,
     // };
     // var<storage> data : Data;
     // data.a[2];
 
     SetupStorageBuffer(utils::Vector{
         Member("z", ty.f32()),
-        Member("a", ty.array<i32, 5>(4)),
+        Member("a", ty.array(ty.i32(), 5_i)),
     });
 
     SetupFunction(utils::Vector{
@@ -402,16 +1321,154 @@
 }
 
 TEST_F(HlslGeneratorImplTest_MemberAccessor,
-       EmitExpression_IndexAccessor_StorageBuffer_Load_Int_FromArray_ExprIdx) {
+       EmitExpression_IndexAccessor_UniformBuffer_Load_Vec4_I32_FromArray) {
     // struct Data {
-    //   a : array<i32, 5>;
+    //   z : f32,
+    //   a : array<vec4<i32>, 5i>,
+    // };
+    // var<uniform> data : Data;
+    // data.a[2];
+
+    SetupUniformBuffer(utils::Vector{
+        Member("z", ty.f32()),
+        Member("a", ty.array(ty.vec4(ty.i32()), 5_i)),
+    });
+
+    SetupFunction(utils::Vector{
+        Decl(Var("x", IndexAccessor(MemberAccessor("data", "a"), 2_i))),
+    });
+
+    GeneratorImpl& gen = SanitizeAndBuild();
+
+    ASSERT_TRUE(gen.Generate()) << gen.error();
+    auto* expected =
+        R"(cbuffer cbuffer_data : register(b1, space1) {
+  uint4 data[6];
+};
+
+void main() {
+  int4 x = asint(data[3]);
+  return;
+}
+)";
+    EXPECT_EQ(gen.result(), expected);
+}
+
+TEST_F(HlslGeneratorImplTest_MemberAccessor,
+       EmitExpression_IndexAccessor_StorageBuffer_Load_Struct_FromArray) {
+    // struct Inner {
+    //   @size(16i) @align(16i)
+    //   v : i32,
+    // };
+    // struct Data {
+    //   z : f32,
+    //   a : array<Inner, 5i>,
+    // };
+    // var<storage> data : Data;
+    // data.a[2i];
+
+    auto* elem_type = Structure(
+        "Inner", utils::Vector{
+                     Member("v", ty.i32(), utils::Vector{MemberSize(16_i), MemberAlign(16_i)}),
+                 });
+
+    SetupStorageBuffer(utils::Vector{
+        Member("z", ty.f32()),
+        Member("a", ty.array(ty.Of(elem_type), 5_i)),
+    });
+
+    SetupFunction(utils::Vector{
+        Decl(Var("x", IndexAccessor(MemberAccessor("data", "a"), 2_i))),
+    });
+
+    GeneratorImpl& gen = SanitizeAndBuild();
+
+    ASSERT_TRUE(gen.Generate()) << gen.error();
+    auto* expected =
+        R"(struct Inner {
+  int v;
+};
+
+RWByteAddressBuffer data : register(u0, space1);
+
+Inner tint_symbol(RWByteAddressBuffer buffer, uint offset) {
+  const Inner tint_symbol_2 = {asint(buffer.Load((offset + 0u)))};
+  return tint_symbol_2;
+}
+
+void main() {
+  Inner x = tint_symbol(data, 48u);
+  return;
+}
+)";
+    EXPECT_EQ(gen.result(), expected);
+}
+
+TEST_F(HlslGeneratorImplTest_MemberAccessor,
+       EmitExpression_IndexAccessor_UniformBuffer_Load_Struct_FromArray) {
+    // struct Inner {
+    //   @size(16i) @align(16i)
+    //   v : i32,
+    // };
+    // struct Data {
+    //   z : f32,
+    //   a : array<Inner, 5i>,
+    // };
+    // var<uniform> data : Data;
+    // data.a[2i];
+
+    auto* elem_type = Structure(
+        "Inner", utils::Vector{
+                     Member("v", ty.i32(), utils::Vector{MemberSize(16_i), MemberAlign(16_i)}),
+                 });
+
+    SetupUniformBuffer(utils::Vector{
+        Member("z", ty.f32()),
+        Member("a", ty.array(ty.Of(elem_type), 5_i)),
+    });
+
+    SetupFunction(utils::Vector{
+        Decl(Var("x", IndexAccessor(MemberAccessor("data", "a"), 2_i))),
+    });
+
+    GeneratorImpl& gen = SanitizeAndBuild();
+
+    ASSERT_TRUE(gen.Generate()) << gen.error();
+    auto* expected =
+        R"(struct Inner {
+  int v;
+};
+
+cbuffer cbuffer_data : register(b1, space1) {
+  uint4 data[6];
+};
+
+Inner tint_symbol(uint4 buffer[6], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const Inner tint_symbol_2 = {asint(buffer[scalar_offset / 4][scalar_offset % 4])};
+  return tint_symbol_2;
+}
+
+void main() {
+  Inner x = tint_symbol(data, 48u);
+  return;
+}
+)";
+    EXPECT_EQ(gen.result(), expected);
+}
+
+TEST_F(HlslGeneratorImplTest_MemberAccessor,
+       EmitExpression_IndexAccessor_StorageBuffer_Load_I32_FromArray_ExprIdx) {
+    // struct Data {
+    //   z : f32,
+    //   a : array<i32, 5i>,
     // };
     // var<storage> data : Data;
     // data.a[(2i + 4i) - 3i];
 
     SetupStorageBuffer(utils::Vector{
         Member("z", ty.f32()),
-        Member("a", ty.array<i32, 5>(4)),
+        Member("a", ty.array(ty.i32(), 5_i)),
     });
 
     SetupFunction(utils::Vector{
@@ -438,16 +1495,57 @@
     EXPECT_EQ(gen.result(), expected);
 }
 
+TEST_F(HlslGeneratorImplTest_MemberAccessor,
+       EmitExpression_IndexAccessor_UniformBuffer_Load_Vec4_I32_FromArray_ExprIdx) {
+    // struct Data {
+    //   z : f32,
+    //   a : array<vec4<i32>, 5i>,
+    // };
+    // var<uniform> data : Data;
+    // data.a[(2i + 4i) - 3i];
+
+    SetupUniformBuffer(utils::Vector{
+        Member("z", ty.f32()),
+        Member("a", ty.array(ty.vec4(ty.i32()), 5_i)),
+    });
+
+    SetupFunction(utils::Vector{
+        Decl(Var("a", Expr(2_i))),
+        Decl(Var("b", Expr(4_i))),
+        Decl(Var("c", Expr(3_i))),
+        Decl(Var("x", IndexAccessor(MemberAccessor("data", "a"), Sub(Add("a", "b"), "c")))),
+    });
+
+    GeneratorImpl& gen = SanitizeAndBuild();
+
+    ASSERT_TRUE(gen.Generate()) << gen.error();
+    auto* expected =
+        R"(cbuffer cbuffer_data : register(b1, space1) {
+  uint4 data[6];
+};
+
+void main() {
+  int a = 2;
+  int b = 4;
+  int c = 3;
+  const uint scalar_offset = ((16u + (16u * uint(((a + b) - c))))) / 4;
+  int4 x = asint(data[scalar_offset / 4]);
+  return;
+}
+)";
+    EXPECT_EQ(gen.result(), expected);
+}
+
 TEST_F(HlslGeneratorImplTest_MemberAccessor, StorageBuffer_Store_ToArray) {
     // struct Data {
-    //   a : array<i32, 5>;
+    //   a : array<i32, 5i>,
     // };
     // var<storage> data : Data;
-    // data.a[2] = 2;
+    // data.a[2i] = 2i;
 
     SetupStorageBuffer(utils::Vector{
         Member("z", ty.f32()),
-        Member("a", ty.array<i32, 5>(4)),
+        Member("a", ty.array(ty.i32(), 5_i)),
     });
 
     SetupFunction(utils::Vector{
@@ -470,23 +1568,23 @@
 
 TEST_F(HlslGeneratorImplTest_MemberAccessor, StorageBuffer_Load_MultiLevel) {
     // struct Inner {
-    //   a : vec3<i32>;
-    //   b : vec3<f32>;
+    //   a : vec3<i32>,
+    //   b : vec3<f32>,
     // };
     // struct Data {
-    //   var c : array<Inner, 4u>;
+    //   c : array<Inner, 4u>,
     // };
     //
-    // var<storage> data : Pre;
-    // data.c[2].b
+    // var<storage> data : Data;
+    // data.c[2i].b
 
     auto* inner = Structure("Inner", utils::Vector{
-                                         Member("a", ty.vec3<f32>()),
+                                         Member("a", ty.vec3<i32>()),
                                          Member("b", ty.vec3<f32>()),
                                      });
 
     SetupStorageBuffer(utils::Vector{
-        Member("c", ty.array(ty.Of(inner), 4_u, 32)),
+        Member("c", ty.array(ty.Of(inner), 4_u)),
     });
 
     SetupFunction(utils::Vector{
@@ -507,31 +1605,72 @@
     EXPECT_EQ(gen.result(), expected);
 }
 
-TEST_F(HlslGeneratorImplTest_MemberAccessor, StorageBuffer_Load_MultiLevel_Swizzle) {
+TEST_F(HlslGeneratorImplTest_MemberAccessor, UniformBuffer_Load_MultiLevel) {
     // struct Inner {
-    //   a : vec3<i32>;
-    //   b : vec3<f32>;
+    //   a : vec3<i32>,
+    //   b : vec3<f32>,
     // };
     // struct Data {
-    //   var c : array<Inner, 4u>;
+    //   c : array<Inner, 4u>,
     // };
     //
-    // var<storage> data : Pre;
-    // data.c[2].b.xy
+    // var<uniform> data : Data;
+    // data.c[2i].b
 
     auto* inner = Structure("Inner", utils::Vector{
-                                         Member("a", ty.vec3<f32>()),
+                                         Member("a", ty.vec3<i32>()),
+                                         Member("b", ty.vec3<f32>()),
+                                     });
+
+    SetupUniformBuffer(utils::Vector{
+        Member("c", ty.array(ty.Of(inner), 4_u)),
+    });
+
+    SetupFunction(utils::Vector{
+        Decl(Var("x", MemberAccessor(IndexAccessor(MemberAccessor("data", "c"), 2_i), "b"))),
+    });
+
+    GeneratorImpl& gen = SanitizeAndBuild();
+
+    ASSERT_TRUE(gen.Generate()) << gen.error();
+    auto* expected =
+        R"(cbuffer cbuffer_data : register(b1, space1) {
+  uint4 data[8];
+};
+
+void main() {
+  float3 x = asfloat(data[5].xyz);
+  return;
+}
+)";
+    EXPECT_EQ(gen.result(), expected);
+}
+
+TEST_F(HlslGeneratorImplTest_MemberAccessor, StorageBuffer_Load_MultiLevel_Swizzle) {
+    // struct Inner {
+    //   a : vec3<i32>,
+    //   b : vec3<f32>,
+    // };
+    // struct Data {
+    //   c : array<Inner, 4u>,
+    // };
+    //
+    // var<storage> data : Data;
+    // data.c[2i].b.yx
+
+    auto* inner = Structure("Inner", utils::Vector{
+                                         Member("a", ty.vec3<i32>()),
                                          Member("b", ty.vec3<f32>()),
                                      });
 
     SetupStorageBuffer(utils::Vector{
-        Member("c", ty.array(ty.Of(inner), 4_u, 32)),
+        Member("c", ty.array(ty.Of(inner), 4_u)),
     });
 
     SetupFunction(utils::Vector{
         Decl(Var("x",
                  MemberAccessor(
-                     MemberAccessor(IndexAccessor(MemberAccessor("data", "c"), 2_i), "b"), "xy"))),
+                     MemberAccessor(IndexAccessor(MemberAccessor("data", "c"), 2_i), "b"), "yx"))),
     });
 
     GeneratorImpl& gen = SanitizeAndBuild();
@@ -541,7 +1680,50 @@
         R"(RWByteAddressBuffer data : register(u0, space1);
 
 void main() {
-  float2 x = asfloat(data.Load3(80u)).xy;
+  float2 x = asfloat(data.Load3(80u)).yx;
+  return;
+}
+)";
+    EXPECT_EQ(gen.result(), expected);
+}
+
+TEST_F(HlslGeneratorImplTest_MemberAccessor, UniformBuffer_Load_MultiLevel_Swizzle) {
+    // struct Inner {
+    //   a : vec3<i32>,
+    //   b : vec3<f32>,
+    // };
+    // struct Data {
+    //   c : array<Inner, 4u>,
+    // };
+    //
+    // var<uniform> data : Data;
+    // data.c[2i].b.yx
+
+    auto* inner = Structure("Inner", utils::Vector{
+                                         Member("a", ty.vec3<i32>()),
+                                         Member("b", ty.vec3<f32>()),
+                                     });
+
+    SetupUniformBuffer(utils::Vector{
+        Member("c", ty.array(ty.Of(inner), 4_u)),
+    });
+
+    SetupFunction(utils::Vector{
+        Decl(Var("x",
+                 MemberAccessor(
+                     MemberAccessor(IndexAccessor(MemberAccessor("data", "c"), 2_i), "b"), "yx"))),
+    });
+
+    GeneratorImpl& gen = SanitizeAndBuild();
+
+    ASSERT_TRUE(gen.Generate()) << gen.error();
+    auto* expected =
+        R"(cbuffer cbuffer_data : register(b1, space1) {
+  uint4 data[8];
+};
+
+void main() {
+  float2 x = asfloat(data[5].xyz).yx;
   return;
 }
 )";
@@ -551,23 +1733,23 @@
 TEST_F(HlslGeneratorImplTest_MemberAccessor,
        StorageBuffer_Load_MultiLevel_Swizzle_SingleLetter) {  // NOLINT
     // struct Inner {
-    //   a : vec3<i32>;
-    //   b : vec3<f32>;
+    //   a : vec3<i32>,
+    //   b : vec3<f32>,
     // };
     // struct Data {
-    //   var c : array<Inner, 4u>;
+    //   c : array<Inner, 4u>,
     // };
     //
-    // var<storage> data : Pre;
-    // data.c[2].b.g
+    // var<storage> data : Data;
+    // data.c[2i].b.g
 
     auto* inner = Structure("Inner", utils::Vector{
-                                         Member("a", ty.vec3<f32>()),
+                                         Member("a", ty.vec3<i32>()),
                                          Member("b", ty.vec3<f32>()),
                                      });
 
     SetupStorageBuffer(utils::Vector{
-        Member("c", ty.array(ty.Of(inner), 4_u, 32)),
+        Member("c", ty.array(ty.Of(inner), 4_u)),
     });
 
     SetupFunction(utils::Vector{
@@ -590,25 +1772,69 @@
     EXPECT_EQ(gen.result(), expected);
 }
 
-TEST_F(HlslGeneratorImplTest_MemberAccessor, StorageBuffer_Load_MultiLevel_Index) {
+TEST_F(HlslGeneratorImplTest_MemberAccessor,
+       UniformBuffer_Load_MultiLevel_Swizzle_SingleLetter) {  // NOLINT
     // struct Inner {
-    //   a : vec3<i32>;
-    //   b : vec3<f32>;
+    //   a : vec3<i32>,
+    //   b : vec3<f32>,
     // };
     // struct Data {
-    //   var c : array<Inner, 4u>;
+    //   var c : array<Inner, 4u>,
     // };
     //
-    // var<storage> data : Pre;
-    // data.c[2].b[1]
+    // var<uniform> data : Data;
+    // data.c[2i].b.g
 
     auto* inner = Structure("Inner", utils::Vector{
-                                         Member("a", ty.vec3<f32>()),
+                                         Member("a", ty.vec3<i32>()),
+                                         Member("b", ty.vec3<f32>()),
+                                     });
+
+    SetupUniformBuffer(utils::Vector{
+        Member("c", ty.array(ty.Of(inner), 4_u)),
+    });
+
+    SetupFunction(utils::Vector{
+        Decl(Var("x",
+                 MemberAccessor(
+                     MemberAccessor(IndexAccessor(MemberAccessor("data", "c"), 2_i), "b"), "g"))),
+    });
+
+    GeneratorImpl& gen = SanitizeAndBuild();
+
+    ASSERT_TRUE(gen.Generate()) << gen.error();
+    auto* expected =
+        R"(cbuffer cbuffer_data : register(b1, space1) {
+  uint4 data[8];
+};
+
+void main() {
+  float x = asfloat(data[5].y);
+  return;
+}
+)";
+    EXPECT_EQ(gen.result(), expected);
+}
+
+TEST_F(HlslGeneratorImplTest_MemberAccessor, StorageBuffer_Load_MultiLevel_Index) {
+    // struct Inner {
+    //   a : vec3<i32>,
+    //   b : vec3<f32>,
+    // };
+    // struct Data {
+    //   var c : array<Inner, 4u>,
+    // };
+    //
+    // var<storage> data : Data;
+    // data.c[2i].b[1i]
+
+    auto* inner = Structure("Inner", utils::Vector{
+                                         Member("a", ty.vec3<i32>()),
                                          Member("b", ty.vec3<f32>()),
                                      });
 
     SetupStorageBuffer(utils::Vector{
-        Member("c", ty.array(ty.Of(inner), 4_u, 32)),
+        Member("c", ty.array(ty.Of(inner), 4_u)),
     });
 
     SetupFunction(utils::Vector{
@@ -631,25 +1857,68 @@
     EXPECT_EQ(gen.result(), expected);
 }
 
-TEST_F(HlslGeneratorImplTest_MemberAccessor, StorageBuffer_Store_MultiLevel) {
+TEST_F(HlslGeneratorImplTest_MemberAccessor, UniformBuffer_Load_MultiLevel_Index) {
     // struct Inner {
-    //   a : vec3<i32>;
-    //   b : vec3<f32>;
+    //   a : vec3<i32>,
+    //   b : vec3<f32>,
     // };
     // struct Data {
-    //   var c : array<Inner, 4u>;
+    //   var c : array<Inner, 4u>,
+    // };
+    //
+    // var<uniform> data : Data;
+    // data.c[2i].b[1i]
+
+    auto* inner = Structure("Inner", utils::Vector{
+                                         Member("a", ty.vec3<i32>()),
+                                         Member("b", ty.vec3<f32>()),
+                                     });
+
+    SetupUniformBuffer(utils::Vector{
+        Member("c", ty.array(ty.Of(inner), 4_u)),
+    });
+
+    SetupFunction(utils::Vector{
+        Decl(Var("x",
+                 IndexAccessor(MemberAccessor(IndexAccessor(MemberAccessor("data", "c"), 2_i), "b"),
+                               1_i))),
+    });
+
+    GeneratorImpl& gen = SanitizeAndBuild();
+
+    ASSERT_TRUE(gen.Generate()) << gen.error();
+    auto* expected =
+        R"(cbuffer cbuffer_data : register(b1, space1) {
+  uint4 data[8];
+};
+
+void main() {
+  float x = asfloat(data[5].y);
+  return;
+}
+)";
+    EXPECT_EQ(gen.result(), expected);
+}
+
+TEST_F(HlslGeneratorImplTest_MemberAccessor, StorageBuffer_Store_MultiLevel) {
+    // struct Inner {
+    //   a : vec3<i32>,
+    //   b : vec3<f32>,
+    // };
+    // struct Data {
+    //   var c : array<Inner, 4u>,
     // };
     //
     // var<storage> data : Pre;
-    // data.c[2].b = vec3<f32>(1_f, 2_f, 3_f);
+    // data.c[2i].b = vec3<f32>(1_f, 2_f, 3_f);
 
     auto* inner = Structure("Inner", utils::Vector{
-                                         Member("a", ty.vec3<f32>()),
+                                         Member("a", ty.vec3<i32>()),
                                          Member("b", ty.vec3<f32>()),
                                      });
 
     SetupStorageBuffer(utils::Vector{
-        Member("c", ty.array(ty.Of(inner), 4_u, 32)),
+        Member("c", ty.array(ty.Of(inner), 4_u)),
     });
 
     SetupFunction(utils::Vector{
@@ -673,15 +1942,15 @@
 
 TEST_F(HlslGeneratorImplTest_MemberAccessor, StorageBuffer_Store_Swizzle_SingleLetter) {
     // struct Inner {
-    //   a : vec3<i32>;
-    //   b : vec3<f32>;
+    //   a : vec3<i32>,
+    //   b : vec3<f32>,
     // };
     // struct Data {
-    //   var c : array<Inner, 4u>;
+    //   var c : array<Inner, 4u>,
     // };
     //
     // var<storage> data : Pre;
-    // data.c[2].b.y = 1.f;
+    // data.c[2i].b.y = 1.f;
 
     auto* inner = Structure("Inner", utils::Vector{
                                          Member("a", ty.vec3<i32>()),
@@ -689,7 +1958,7 @@
                                      });
 
     SetupStorageBuffer(utils::Vector{
-        Member("c", ty.array(ty.Of(inner), 4_u, 32)),
+        Member("c", ty.array(ty.Of(inner), 4_u)),
     });
 
     SetupFunction(utils::Vector{

diff --git a/src/tint/writer/spirv/builder.cc b/src/tint/writer/spirv/builder.cc
index 9b1e077..c5e4a38 100644
--- a/src/tint/writer/spirv/builder.cc
+++ b/src/tint/writer/spirv/builder.cc

@@ -3955,11 +3955,7 @@
     if (matrix_type) {
         push_annot(spv::Op::OpMemberDecorate,
                    {Operand(struct_id), Operand(idx), U32Operand(SpvDecorationColMajor)});
-        if (!matrix_type->type()->Is<sem::F32>()) {
-            error_ = "matrix scalar element type must be f32";
-            return 0;
-        }
-        const uint32_t scalar_elem_size = 4;
+        const uint32_t scalar_elem_size = matrix_type->type()->Size();
         const uint32_t effective_row_count = (matrix_type->rows() == 2) ? 2 : 4;
         push_annot(spv::Op::OpMemberDecorate,
                    {Operand(struct_id), Operand(idx), U32Operand(SpvDecorationMatrixStride),

diff --git a/src/tint/writer/spirv/builder_type_test.cc b/src/tint/writer/spirv/builder_type_test.cc
index a17dcb5..4377c42 100644
--- a/src/tint/writer/spirv/builder_type_test.cc
+++ b/src/tint/writer/spirv/builder_type_test.cc

@@ -317,7 +317,9 @@
 }
 
 TEST_F(BuilderTest_Type, GenerateStruct) {
-    auto* s = Structure("my_struct", utils::Vector{Member("a", ty.f32())});
+    Enable(ast::Extension::kF16);
+
+    auto* s = Structure("my_struct", utils::Vector{Member("a", ty.f32()), Member("b", ty.f16())});
 
     spirv::Builder& b = Build();
 
@@ -326,17 +328,23 @@
     EXPECT_EQ(id, 1u);
 
     EXPECT_EQ(DumpInstructions(b.types()), R"(%2 = OpTypeFloat 32
-%1 = OpTypeStruct %2
+%3 = OpTypeFloat 16
+%1 = OpTypeStruct %2 %3
 )");
     EXPECT_EQ(DumpInstructions(b.debug()), R"(OpName %1 "my_struct"
 OpMemberName %1 0 "a"
+OpMemberName %1 1 "b"
 )");
 }
 
 TEST_F(BuilderTest_Type, GenerateStruct_DecoratedMembers) {
+    Enable(ast::Extension::kF16);
+
     auto* s = Structure("S", utils::Vector{
                                  Member("a", ty.f32()),
                                  Member("b", ty.f32(), utils::Vector{MemberAlign(8_i)}),
+                                 Member("c", ty.f16(), utils::Vector{MemberAlign(8_u)}),
+                                 Member("d", ty.f16()),
                              });
 
     spirv::Builder& b = Build();
@@ -346,23 +354,34 @@
     EXPECT_EQ(id, 1u);
 
     EXPECT_EQ(DumpInstructions(b.types()), R"(%2 = OpTypeFloat 32
-%1 = OpTypeStruct %2 %2
+%3 = OpTypeFloat 16
+%1 = OpTypeStruct %2 %2 %3 %3
 )");
     EXPECT_EQ(DumpInstructions(b.debug()), R"(OpName %1 "S"
 OpMemberName %1 0 "a"
 OpMemberName %1 1 "b"
+OpMemberName %1 2 "c"
+OpMemberName %1 3 "d"
 )");
     EXPECT_EQ(DumpInstructions(b.annots()), R"(OpMemberDecorate %1 0 Offset 0
 OpMemberDecorate %1 1 Offset 8
+OpMemberDecorate %1 2 Offset 16
+OpMemberDecorate %1 3 Offset 18
 )");
 }
 
-TEST_F(BuilderTest_Type, GenerateStruct_NonLayout_Matrix) {
-    auto* s = Structure("S", utils::Vector{
-                                 Member("a", ty.mat2x2<f32>()),
-                                 Member("b", ty.mat2x3<f32>()),
-                                 Member("c", ty.mat4x4<f32>()),
-                             });
+TEST_F(BuilderTest_Type, GenerateStruct_DecoratedMembers_Matrix) {
+    Enable(ast::Extension::kF16);
+
+    auto* s =
+        Structure("S", utils::Vector{
+                           Member("mat2x2_f32", ty.mat2x2<f32>()),
+                           Member("mat2x3_f32", ty.mat2x3<f32>(), utils::Vector{MemberAlign(64_i)}),
+                           Member("mat4x4_f32", ty.mat4x4<f32>()),
+                           Member("mat2x2_f16", ty.mat2x2<f16>(), utils::Vector{MemberAlign(32_i)}),
+                           Member("mat2x3_f16", ty.mat2x3<f16>()),
+                           Member("mat4x4_f16", ty.mat4x4<f16>(), utils::Vector{MemberAlign(64_i)}),
+                       });
 
     spirv::Builder& b = Build();
 
@@ -377,78 +396,63 @@
 %5 = OpTypeMatrix %6 2
 %8 = OpTypeVector %4 4
 %7 = OpTypeMatrix %8 4
-%1 = OpTypeStruct %2 %5 %7
+%11 = OpTypeFloat 16
+%10 = OpTypeVector %11 2
+%9 = OpTypeMatrix %10 2
+%13 = OpTypeVector %11 3
+%12 = OpTypeMatrix %13 2
+%15 = OpTypeVector %11 4
+%14 = OpTypeMatrix %15 4
+%1 = OpTypeStruct %2 %5 %7 %9 %12 %14
 )");
     EXPECT_EQ(DumpInstructions(b.debug()), R"(OpName %1 "S"
-OpMemberName %1 0 "a"
-OpMemberName %1 1 "b"
-OpMemberName %1 2 "c"
+OpMemberName %1 0 "mat2x2_f32"
+OpMemberName %1 1 "mat2x3_f32"
+OpMemberName %1 2 "mat4x4_f32"
+OpMemberName %1 3 "mat2x2_f16"
+OpMemberName %1 4 "mat2x3_f16"
+OpMemberName %1 5 "mat4x4_f16"
 )");
     EXPECT_EQ(DumpInstructions(b.annots()), R"(OpMemberDecorate %1 0 Offset 0
 OpMemberDecorate %1 0 ColMajor
 OpMemberDecorate %1 0 MatrixStride 8
-OpMemberDecorate %1 1 Offset 16
+OpMemberDecorate %1 1 Offset 64
 OpMemberDecorate %1 1 ColMajor
 OpMemberDecorate %1 1 MatrixStride 16
-OpMemberDecorate %1 2 Offset 48
+OpMemberDecorate %1 2 Offset 96
 OpMemberDecorate %1 2 ColMajor
 OpMemberDecorate %1 2 MatrixStride 16
+OpMemberDecorate %1 3 Offset 160
+OpMemberDecorate %1 3 ColMajor
+OpMemberDecorate %1 3 MatrixStride 4
+OpMemberDecorate %1 4 Offset 168
+OpMemberDecorate %1 4 ColMajor
+OpMemberDecorate %1 4 MatrixStride 8
+OpMemberDecorate %1 5 Offset 192
+OpMemberDecorate %1 5 ColMajor
+OpMemberDecorate %1 5 MatrixStride 8
 )");
 }
 
-TEST_F(BuilderTest_Type, GenerateStruct_DecoratedMembers_LayoutMatrix) {
-    // We have to infer layout for matrix when it also has an offset.
-    auto* s = Structure("S", utils::Vector{
-                                 Member("a", ty.mat2x2<f32>()),
-                                 Member("b", ty.mat2x3<f32>()),
-                                 Member("c", ty.mat4x4<f32>()),
-                             });
+TEST_F(BuilderTest_Type, GenerateStruct_DecoratedMembers_ArraysOfMatrix) {
+    Enable(ast::Extension::kF16);
 
-    spirv::Builder& b = Build();
-
-    auto id = b.GenerateTypeIfNeeded(program->TypeOf(s));
-    ASSERT_FALSE(b.has_error()) << b.error();
-    EXPECT_EQ(id, 1u);
-
-    EXPECT_EQ(DumpInstructions(b.types()), R"(%4 = OpTypeFloat 32
-%3 = OpTypeVector %4 2
-%2 = OpTypeMatrix %3 2
-%6 = OpTypeVector %4 3
-%5 = OpTypeMatrix %6 2
-%8 = OpTypeVector %4 4
-%7 = OpTypeMatrix %8 4
-%1 = OpTypeStruct %2 %5 %7
-)");
-    EXPECT_EQ(DumpInstructions(b.debug()), R"(OpName %1 "S"
-OpMemberName %1 0 "a"
-OpMemberName %1 1 "b"
-OpMemberName %1 2 "c"
-)");
-    EXPECT_EQ(DumpInstructions(b.annots()), R"(OpMemberDecorate %1 0 Offset 0
-OpMemberDecorate %1 0 ColMajor
-OpMemberDecorate %1 0 MatrixStride 8
-OpMemberDecorate %1 1 Offset 16
-OpMemberDecorate %1 1 ColMajor
-OpMemberDecorate %1 1 MatrixStride 16
-OpMemberDecorate %1 2 Offset 48
-OpMemberDecorate %1 2 ColMajor
-OpMemberDecorate %1 2 MatrixStride 16
-)");
-}
-
-TEST_F(BuilderTest_Type, GenerateStruct_DecoratedMembers_LayoutArraysOfMatrix) {
-    // We have to infer layout for matrix when it also has an offset.
-    // The decoration goes on the struct member, even if the matrix is buried
-    // in levels of arrays.
-    auto* arr_mat2x2 = ty.array(ty.mat2x2<f32>(), 1_u);      // Singly nested array
-    auto* arr_arr_mat2x3 = ty.array(ty.mat2x3<f32>(), 1_u);  // Doubly nested array
+    auto* arr_mat2x2_f32 = ty.array(ty.mat2x2<f32>(), 1_u);  // Singly nested array
+    auto* arr_mat2x2_f16 = ty.array(ty.mat2x2<f16>(), 1_u);  // Singly nested array
+    auto* arr_arr_mat2x3_f32 =
+        ty.array(ty.array(ty.mat2x3<f32>(), 1_u), 2_u);  // Doubly nested array
+    auto* arr_arr_mat2x3_f16 =
+        ty.array(ty.array(ty.mat2x3<f16>(), 1_u), 2_u);      // Doubly nested array
     auto* rtarr_mat4x4 = ty.array(ty.mat4x4<f32>());         // Runtime array
 
-    auto* s = Structure("S", utils::Vector{
-                                 Member("a", arr_mat2x2),
-                                 Member("b", arr_arr_mat2x3),
-                                 Member("c", rtarr_mat4x4),
-                             });
+    auto* s = Structure(
+        "S", utils::Vector{
+                 Member("arr_mat2x2_f32", arr_mat2x2_f32),
+                 Member("arr_mat2x2_f16", arr_mat2x2_f16, utils::Vector{MemberAlign(64_i)}),
+                 Member("arr_arr_mat2x3_f32", arr_arr_mat2x3_f32, utils::Vector{MemberAlign(64_i)}),
+                 Member("arr_arr_mat2x3_f16", arr_arr_mat2x3_f16),
+                 Member("rtarr_mat4x4", rtarr_mat4x4),
+             });
 
     spirv::Builder& b = Build();
 
@@ -462,31 +466,53 @@
 %6 = OpTypeInt 32 0
 %7 = OpConstant %6 1
 %2 = OpTypeArray %3 %7
-%10 = OpTypeVector %5 3
+%11 = OpTypeFloat 16
+%10 = OpTypeVector %11 2
 %9 = OpTypeMatrix %10 2
 %8 = OpTypeArray %9 %7
-%13 = OpTypeVector %5 4
-%12 = OpTypeMatrix %13 4
-%11 = OpTypeRuntimeArray %12
-%1 = OpTypeStruct %2 %8 %11
+%15 = OpTypeVector %5 3
+%14 = OpTypeMatrix %15 2
+%13 = OpTypeArray %14 %7
+%16 = OpConstant %6 2
+%12 = OpTypeArray %13 %16
+%20 = OpTypeVector %11 3
+%19 = OpTypeMatrix %20 2
+%18 = OpTypeArray %19 %7
+%17 = OpTypeArray %18 %16
+%23 = OpTypeVector %5 4
+%22 = OpTypeMatrix %23 4
+%21 = OpTypeRuntimeArray %22
+%1 = OpTypeStruct %2 %8 %12 %17 %21
 )");
     EXPECT_EQ(DumpInstructions(b.debug()), R"(OpName %1 "S"
-OpMemberName %1 0 "a"
-OpMemberName %1 1 "b"
-OpMemberName %1 2 "c"
+OpMemberName %1 0 "arr_mat2x2_f32"
+OpMemberName %1 1 "arr_mat2x2_f16"
+OpMemberName %1 2 "arr_arr_mat2x3_f32"
+OpMemberName %1 3 "arr_arr_mat2x3_f16"
+OpMemberName %1 4 "rtarr_mat4x4"
 )");
     EXPECT_EQ(DumpInstructions(b.annots()), R"(OpMemberDecorate %1 0 Offset 0
 OpMemberDecorate %1 0 ColMajor
 OpMemberDecorate %1 0 MatrixStride 8
 OpDecorate %2 ArrayStride 16
-OpMemberDecorate %1 1 Offset 16
+OpMemberDecorate %1 1 Offset 64
 OpMemberDecorate %1 1 ColMajor
-OpMemberDecorate %1 1 MatrixStride 16
-OpDecorate %8 ArrayStride 32
-OpMemberDecorate %1 2 Offset 48
+OpMemberDecorate %1 1 MatrixStride 4
+OpDecorate %8 ArrayStride 8
+OpMemberDecorate %1 2 Offset 128
 OpMemberDecorate %1 2 ColMajor
 OpMemberDecorate %1 2 MatrixStride 16
-OpDecorate %11 ArrayStride 64
+OpDecorate %13 ArrayStride 32
+OpDecorate %12 ArrayStride 32
+OpMemberDecorate %1 3 Offset 192
+OpMemberDecorate %1 3 ColMajor
+OpMemberDecorate %1 3 MatrixStride 8
+OpDecorate %18 ArrayStride 16
+OpDecorate %17 ArrayStride 16
+OpMemberDecorate %1 4 Offset 224
+OpMemberDecorate %1 4 ColMajor
+OpMemberDecorate %1 4 MatrixStride 16
+OpDecorate %21 ArrayStride 64
 )");
 }
 

diff --git a/test/tint/builtins/gen/literal/arrayLength/8421b9.wgsl.expected.dxc.hlsl b/test/tint/builtins/gen/literal/arrayLength/8421b9.wgsl.expected.dxc.hlsl
index 87e0902..2694283 100644
--- a/test/tint/builtins/gen/literal/arrayLength/8421b9.wgsl.expected.dxc.hlsl
+++ b/test/tint/builtins/gen/literal/arrayLength/8421b9.wgsl.expected.dxc.hlsl

@@ -1,17 +1,35 @@
-SKIP: FAILED
+ByteAddressBuffer sb_ro : register(t1, space0);
 
-builtins/gen/literal/arrayLength/8421b9.wgsl:26:10 error: using f16 types in 'storage' address space is not implemented yet
-  arg_0: array<f16>,
-         ^^^^^^^^^^
+void arrayLength_8421b9() {
+  uint tint_symbol_2 = 0u;
+  sb_ro.GetDimensions(tint_symbol_2);
+  const uint tint_symbol_3 = ((tint_symbol_2 - 0u) / 2u);
+  uint res = tint_symbol_3;
+}
 
-builtins/gen/literal/arrayLength/8421b9.wgsl:25:1 note: see layout of struct:
-/*           align(2) size(2) */ struct SB_RO {
-/* offset(0) align(2) size(2) */   arg_0 : array<f16>;
-/*                            */ };
-struct SB_RO {
-^^^^^^
+struct tint_symbol {
+  float4 value : SV_Position;
+};
 
-builtins/gen/literal/arrayLength/8421b9.wgsl:28:42 note: see declaration of variable
-@group(0) @binding(1) var<storage, read> sb_ro : SB_RO;
-                                         ^^^^^
+float4 vertex_main_inner() {
+  arrayLength_8421b9();
+  return (0.0f).xxxx;
+}
 
+tint_symbol vertex_main() {
+  const float4 inner_result = vertex_main_inner();
+  tint_symbol wrapper_result = (tint_symbol)0;
+  wrapper_result.value = inner_result;
+  return wrapper_result;
+}
+
+void fragment_main() {
+  arrayLength_8421b9();
+  return;
+}
+
+[numthreads(1, 1, 1)]
+void compute_main() {
+  arrayLength_8421b9();
+  return;
+}

diff --git a/test/tint/builtins/gen/literal/arrayLength/8421b9.wgsl.expected.fxc.hlsl b/test/tint/builtins/gen/literal/arrayLength/8421b9.wgsl.expected.fxc.hlsl
index 87e0902..2694283 100644
--- a/test/tint/builtins/gen/literal/arrayLength/8421b9.wgsl.expected.fxc.hlsl
+++ b/test/tint/builtins/gen/literal/arrayLength/8421b9.wgsl.expected.fxc.hlsl

@@ -1,17 +1,35 @@
-SKIP: FAILED
+ByteAddressBuffer sb_ro : register(t1, space0);
 
-builtins/gen/literal/arrayLength/8421b9.wgsl:26:10 error: using f16 types in 'storage' address space is not implemented yet
-  arg_0: array<f16>,
-         ^^^^^^^^^^
+void arrayLength_8421b9() {
+  uint tint_symbol_2 = 0u;
+  sb_ro.GetDimensions(tint_symbol_2);
+  const uint tint_symbol_3 = ((tint_symbol_2 - 0u) / 2u);
+  uint res = tint_symbol_3;
+}
 
-builtins/gen/literal/arrayLength/8421b9.wgsl:25:1 note: see layout of struct:
-/*           align(2) size(2) */ struct SB_RO {
-/* offset(0) align(2) size(2) */   arg_0 : array<f16>;
-/*                            */ };
-struct SB_RO {
-^^^^^^
+struct tint_symbol {
+  float4 value : SV_Position;
+};
 
-builtins/gen/literal/arrayLength/8421b9.wgsl:28:42 note: see declaration of variable
-@group(0) @binding(1) var<storage, read> sb_ro : SB_RO;
-                                         ^^^^^
+float4 vertex_main_inner() {
+  arrayLength_8421b9();
+  return (0.0f).xxxx;
+}
 
+tint_symbol vertex_main() {
+  const float4 inner_result = vertex_main_inner();
+  tint_symbol wrapper_result = (tint_symbol)0;
+  wrapper_result.value = inner_result;
+  return wrapper_result;
+}
+
+void fragment_main() {
+  arrayLength_8421b9();
+  return;
+}
+
+[numthreads(1, 1, 1)]
+void compute_main() {
+  arrayLength_8421b9();
+  return;
+}

diff --git a/test/tint/builtins/gen/literal/arrayLength/8421b9.wgsl.expected.glsl b/test/tint/builtins/gen/literal/arrayLength/8421b9.wgsl.expected.glsl
index 87e0902..9375052 100644
--- a/test/tint/builtins/gen/literal/arrayLength/8421b9.wgsl.expected.glsl
+++ b/test/tint/builtins/gen/literal/arrayLength/8421b9.wgsl.expected.glsl

@@ -1,17 +1,64 @@
-SKIP: FAILED
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
 
-builtins/gen/literal/arrayLength/8421b9.wgsl:26:10 error: using f16 types in 'storage' address space is not implemented yet
-  arg_0: array<f16>,
-         ^^^^^^^^^^
+layout(binding = 1, std430) buffer SB_RO_ssbo {
+  float16_t arg_0[];
+} sb_ro;
 
-builtins/gen/literal/arrayLength/8421b9.wgsl:25:1 note: see layout of struct:
-/*           align(2) size(2) */ struct SB_RO {
-/* offset(0) align(2) size(2) */   arg_0 : array<f16>;
-/*                            */ };
-struct SB_RO {
-^^^^^^
+void arrayLength_8421b9() {
+  uint res = uint(sb_ro.arg_0.length());
+}
 
-builtins/gen/literal/arrayLength/8421b9.wgsl:28:42 note: see declaration of variable
-@group(0) @binding(1) var<storage, read> sb_ro : SB_RO;
-                                         ^^^^^
+vec4 vertex_main() {
+  arrayLength_8421b9();
+  return vec4(0.0f);
+}
 
+void main() {
+  gl_PointSize = 1.0;
+  vec4 inner_result = vertex_main();
+  gl_Position = inner_result;
+  gl_Position.y = -(gl_Position.y);
+  gl_Position.z = ((2.0f * gl_Position.z) - gl_Position.w);
+  return;
+}
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+precision mediump float;
+
+layout(binding = 1, std430) buffer SB_RO_ssbo {
+  float16_t arg_0[];
+} sb_ro;
+
+void arrayLength_8421b9() {
+  uint res = uint(sb_ro.arg_0.length());
+}
+
+void fragment_main() {
+  arrayLength_8421b9();
+}
+
+void main() {
+  fragment_main();
+  return;
+}
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 1, std430) buffer SB_RO_ssbo {
+  float16_t arg_0[];
+} sb_ro;
+
+void arrayLength_8421b9() {
+  uint res = uint(sb_ro.arg_0.length());
+}
+
+void compute_main() {
+  arrayLength_8421b9();
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  compute_main();
+  return;
+}

diff --git a/test/tint/builtins/gen/literal/arrayLength/8421b9.wgsl.expected.msl b/test/tint/builtins/gen/literal/arrayLength/8421b9.wgsl.expected.msl
index 87e0902..3661b22 100644
--- a/test/tint/builtins/gen/literal/arrayLength/8421b9.wgsl.expected.msl
+++ b/test/tint/builtins/gen/literal/arrayLength/8421b9.wgsl.expected.msl

@@ -1,17 +1,54 @@
-SKIP: FAILED
+#include <metal_stdlib>
 
-builtins/gen/literal/arrayLength/8421b9.wgsl:26:10 error: using f16 types in 'storage' address space is not implemented yet
-  arg_0: array<f16>,
-         ^^^^^^^^^^
+using namespace metal;
 
-builtins/gen/literal/arrayLength/8421b9.wgsl:25:1 note: see layout of struct:
-/*           align(2) size(2) */ struct SB_RO {
-/* offset(0) align(2) size(2) */   arg_0 : array<f16>;
-/*                            */ };
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct tint_symbol_1 {
+  /* 0x0000 */ tint_array<uint4, 1> buffer_size;
+};
+
 struct SB_RO {
-^^^^^^
+  tint_array<half, 1> arg_0;
+};
 
-builtins/gen/literal/arrayLength/8421b9.wgsl:28:42 note: see declaration of variable
-@group(0) @binding(1) var<storage, read> sb_ro : SB_RO;
-                                         ^^^^^
+void arrayLength_8421b9(const constant tint_symbol_1* const tint_symbol_3) {
+  uint res = (((*(tint_symbol_3)).buffer_size[0u][0u] - 0u) / 2u);
+}
+
+struct tint_symbol {
+  float4 value [[position]];
+};
+
+float4 vertex_main_inner(const constant tint_symbol_1* const tint_symbol_4) {
+  arrayLength_8421b9(tint_symbol_4);
+  return float4(0.0f);
+}
+
+vertex tint_symbol vertex_main(const constant tint_symbol_1* tint_symbol_5 [[buffer(30)]]) {
+  float4 const inner_result = vertex_main_inner(tint_symbol_5);
+  tint_symbol wrapper_result = {};
+  wrapper_result.value = inner_result;
+  return wrapper_result;
+}
+
+fragment void fragment_main(const constant tint_symbol_1* tint_symbol_6 [[buffer(30)]]) {
+  arrayLength_8421b9(tint_symbol_6);
+  return;
+}
+
+kernel void compute_main(const constant tint_symbol_1* tint_symbol_7 [[buffer(30)]]) {
+  arrayLength_8421b9(tint_symbol_7);
+  return;
+}
 

diff --git a/test/tint/builtins/gen/literal/arrayLength/8421b9.wgsl.expected.spvasm b/test/tint/builtins/gen/literal/arrayLength/8421b9.wgsl.expected.spvasm
index 87e0902..485ba20 100644
--- a/test/tint/builtins/gen/literal/arrayLength/8421b9.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/literal/arrayLength/8421b9.wgsl.expected.spvasm

@@ -1,17 +1,84 @@
-SKIP: FAILED
-
-builtins/gen/literal/arrayLength/8421b9.wgsl:26:10 error: using f16 types in 'storage' address space is not implemented yet
-  arg_0: array<f16>,
-         ^^^^^^^^^^
-
-builtins/gen/literal/arrayLength/8421b9.wgsl:25:1 note: see layout of struct:
-/*           align(2) size(2) */ struct SB_RO {
-/* offset(0) align(2) size(2) */   arg_0 : array<f16>;
-/*                            */ };
-struct SB_RO {
-^^^^^^
-
-builtins/gen/literal/arrayLength/8421b9.wgsl:28:42 note: see declaration of variable
-@group(0) @binding(1) var<storage, read> sb_ro : SB_RO;
-                                         ^^^^^
-
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 37
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Vertex %vertex_main "vertex_main" %value %vertex_point_size
+               OpEntryPoint Fragment %fragment_main "fragment_main"
+               OpEntryPoint GLCompute %compute_main "compute_main"
+               OpExecutionMode %fragment_main OriginUpperLeft
+               OpExecutionMode %compute_main LocalSize 1 1 1
+               OpName %value "value"
+               OpName %vertex_point_size "vertex_point_size"
+               OpName %SB_RO "SB_RO"
+               OpMemberName %SB_RO 0 "arg_0"
+               OpName %sb_ro "sb_ro"
+               OpName %arrayLength_8421b9 "arrayLength_8421b9"
+               OpName %res "res"
+               OpName %vertex_main_inner "vertex_main_inner"
+               OpName %vertex_main "vertex_main"
+               OpName %fragment_main "fragment_main"
+               OpName %compute_main "compute_main"
+               OpDecorate %value BuiltIn Position
+               OpDecorate %vertex_point_size BuiltIn PointSize
+               OpDecorate %SB_RO Block
+               OpMemberDecorate %SB_RO 0 Offset 0
+               OpDecorate %_runtimearr_half ArrayStride 2
+               OpDecorate %sb_ro NonWritable
+               OpDecorate %sb_ro DescriptorSet 0
+               OpDecorate %sb_ro Binding 1
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+          %5 = OpConstantNull %v4float
+      %value = OpVariable %_ptr_Output_v4float Output %5
+%_ptr_Output_float = OpTypePointer Output %float
+          %8 = OpConstantNull %float
+%vertex_point_size = OpVariable %_ptr_Output_float Output %8
+       %half = OpTypeFloat 16
+%_runtimearr_half = OpTypeRuntimeArray %half
+      %SB_RO = OpTypeStruct %_runtimearr_half
+%_ptr_StorageBuffer_SB_RO = OpTypePointer StorageBuffer %SB_RO
+      %sb_ro = OpVariable %_ptr_StorageBuffer_SB_RO StorageBuffer
+       %void = OpTypeVoid
+         %14 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %22 = OpConstantNull %uint
+         %23 = OpTypeFunction %v4float
+    %float_1 = OpConstant %float 1
+%arrayLength_8421b9 = OpFunction %void None %14
+         %17 = OpLabel
+        %res = OpVariable %_ptr_Function_uint Function %22
+         %18 = OpArrayLength %uint %sb_ro 0
+               OpStore %res %18
+               OpReturn
+               OpFunctionEnd
+%vertex_main_inner = OpFunction %v4float None %23
+         %25 = OpLabel
+         %26 = OpFunctionCall %void %arrayLength_8421b9
+               OpReturnValue %5
+               OpFunctionEnd
+%vertex_main = OpFunction %void None %14
+         %28 = OpLabel
+         %29 = OpFunctionCall %v4float %vertex_main_inner
+               OpStore %value %29
+               OpStore %vertex_point_size %float_1
+               OpReturn
+               OpFunctionEnd
+%fragment_main = OpFunction %void None %14
+         %32 = OpLabel
+         %33 = OpFunctionCall %void %arrayLength_8421b9
+               OpReturn
+               OpFunctionEnd
+%compute_main = OpFunction %void None %14
+         %35 = OpLabel
+         %36 = OpFunctionCall %void %arrayLength_8421b9
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/builtins/gen/literal/arrayLength/8421b9.wgsl.expected.wgsl b/test/tint/builtins/gen/literal/arrayLength/8421b9.wgsl.expected.wgsl
index 87e0902..2d6dca3 100644
--- a/test/tint/builtins/gen/literal/arrayLength/8421b9.wgsl.expected.wgsl
+++ b/test/tint/builtins/gen/literal/arrayLength/8421b9.wgsl.expected.wgsl

@@ -1,17 +1,27 @@
-SKIP: FAILED
+enable f16;
 
-builtins/gen/literal/arrayLength/8421b9.wgsl:26:10 error: using f16 types in 'storage' address space is not implemented yet
-  arg_0: array<f16>,
-         ^^^^^^^^^^
-
-builtins/gen/literal/arrayLength/8421b9.wgsl:25:1 note: see layout of struct:
-/*           align(2) size(2) */ struct SB_RO {
-/* offset(0) align(2) size(2) */   arg_0 : array<f16>;
-/*                            */ };
 struct SB_RO {
-^^^^^^
+  arg_0 : array<f16>,
+}
 
-builtins/gen/literal/arrayLength/8421b9.wgsl:28:42 note: see declaration of variable
 @group(0) @binding(1) var<storage, read> sb_ro : SB_RO;
-                                         ^^^^^
 
+fn arrayLength_8421b9() {
+  var res : u32 = arrayLength(&(sb_ro.arg_0));
+}
+
+@vertex
+fn vertex_main() -> @builtin(position) vec4<f32> {
+  arrayLength_8421b9();
+  return vec4<f32>();
+}
+
+@fragment
+fn fragment_main() {
+  arrayLength_8421b9();
+}
+
+@compute @workgroup_size(1)
+fn compute_main() {
+  arrayLength_8421b9();
+}

diff --git a/test/tint/builtins/gen/literal/arrayLength/cbd6b5.wgsl.expected.dxc.hlsl b/test/tint/builtins/gen/literal/arrayLength/cbd6b5.wgsl.expected.dxc.hlsl
index 7849c68..c62bc23 100644
--- a/test/tint/builtins/gen/literal/arrayLength/cbd6b5.wgsl.expected.dxc.hlsl
+++ b/test/tint/builtins/gen/literal/arrayLength/cbd6b5.wgsl.expected.dxc.hlsl

@@ -1,17 +1,35 @@
-SKIP: FAILED
+RWByteAddressBuffer sb_rw : register(u0, space0);
 
-builtins/gen/literal/arrayLength/cbd6b5.wgsl:26:10 error: using f16 types in 'storage' address space is not implemented yet
-  arg_0: array<f16>,
-         ^^^^^^^^^^
+void arrayLength_cbd6b5() {
+  uint tint_symbol_2 = 0u;
+  sb_rw.GetDimensions(tint_symbol_2);
+  const uint tint_symbol_3 = ((tint_symbol_2 - 0u) / 2u);
+  uint res = tint_symbol_3;
+}
 
-builtins/gen/literal/arrayLength/cbd6b5.wgsl:25:1 note: see layout of struct:
-/*           align(2) size(2) */ struct SB_RW {
-/* offset(0) align(2) size(2) */   arg_0 : array<f16>;
-/*                            */ };
-struct SB_RW {
-^^^^^^
+struct tint_symbol {
+  float4 value : SV_Position;
+};
 
-builtins/gen/literal/arrayLength/cbd6b5.wgsl:28:48 note: see declaration of variable
-@group(0) @binding(0) var<storage, read_write> sb_rw : SB_RW;
-                                               ^^^^^
+float4 vertex_main_inner() {
+  arrayLength_cbd6b5();
+  return (0.0f).xxxx;
+}
 
+tint_symbol vertex_main() {
+  const float4 inner_result = vertex_main_inner();
+  tint_symbol wrapper_result = (tint_symbol)0;
+  wrapper_result.value = inner_result;
+  return wrapper_result;
+}
+
+void fragment_main() {
+  arrayLength_cbd6b5();
+  return;
+}
+
+[numthreads(1, 1, 1)]
+void compute_main() {
+  arrayLength_cbd6b5();
+  return;
+}

diff --git a/test/tint/builtins/gen/literal/arrayLength/cbd6b5.wgsl.expected.fxc.hlsl b/test/tint/builtins/gen/literal/arrayLength/cbd6b5.wgsl.expected.fxc.hlsl
index 7849c68..c62bc23 100644
--- a/test/tint/builtins/gen/literal/arrayLength/cbd6b5.wgsl.expected.fxc.hlsl
+++ b/test/tint/builtins/gen/literal/arrayLength/cbd6b5.wgsl.expected.fxc.hlsl

@@ -1,17 +1,35 @@
-SKIP: FAILED
+RWByteAddressBuffer sb_rw : register(u0, space0);
 
-builtins/gen/literal/arrayLength/cbd6b5.wgsl:26:10 error: using f16 types in 'storage' address space is not implemented yet
-  arg_0: array<f16>,
-         ^^^^^^^^^^
+void arrayLength_cbd6b5() {
+  uint tint_symbol_2 = 0u;
+  sb_rw.GetDimensions(tint_symbol_2);
+  const uint tint_symbol_3 = ((tint_symbol_2 - 0u) / 2u);
+  uint res = tint_symbol_3;
+}
 
-builtins/gen/literal/arrayLength/cbd6b5.wgsl:25:1 note: see layout of struct:
-/*           align(2) size(2) */ struct SB_RW {
-/* offset(0) align(2) size(2) */   arg_0 : array<f16>;
-/*                            */ };
-struct SB_RW {
-^^^^^^
+struct tint_symbol {
+  float4 value : SV_Position;
+};
 
-builtins/gen/literal/arrayLength/cbd6b5.wgsl:28:48 note: see declaration of variable
-@group(0) @binding(0) var<storage, read_write> sb_rw : SB_RW;
-                                               ^^^^^
+float4 vertex_main_inner() {
+  arrayLength_cbd6b5();
+  return (0.0f).xxxx;
+}
 
+tint_symbol vertex_main() {
+  const float4 inner_result = vertex_main_inner();
+  tint_symbol wrapper_result = (tint_symbol)0;
+  wrapper_result.value = inner_result;
+  return wrapper_result;
+}
+
+void fragment_main() {
+  arrayLength_cbd6b5();
+  return;
+}
+
+[numthreads(1, 1, 1)]
+void compute_main() {
+  arrayLength_cbd6b5();
+  return;
+}

diff --git a/test/tint/builtins/gen/literal/arrayLength/cbd6b5.wgsl.expected.glsl b/test/tint/builtins/gen/literal/arrayLength/cbd6b5.wgsl.expected.glsl
index 7849c68..aeaf5a3 100644
--- a/test/tint/builtins/gen/literal/arrayLength/cbd6b5.wgsl.expected.glsl
+++ b/test/tint/builtins/gen/literal/arrayLength/cbd6b5.wgsl.expected.glsl

@@ -1,17 +1,64 @@
-SKIP: FAILED
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
 
-builtins/gen/literal/arrayLength/cbd6b5.wgsl:26:10 error: using f16 types in 'storage' address space is not implemented yet
-  arg_0: array<f16>,
-         ^^^^^^^^^^
+layout(binding = 0, std430) buffer SB_RW_ssbo {
+  float16_t arg_0[];
+} sb_rw;
 
-builtins/gen/literal/arrayLength/cbd6b5.wgsl:25:1 note: see layout of struct:
-/*           align(2) size(2) */ struct SB_RW {
-/* offset(0) align(2) size(2) */   arg_0 : array<f16>;
-/*                            */ };
-struct SB_RW {
-^^^^^^
+void arrayLength_cbd6b5() {
+  uint res = uint(sb_rw.arg_0.length());
+}
 
-builtins/gen/literal/arrayLength/cbd6b5.wgsl:28:48 note: see declaration of variable
-@group(0) @binding(0) var<storage, read_write> sb_rw : SB_RW;
-                                               ^^^^^
+vec4 vertex_main() {
+  arrayLength_cbd6b5();
+  return vec4(0.0f);
+}
 
+void main() {
+  gl_PointSize = 1.0;
+  vec4 inner_result = vertex_main();
+  gl_Position = inner_result;
+  gl_Position.y = -(gl_Position.y);
+  gl_Position.z = ((2.0f * gl_Position.z) - gl_Position.w);
+  return;
+}
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+precision mediump float;
+
+layout(binding = 0, std430) buffer SB_RW_ssbo {
+  float16_t arg_0[];
+} sb_rw;
+
+void arrayLength_cbd6b5() {
+  uint res = uint(sb_rw.arg_0.length());
+}
+
+void fragment_main() {
+  arrayLength_cbd6b5();
+}
+
+void main() {
+  fragment_main();
+  return;
+}
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std430) buffer SB_RW_ssbo {
+  float16_t arg_0[];
+} sb_rw;
+
+void arrayLength_cbd6b5() {
+  uint res = uint(sb_rw.arg_0.length());
+}
+
+void compute_main() {
+  arrayLength_cbd6b5();
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  compute_main();
+  return;
+}

diff --git a/test/tint/builtins/gen/literal/arrayLength/cbd6b5.wgsl.expected.msl b/test/tint/builtins/gen/literal/arrayLength/cbd6b5.wgsl.expected.msl
index 7849c68..86034e4 100644
--- a/test/tint/builtins/gen/literal/arrayLength/cbd6b5.wgsl.expected.msl
+++ b/test/tint/builtins/gen/literal/arrayLength/cbd6b5.wgsl.expected.msl

@@ -1,17 +1,54 @@
-SKIP: FAILED
+#include <metal_stdlib>
 
-builtins/gen/literal/arrayLength/cbd6b5.wgsl:26:10 error: using f16 types in 'storage' address space is not implemented yet
-  arg_0: array<f16>,
-         ^^^^^^^^^^
+using namespace metal;
 
-builtins/gen/literal/arrayLength/cbd6b5.wgsl:25:1 note: see layout of struct:
-/*           align(2) size(2) */ struct SB_RW {
-/* offset(0) align(2) size(2) */   arg_0 : array<f16>;
-/*                            */ };
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct tint_symbol_1 {
+  /* 0x0000 */ tint_array<uint4, 1> buffer_size;
+};
+
 struct SB_RW {
-^^^^^^
+  tint_array<half, 1> arg_0;
+};
 
-builtins/gen/literal/arrayLength/cbd6b5.wgsl:28:48 note: see declaration of variable
-@group(0) @binding(0) var<storage, read_write> sb_rw : SB_RW;
-                                               ^^^^^
+void arrayLength_cbd6b5(const constant tint_symbol_1* const tint_symbol_3) {
+  uint res = (((*(tint_symbol_3)).buffer_size[0u][0u] - 0u) / 2u);
+}
+
+struct tint_symbol {
+  float4 value [[position]];
+};
+
+float4 vertex_main_inner(const constant tint_symbol_1* const tint_symbol_4) {
+  arrayLength_cbd6b5(tint_symbol_4);
+  return float4(0.0f);
+}
+
+vertex tint_symbol vertex_main(const constant tint_symbol_1* tint_symbol_5 [[buffer(30)]]) {
+  float4 const inner_result = vertex_main_inner(tint_symbol_5);
+  tint_symbol wrapper_result = {};
+  wrapper_result.value = inner_result;
+  return wrapper_result;
+}
+
+fragment void fragment_main(const constant tint_symbol_1* tint_symbol_6 [[buffer(30)]]) {
+  arrayLength_cbd6b5(tint_symbol_6);
+  return;
+}
+
+kernel void compute_main(const constant tint_symbol_1* tint_symbol_7 [[buffer(30)]]) {
+  arrayLength_cbd6b5(tint_symbol_7);
+  return;
+}
 

diff --git a/test/tint/builtins/gen/literal/arrayLength/cbd6b5.wgsl.expected.spvasm b/test/tint/builtins/gen/literal/arrayLength/cbd6b5.wgsl.expected.spvasm
index 7849c68..15031de 100644
--- a/test/tint/builtins/gen/literal/arrayLength/cbd6b5.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/literal/arrayLength/cbd6b5.wgsl.expected.spvasm

@@ -1,17 +1,83 @@
-SKIP: FAILED
-
-builtins/gen/literal/arrayLength/cbd6b5.wgsl:26:10 error: using f16 types in 'storage' address space is not implemented yet
-  arg_0: array<f16>,
-         ^^^^^^^^^^
-
-builtins/gen/literal/arrayLength/cbd6b5.wgsl:25:1 note: see layout of struct:
-/*           align(2) size(2) */ struct SB_RW {
-/* offset(0) align(2) size(2) */   arg_0 : array<f16>;
-/*                            */ };
-struct SB_RW {
-^^^^^^
-
-builtins/gen/literal/arrayLength/cbd6b5.wgsl:28:48 note: see declaration of variable
-@group(0) @binding(0) var<storage, read_write> sb_rw : SB_RW;
-                                               ^^^^^
-
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 37
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Vertex %vertex_main "vertex_main" %value %vertex_point_size
+               OpEntryPoint Fragment %fragment_main "fragment_main"
+               OpEntryPoint GLCompute %compute_main "compute_main"
+               OpExecutionMode %fragment_main OriginUpperLeft
+               OpExecutionMode %compute_main LocalSize 1 1 1
+               OpName %value "value"
+               OpName %vertex_point_size "vertex_point_size"
+               OpName %SB_RW "SB_RW"
+               OpMemberName %SB_RW 0 "arg_0"
+               OpName %sb_rw "sb_rw"
+               OpName %arrayLength_cbd6b5 "arrayLength_cbd6b5"
+               OpName %res "res"
+               OpName %vertex_main_inner "vertex_main_inner"
+               OpName %vertex_main "vertex_main"
+               OpName %fragment_main "fragment_main"
+               OpName %compute_main "compute_main"
+               OpDecorate %value BuiltIn Position
+               OpDecorate %vertex_point_size BuiltIn PointSize
+               OpDecorate %SB_RW Block
+               OpMemberDecorate %SB_RW 0 Offset 0
+               OpDecorate %_runtimearr_half ArrayStride 2
+               OpDecorate %sb_rw DescriptorSet 0
+               OpDecorate %sb_rw Binding 0
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+          %5 = OpConstantNull %v4float
+      %value = OpVariable %_ptr_Output_v4float Output %5
+%_ptr_Output_float = OpTypePointer Output %float
+          %8 = OpConstantNull %float
+%vertex_point_size = OpVariable %_ptr_Output_float Output %8
+       %half = OpTypeFloat 16
+%_runtimearr_half = OpTypeRuntimeArray %half
+      %SB_RW = OpTypeStruct %_runtimearr_half
+%_ptr_StorageBuffer_SB_RW = OpTypePointer StorageBuffer %SB_RW
+      %sb_rw = OpVariable %_ptr_StorageBuffer_SB_RW StorageBuffer
+       %void = OpTypeVoid
+         %14 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %22 = OpConstantNull %uint
+         %23 = OpTypeFunction %v4float
+    %float_1 = OpConstant %float 1
+%arrayLength_cbd6b5 = OpFunction %void None %14
+         %17 = OpLabel
+        %res = OpVariable %_ptr_Function_uint Function %22
+         %18 = OpArrayLength %uint %sb_rw 0
+               OpStore %res %18
+               OpReturn
+               OpFunctionEnd
+%vertex_main_inner = OpFunction %v4float None %23
+         %25 = OpLabel
+         %26 = OpFunctionCall %void %arrayLength_cbd6b5
+               OpReturnValue %5
+               OpFunctionEnd
+%vertex_main = OpFunction %void None %14
+         %28 = OpLabel
+         %29 = OpFunctionCall %v4float %vertex_main_inner
+               OpStore %value %29
+               OpStore %vertex_point_size %float_1
+               OpReturn
+               OpFunctionEnd
+%fragment_main = OpFunction %void None %14
+         %32 = OpLabel
+         %33 = OpFunctionCall %void %arrayLength_cbd6b5
+               OpReturn
+               OpFunctionEnd
+%compute_main = OpFunction %void None %14
+         %35 = OpLabel
+         %36 = OpFunctionCall %void %arrayLength_cbd6b5
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/builtins/gen/literal/arrayLength/cbd6b5.wgsl.expected.wgsl b/test/tint/builtins/gen/literal/arrayLength/cbd6b5.wgsl.expected.wgsl
index 7849c68..c025591 100644
--- a/test/tint/builtins/gen/literal/arrayLength/cbd6b5.wgsl.expected.wgsl
+++ b/test/tint/builtins/gen/literal/arrayLength/cbd6b5.wgsl.expected.wgsl

@@ -1,17 +1,27 @@
-SKIP: FAILED
+enable f16;
 
-builtins/gen/literal/arrayLength/cbd6b5.wgsl:26:10 error: using f16 types in 'storage' address space is not implemented yet
-  arg_0: array<f16>,
-         ^^^^^^^^^^
-
-builtins/gen/literal/arrayLength/cbd6b5.wgsl:25:1 note: see layout of struct:
-/*           align(2) size(2) */ struct SB_RW {
-/* offset(0) align(2) size(2) */   arg_0 : array<f16>;
-/*                            */ };
 struct SB_RW {
-^^^^^^
+  arg_0 : array<f16>,
+}
 
-builtins/gen/literal/arrayLength/cbd6b5.wgsl:28:48 note: see declaration of variable
 @group(0) @binding(0) var<storage, read_write> sb_rw : SB_RW;
-                                               ^^^^^
 
+fn arrayLength_cbd6b5() {
+  var res : u32 = arrayLength(&(sb_rw.arg_0));
+}
+
+@vertex
+fn vertex_main() -> @builtin(position) vec4<f32> {
+  arrayLength_cbd6b5();
+  return vec4<f32>();
+}
+
+@fragment
+fn fragment_main() {
+  arrayLength_cbd6b5();
+}
+
+@compute @workgroup_size(1)
+fn compute_main() {
+  arrayLength_cbd6b5();
+}

diff --git a/test/tint/builtins/gen/var/arrayLength/8421b9.wgsl.expected.dxc.hlsl b/test/tint/builtins/gen/var/arrayLength/8421b9.wgsl.expected.dxc.hlsl
index fc94710..2694283 100644
--- a/test/tint/builtins/gen/var/arrayLength/8421b9.wgsl.expected.dxc.hlsl
+++ b/test/tint/builtins/gen/var/arrayLength/8421b9.wgsl.expected.dxc.hlsl

@@ -1,17 +1,35 @@
-SKIP: FAILED
+ByteAddressBuffer sb_ro : register(t1, space0);
 
-builtins/gen/var/arrayLength/8421b9.wgsl:26:10 error: using f16 types in 'storage' address space is not implemented yet
-  arg_0: array<f16>,
-         ^^^^^^^^^^
+void arrayLength_8421b9() {
+  uint tint_symbol_2 = 0u;
+  sb_ro.GetDimensions(tint_symbol_2);
+  const uint tint_symbol_3 = ((tint_symbol_2 - 0u) / 2u);
+  uint res = tint_symbol_3;
+}
 
-builtins/gen/var/arrayLength/8421b9.wgsl:25:1 note: see layout of struct:
-/*           align(2) size(2) */ struct SB_RO {
-/* offset(0) align(2) size(2) */   arg_0 : array<f16>;
-/*                            */ };
-struct SB_RO {
-^^^^^^
+struct tint_symbol {
+  float4 value : SV_Position;
+};
 
-builtins/gen/var/arrayLength/8421b9.wgsl:28:42 note: see declaration of variable
-@group(0) @binding(1) var<storage, read> sb_ro : SB_RO;
-                                         ^^^^^
+float4 vertex_main_inner() {
+  arrayLength_8421b9();
+  return (0.0f).xxxx;
+}
 
+tint_symbol vertex_main() {
+  const float4 inner_result = vertex_main_inner();
+  tint_symbol wrapper_result = (tint_symbol)0;
+  wrapper_result.value = inner_result;
+  return wrapper_result;
+}
+
+void fragment_main() {
+  arrayLength_8421b9();
+  return;
+}
+
+[numthreads(1, 1, 1)]
+void compute_main() {
+  arrayLength_8421b9();
+  return;
+}

diff --git a/test/tint/builtins/gen/var/arrayLength/8421b9.wgsl.expected.fxc.hlsl b/test/tint/builtins/gen/var/arrayLength/8421b9.wgsl.expected.fxc.hlsl
index fc94710..2694283 100644
--- a/test/tint/builtins/gen/var/arrayLength/8421b9.wgsl.expected.fxc.hlsl
+++ b/test/tint/builtins/gen/var/arrayLength/8421b9.wgsl.expected.fxc.hlsl

@@ -1,17 +1,35 @@
-SKIP: FAILED
+ByteAddressBuffer sb_ro : register(t1, space0);
 
-builtins/gen/var/arrayLength/8421b9.wgsl:26:10 error: using f16 types in 'storage' address space is not implemented yet
-  arg_0: array<f16>,
-         ^^^^^^^^^^
+void arrayLength_8421b9() {
+  uint tint_symbol_2 = 0u;
+  sb_ro.GetDimensions(tint_symbol_2);
+  const uint tint_symbol_3 = ((tint_symbol_2 - 0u) / 2u);
+  uint res = tint_symbol_3;
+}
 
-builtins/gen/var/arrayLength/8421b9.wgsl:25:1 note: see layout of struct:
-/*           align(2) size(2) */ struct SB_RO {
-/* offset(0) align(2) size(2) */   arg_0 : array<f16>;
-/*                            */ };
-struct SB_RO {
-^^^^^^
+struct tint_symbol {
+  float4 value : SV_Position;
+};
 
-builtins/gen/var/arrayLength/8421b9.wgsl:28:42 note: see declaration of variable
-@group(0) @binding(1) var<storage, read> sb_ro : SB_RO;
-                                         ^^^^^
+float4 vertex_main_inner() {
+  arrayLength_8421b9();
+  return (0.0f).xxxx;
+}
 
+tint_symbol vertex_main() {
+  const float4 inner_result = vertex_main_inner();
+  tint_symbol wrapper_result = (tint_symbol)0;
+  wrapper_result.value = inner_result;
+  return wrapper_result;
+}
+
+void fragment_main() {
+  arrayLength_8421b9();
+  return;
+}
+
+[numthreads(1, 1, 1)]
+void compute_main() {
+  arrayLength_8421b9();
+  return;
+}

diff --git a/test/tint/builtins/gen/var/arrayLength/8421b9.wgsl.expected.glsl b/test/tint/builtins/gen/var/arrayLength/8421b9.wgsl.expected.glsl
index fc94710..9375052 100644
--- a/test/tint/builtins/gen/var/arrayLength/8421b9.wgsl.expected.glsl
+++ b/test/tint/builtins/gen/var/arrayLength/8421b9.wgsl.expected.glsl

@@ -1,17 +1,64 @@
-SKIP: FAILED
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
 
-builtins/gen/var/arrayLength/8421b9.wgsl:26:10 error: using f16 types in 'storage' address space is not implemented yet
-  arg_0: array<f16>,
-         ^^^^^^^^^^
+layout(binding = 1, std430) buffer SB_RO_ssbo {
+  float16_t arg_0[];
+} sb_ro;
 
-builtins/gen/var/arrayLength/8421b9.wgsl:25:1 note: see layout of struct:
-/*           align(2) size(2) */ struct SB_RO {
-/* offset(0) align(2) size(2) */   arg_0 : array<f16>;
-/*                            */ };
-struct SB_RO {
-^^^^^^
+void arrayLength_8421b9() {
+  uint res = uint(sb_ro.arg_0.length());
+}
 
-builtins/gen/var/arrayLength/8421b9.wgsl:28:42 note: see declaration of variable
-@group(0) @binding(1) var<storage, read> sb_ro : SB_RO;
-                                         ^^^^^
+vec4 vertex_main() {
+  arrayLength_8421b9();
+  return vec4(0.0f);
+}
 
+void main() {
+  gl_PointSize = 1.0;
+  vec4 inner_result = vertex_main();
+  gl_Position = inner_result;
+  gl_Position.y = -(gl_Position.y);
+  gl_Position.z = ((2.0f * gl_Position.z) - gl_Position.w);
+  return;
+}
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+precision mediump float;
+
+layout(binding = 1, std430) buffer SB_RO_ssbo {
+  float16_t arg_0[];
+} sb_ro;
+
+void arrayLength_8421b9() {
+  uint res = uint(sb_ro.arg_0.length());
+}
+
+void fragment_main() {
+  arrayLength_8421b9();
+}
+
+void main() {
+  fragment_main();
+  return;
+}
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 1, std430) buffer SB_RO_ssbo {
+  float16_t arg_0[];
+} sb_ro;
+
+void arrayLength_8421b9() {
+  uint res = uint(sb_ro.arg_0.length());
+}
+
+void compute_main() {
+  arrayLength_8421b9();
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  compute_main();
+  return;
+}

diff --git a/test/tint/builtins/gen/var/arrayLength/8421b9.wgsl.expected.msl b/test/tint/builtins/gen/var/arrayLength/8421b9.wgsl.expected.msl
index fc94710..3661b22 100644
--- a/test/tint/builtins/gen/var/arrayLength/8421b9.wgsl.expected.msl
+++ b/test/tint/builtins/gen/var/arrayLength/8421b9.wgsl.expected.msl

@@ -1,17 +1,54 @@
-SKIP: FAILED
+#include <metal_stdlib>
 
-builtins/gen/var/arrayLength/8421b9.wgsl:26:10 error: using f16 types in 'storage' address space is not implemented yet
-  arg_0: array<f16>,
-         ^^^^^^^^^^
+using namespace metal;
 
-builtins/gen/var/arrayLength/8421b9.wgsl:25:1 note: see layout of struct:
-/*           align(2) size(2) */ struct SB_RO {
-/* offset(0) align(2) size(2) */   arg_0 : array<f16>;
-/*                            */ };
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct tint_symbol_1 {
+  /* 0x0000 */ tint_array<uint4, 1> buffer_size;
+};
+
 struct SB_RO {
-^^^^^^
+  tint_array<half, 1> arg_0;
+};
 
-builtins/gen/var/arrayLength/8421b9.wgsl:28:42 note: see declaration of variable
-@group(0) @binding(1) var<storage, read> sb_ro : SB_RO;
-                                         ^^^^^
+void arrayLength_8421b9(const constant tint_symbol_1* const tint_symbol_3) {
+  uint res = (((*(tint_symbol_3)).buffer_size[0u][0u] - 0u) / 2u);
+}
+
+struct tint_symbol {
+  float4 value [[position]];
+};
+
+float4 vertex_main_inner(const constant tint_symbol_1* const tint_symbol_4) {
+  arrayLength_8421b9(tint_symbol_4);
+  return float4(0.0f);
+}
+
+vertex tint_symbol vertex_main(const constant tint_symbol_1* tint_symbol_5 [[buffer(30)]]) {
+  float4 const inner_result = vertex_main_inner(tint_symbol_5);
+  tint_symbol wrapper_result = {};
+  wrapper_result.value = inner_result;
+  return wrapper_result;
+}
+
+fragment void fragment_main(const constant tint_symbol_1* tint_symbol_6 [[buffer(30)]]) {
+  arrayLength_8421b9(tint_symbol_6);
+  return;
+}
+
+kernel void compute_main(const constant tint_symbol_1* tint_symbol_7 [[buffer(30)]]) {
+  arrayLength_8421b9(tint_symbol_7);
+  return;
+}
 

diff --git a/test/tint/builtins/gen/var/arrayLength/8421b9.wgsl.expected.spvasm b/test/tint/builtins/gen/var/arrayLength/8421b9.wgsl.expected.spvasm
index fc94710..485ba20 100644
--- a/test/tint/builtins/gen/var/arrayLength/8421b9.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/arrayLength/8421b9.wgsl.expected.spvasm

@@ -1,17 +1,84 @@
-SKIP: FAILED
-
-builtins/gen/var/arrayLength/8421b9.wgsl:26:10 error: using f16 types in 'storage' address space is not implemented yet
-  arg_0: array<f16>,
-         ^^^^^^^^^^
-
-builtins/gen/var/arrayLength/8421b9.wgsl:25:1 note: see layout of struct:
-/*           align(2) size(2) */ struct SB_RO {
-/* offset(0) align(2) size(2) */   arg_0 : array<f16>;
-/*                            */ };
-struct SB_RO {
-^^^^^^
-
-builtins/gen/var/arrayLength/8421b9.wgsl:28:42 note: see declaration of variable
-@group(0) @binding(1) var<storage, read> sb_ro : SB_RO;
-                                         ^^^^^
-
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 37
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Vertex %vertex_main "vertex_main" %value %vertex_point_size
+               OpEntryPoint Fragment %fragment_main "fragment_main"
+               OpEntryPoint GLCompute %compute_main "compute_main"
+               OpExecutionMode %fragment_main OriginUpperLeft
+               OpExecutionMode %compute_main LocalSize 1 1 1
+               OpName %value "value"
+               OpName %vertex_point_size "vertex_point_size"
+               OpName %SB_RO "SB_RO"
+               OpMemberName %SB_RO 0 "arg_0"
+               OpName %sb_ro "sb_ro"
+               OpName %arrayLength_8421b9 "arrayLength_8421b9"
+               OpName %res "res"
+               OpName %vertex_main_inner "vertex_main_inner"
+               OpName %vertex_main "vertex_main"
+               OpName %fragment_main "fragment_main"
+               OpName %compute_main "compute_main"
+               OpDecorate %value BuiltIn Position
+               OpDecorate %vertex_point_size BuiltIn PointSize
+               OpDecorate %SB_RO Block
+               OpMemberDecorate %SB_RO 0 Offset 0
+               OpDecorate %_runtimearr_half ArrayStride 2
+               OpDecorate %sb_ro NonWritable
+               OpDecorate %sb_ro DescriptorSet 0
+               OpDecorate %sb_ro Binding 1
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+          %5 = OpConstantNull %v4float
+      %value = OpVariable %_ptr_Output_v4float Output %5
+%_ptr_Output_float = OpTypePointer Output %float
+          %8 = OpConstantNull %float
+%vertex_point_size = OpVariable %_ptr_Output_float Output %8
+       %half = OpTypeFloat 16
+%_runtimearr_half = OpTypeRuntimeArray %half
+      %SB_RO = OpTypeStruct %_runtimearr_half
+%_ptr_StorageBuffer_SB_RO = OpTypePointer StorageBuffer %SB_RO
+      %sb_ro = OpVariable %_ptr_StorageBuffer_SB_RO StorageBuffer
+       %void = OpTypeVoid
+         %14 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %22 = OpConstantNull %uint
+         %23 = OpTypeFunction %v4float
+    %float_1 = OpConstant %float 1
+%arrayLength_8421b9 = OpFunction %void None %14
+         %17 = OpLabel
+        %res = OpVariable %_ptr_Function_uint Function %22
+         %18 = OpArrayLength %uint %sb_ro 0
+               OpStore %res %18
+               OpReturn
+               OpFunctionEnd
+%vertex_main_inner = OpFunction %v4float None %23
+         %25 = OpLabel
+         %26 = OpFunctionCall %void %arrayLength_8421b9
+               OpReturnValue %5
+               OpFunctionEnd
+%vertex_main = OpFunction %void None %14
+         %28 = OpLabel
+         %29 = OpFunctionCall %v4float %vertex_main_inner
+               OpStore %value %29
+               OpStore %vertex_point_size %float_1
+               OpReturn
+               OpFunctionEnd
+%fragment_main = OpFunction %void None %14
+         %32 = OpLabel
+         %33 = OpFunctionCall %void %arrayLength_8421b9
+               OpReturn
+               OpFunctionEnd
+%compute_main = OpFunction %void None %14
+         %35 = OpLabel
+         %36 = OpFunctionCall %void %arrayLength_8421b9
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/builtins/gen/var/arrayLength/8421b9.wgsl.expected.wgsl b/test/tint/builtins/gen/var/arrayLength/8421b9.wgsl.expected.wgsl
index fc94710..2d6dca3 100644
--- a/test/tint/builtins/gen/var/arrayLength/8421b9.wgsl.expected.wgsl
+++ b/test/tint/builtins/gen/var/arrayLength/8421b9.wgsl.expected.wgsl

@@ -1,17 +1,27 @@
-SKIP: FAILED
+enable f16;
 
-builtins/gen/var/arrayLength/8421b9.wgsl:26:10 error: using f16 types in 'storage' address space is not implemented yet
-  arg_0: array<f16>,
-         ^^^^^^^^^^
-
-builtins/gen/var/arrayLength/8421b9.wgsl:25:1 note: see layout of struct:
-/*           align(2) size(2) */ struct SB_RO {
-/* offset(0) align(2) size(2) */   arg_0 : array<f16>;
-/*                            */ };
 struct SB_RO {
-^^^^^^
+  arg_0 : array<f16>,
+}
 
-builtins/gen/var/arrayLength/8421b9.wgsl:28:42 note: see declaration of variable
 @group(0) @binding(1) var<storage, read> sb_ro : SB_RO;
-                                         ^^^^^
 
+fn arrayLength_8421b9() {
+  var res : u32 = arrayLength(&(sb_ro.arg_0));
+}
+
+@vertex
+fn vertex_main() -> @builtin(position) vec4<f32> {
+  arrayLength_8421b9();
+  return vec4<f32>();
+}
+
+@fragment
+fn fragment_main() {
+  arrayLength_8421b9();
+}
+
+@compute @workgroup_size(1)
+fn compute_main() {
+  arrayLength_8421b9();
+}

diff --git a/test/tint/builtins/gen/var/arrayLength/cbd6b5.wgsl.expected.dxc.hlsl b/test/tint/builtins/gen/var/arrayLength/cbd6b5.wgsl.expected.dxc.hlsl
index f19d93a..c62bc23 100644
--- a/test/tint/builtins/gen/var/arrayLength/cbd6b5.wgsl.expected.dxc.hlsl
+++ b/test/tint/builtins/gen/var/arrayLength/cbd6b5.wgsl.expected.dxc.hlsl

@@ -1,17 +1,35 @@
-SKIP: FAILED
+RWByteAddressBuffer sb_rw : register(u0, space0);
 
-builtins/gen/var/arrayLength/cbd6b5.wgsl:26:10 error: using f16 types in 'storage' address space is not implemented yet
-  arg_0: array<f16>,
-         ^^^^^^^^^^
+void arrayLength_cbd6b5() {
+  uint tint_symbol_2 = 0u;
+  sb_rw.GetDimensions(tint_symbol_2);
+  const uint tint_symbol_3 = ((tint_symbol_2 - 0u) / 2u);
+  uint res = tint_symbol_3;
+}
 
-builtins/gen/var/arrayLength/cbd6b5.wgsl:25:1 note: see layout of struct:
-/*           align(2) size(2) */ struct SB_RW {
-/* offset(0) align(2) size(2) */   arg_0 : array<f16>;
-/*                            */ };
-struct SB_RW {
-^^^^^^
+struct tint_symbol {
+  float4 value : SV_Position;
+};
 
-builtins/gen/var/arrayLength/cbd6b5.wgsl:28:48 note: see declaration of variable
-@group(0) @binding(0) var<storage, read_write> sb_rw : SB_RW;
-                                               ^^^^^
+float4 vertex_main_inner() {
+  arrayLength_cbd6b5();
+  return (0.0f).xxxx;
+}
 
+tint_symbol vertex_main() {
+  const float4 inner_result = vertex_main_inner();
+  tint_symbol wrapper_result = (tint_symbol)0;
+  wrapper_result.value = inner_result;
+  return wrapper_result;
+}
+
+void fragment_main() {
+  arrayLength_cbd6b5();
+  return;
+}
+
+[numthreads(1, 1, 1)]
+void compute_main() {
+  arrayLength_cbd6b5();
+  return;
+}

diff --git a/test/tint/builtins/gen/var/arrayLength/cbd6b5.wgsl.expected.fxc.hlsl b/test/tint/builtins/gen/var/arrayLength/cbd6b5.wgsl.expected.fxc.hlsl
index f19d93a..c62bc23 100644
--- a/test/tint/builtins/gen/var/arrayLength/cbd6b5.wgsl.expected.fxc.hlsl
+++ b/test/tint/builtins/gen/var/arrayLength/cbd6b5.wgsl.expected.fxc.hlsl

@@ -1,17 +1,35 @@
-SKIP: FAILED
+RWByteAddressBuffer sb_rw : register(u0, space0);
 
-builtins/gen/var/arrayLength/cbd6b5.wgsl:26:10 error: using f16 types in 'storage' address space is not implemented yet
-  arg_0: array<f16>,
-         ^^^^^^^^^^
+void arrayLength_cbd6b5() {
+  uint tint_symbol_2 = 0u;
+  sb_rw.GetDimensions(tint_symbol_2);
+  const uint tint_symbol_3 = ((tint_symbol_2 - 0u) / 2u);
+  uint res = tint_symbol_3;
+}
 
-builtins/gen/var/arrayLength/cbd6b5.wgsl:25:1 note: see layout of struct:
-/*           align(2) size(2) */ struct SB_RW {
-/* offset(0) align(2) size(2) */   arg_0 : array<f16>;
-/*                            */ };
-struct SB_RW {
-^^^^^^
+struct tint_symbol {
+  float4 value : SV_Position;
+};
 
-builtins/gen/var/arrayLength/cbd6b5.wgsl:28:48 note: see declaration of variable
-@group(0) @binding(0) var<storage, read_write> sb_rw : SB_RW;
-                                               ^^^^^
+float4 vertex_main_inner() {
+  arrayLength_cbd6b5();
+  return (0.0f).xxxx;
+}
 
+tint_symbol vertex_main() {
+  const float4 inner_result = vertex_main_inner();
+  tint_symbol wrapper_result = (tint_symbol)0;
+  wrapper_result.value = inner_result;
+  return wrapper_result;
+}
+
+void fragment_main() {
+  arrayLength_cbd6b5();
+  return;
+}
+
+[numthreads(1, 1, 1)]
+void compute_main() {
+  arrayLength_cbd6b5();
+  return;
+}

diff --git a/test/tint/builtins/gen/var/arrayLength/cbd6b5.wgsl.expected.glsl b/test/tint/builtins/gen/var/arrayLength/cbd6b5.wgsl.expected.glsl
index f19d93a..aeaf5a3 100644
--- a/test/tint/builtins/gen/var/arrayLength/cbd6b5.wgsl.expected.glsl
+++ b/test/tint/builtins/gen/var/arrayLength/cbd6b5.wgsl.expected.glsl

@@ -1,17 +1,64 @@
-SKIP: FAILED
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
 
-builtins/gen/var/arrayLength/cbd6b5.wgsl:26:10 error: using f16 types in 'storage' address space is not implemented yet
-  arg_0: array<f16>,
-         ^^^^^^^^^^
+layout(binding = 0, std430) buffer SB_RW_ssbo {
+  float16_t arg_0[];
+} sb_rw;
 
-builtins/gen/var/arrayLength/cbd6b5.wgsl:25:1 note: see layout of struct:
-/*           align(2) size(2) */ struct SB_RW {
-/* offset(0) align(2) size(2) */   arg_0 : array<f16>;
-/*                            */ };
-struct SB_RW {
-^^^^^^
+void arrayLength_cbd6b5() {
+  uint res = uint(sb_rw.arg_0.length());
+}
 
-builtins/gen/var/arrayLength/cbd6b5.wgsl:28:48 note: see declaration of variable
-@group(0) @binding(0) var<storage, read_write> sb_rw : SB_RW;
-                                               ^^^^^
+vec4 vertex_main() {
+  arrayLength_cbd6b5();
+  return vec4(0.0f);
+}
 
+void main() {
+  gl_PointSize = 1.0;
+  vec4 inner_result = vertex_main();
+  gl_Position = inner_result;
+  gl_Position.y = -(gl_Position.y);
+  gl_Position.z = ((2.0f * gl_Position.z) - gl_Position.w);
+  return;
+}
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+precision mediump float;
+
+layout(binding = 0, std430) buffer SB_RW_ssbo {
+  float16_t arg_0[];
+} sb_rw;
+
+void arrayLength_cbd6b5() {
+  uint res = uint(sb_rw.arg_0.length());
+}
+
+void fragment_main() {
+  arrayLength_cbd6b5();
+}
+
+void main() {
+  fragment_main();
+  return;
+}
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(binding = 0, std430) buffer SB_RW_ssbo {
+  float16_t arg_0[];
+} sb_rw;
+
+void arrayLength_cbd6b5() {
+  uint res = uint(sb_rw.arg_0.length());
+}
+
+void compute_main() {
+  arrayLength_cbd6b5();
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  compute_main();
+  return;
+}

diff --git a/test/tint/builtins/gen/var/arrayLength/cbd6b5.wgsl.expected.msl b/test/tint/builtins/gen/var/arrayLength/cbd6b5.wgsl.expected.msl
index f19d93a..86034e4 100644
--- a/test/tint/builtins/gen/var/arrayLength/cbd6b5.wgsl.expected.msl
+++ b/test/tint/builtins/gen/var/arrayLength/cbd6b5.wgsl.expected.msl

@@ -1,17 +1,54 @@
-SKIP: FAILED
+#include <metal_stdlib>
 
-builtins/gen/var/arrayLength/cbd6b5.wgsl:26:10 error: using f16 types in 'storage' address space is not implemented yet
-  arg_0: array<f16>,
-         ^^^^^^^^^^
+using namespace metal;
 
-builtins/gen/var/arrayLength/cbd6b5.wgsl:25:1 note: see layout of struct:
-/*           align(2) size(2) */ struct SB_RW {
-/* offset(0) align(2) size(2) */   arg_0 : array<f16>;
-/*                            */ };
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct tint_symbol_1 {
+  /* 0x0000 */ tint_array<uint4, 1> buffer_size;
+};
+
 struct SB_RW {
-^^^^^^
+  tint_array<half, 1> arg_0;
+};
 
-builtins/gen/var/arrayLength/cbd6b5.wgsl:28:48 note: see declaration of variable
-@group(0) @binding(0) var<storage, read_write> sb_rw : SB_RW;
-                                               ^^^^^
+void arrayLength_cbd6b5(const constant tint_symbol_1* const tint_symbol_3) {
+  uint res = (((*(tint_symbol_3)).buffer_size[0u][0u] - 0u) / 2u);
+}
+
+struct tint_symbol {
+  float4 value [[position]];
+};
+
+float4 vertex_main_inner(const constant tint_symbol_1* const tint_symbol_4) {
+  arrayLength_cbd6b5(tint_symbol_4);
+  return float4(0.0f);
+}
+
+vertex tint_symbol vertex_main(const constant tint_symbol_1* tint_symbol_5 [[buffer(30)]]) {
+  float4 const inner_result = vertex_main_inner(tint_symbol_5);
+  tint_symbol wrapper_result = {};
+  wrapper_result.value = inner_result;
+  return wrapper_result;
+}
+
+fragment void fragment_main(const constant tint_symbol_1* tint_symbol_6 [[buffer(30)]]) {
+  arrayLength_cbd6b5(tint_symbol_6);
+  return;
+}
+
+kernel void compute_main(const constant tint_symbol_1* tint_symbol_7 [[buffer(30)]]) {
+  arrayLength_cbd6b5(tint_symbol_7);
+  return;
+}
 

diff --git a/test/tint/builtins/gen/var/arrayLength/cbd6b5.wgsl.expected.spvasm b/test/tint/builtins/gen/var/arrayLength/cbd6b5.wgsl.expected.spvasm
index f19d93a..15031de 100644
--- a/test/tint/builtins/gen/var/arrayLength/cbd6b5.wgsl.expected.spvasm
+++ b/test/tint/builtins/gen/var/arrayLength/cbd6b5.wgsl.expected.spvasm

@@ -1,17 +1,83 @@
-SKIP: FAILED
-
-builtins/gen/var/arrayLength/cbd6b5.wgsl:26:10 error: using f16 types in 'storage' address space is not implemented yet
-  arg_0: array<f16>,
-         ^^^^^^^^^^
-
-builtins/gen/var/arrayLength/cbd6b5.wgsl:25:1 note: see layout of struct:
-/*           align(2) size(2) */ struct SB_RW {
-/* offset(0) align(2) size(2) */   arg_0 : array<f16>;
-/*                            */ };
-struct SB_RW {
-^^^^^^
-
-builtins/gen/var/arrayLength/cbd6b5.wgsl:28:48 note: see declaration of variable
-@group(0) @binding(0) var<storage, read_write> sb_rw : SB_RW;
-                                               ^^^^^
-
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 37
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Vertex %vertex_main "vertex_main" %value %vertex_point_size
+               OpEntryPoint Fragment %fragment_main "fragment_main"
+               OpEntryPoint GLCompute %compute_main "compute_main"
+               OpExecutionMode %fragment_main OriginUpperLeft
+               OpExecutionMode %compute_main LocalSize 1 1 1
+               OpName %value "value"
+               OpName %vertex_point_size "vertex_point_size"
+               OpName %SB_RW "SB_RW"
+               OpMemberName %SB_RW 0 "arg_0"
+               OpName %sb_rw "sb_rw"
+               OpName %arrayLength_cbd6b5 "arrayLength_cbd6b5"
+               OpName %res "res"
+               OpName %vertex_main_inner "vertex_main_inner"
+               OpName %vertex_main "vertex_main"
+               OpName %fragment_main "fragment_main"
+               OpName %compute_main "compute_main"
+               OpDecorate %value BuiltIn Position
+               OpDecorate %vertex_point_size BuiltIn PointSize
+               OpDecorate %SB_RW Block
+               OpMemberDecorate %SB_RW 0 Offset 0
+               OpDecorate %_runtimearr_half ArrayStride 2
+               OpDecorate %sb_rw DescriptorSet 0
+               OpDecorate %sb_rw Binding 0
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+          %5 = OpConstantNull %v4float
+      %value = OpVariable %_ptr_Output_v4float Output %5
+%_ptr_Output_float = OpTypePointer Output %float
+          %8 = OpConstantNull %float
+%vertex_point_size = OpVariable %_ptr_Output_float Output %8
+       %half = OpTypeFloat 16
+%_runtimearr_half = OpTypeRuntimeArray %half
+      %SB_RW = OpTypeStruct %_runtimearr_half
+%_ptr_StorageBuffer_SB_RW = OpTypePointer StorageBuffer %SB_RW
+      %sb_rw = OpVariable %_ptr_StorageBuffer_SB_RW StorageBuffer
+       %void = OpTypeVoid
+         %14 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %22 = OpConstantNull %uint
+         %23 = OpTypeFunction %v4float
+    %float_1 = OpConstant %float 1
+%arrayLength_cbd6b5 = OpFunction %void None %14
+         %17 = OpLabel
+        %res = OpVariable %_ptr_Function_uint Function %22
+         %18 = OpArrayLength %uint %sb_rw 0
+               OpStore %res %18
+               OpReturn
+               OpFunctionEnd
+%vertex_main_inner = OpFunction %v4float None %23
+         %25 = OpLabel
+         %26 = OpFunctionCall %void %arrayLength_cbd6b5
+               OpReturnValue %5
+               OpFunctionEnd
+%vertex_main = OpFunction %void None %14
+         %28 = OpLabel
+         %29 = OpFunctionCall %v4float %vertex_main_inner
+               OpStore %value %29
+               OpStore %vertex_point_size %float_1
+               OpReturn
+               OpFunctionEnd
+%fragment_main = OpFunction %void None %14
+         %32 = OpLabel
+         %33 = OpFunctionCall %void %arrayLength_cbd6b5
+               OpReturn
+               OpFunctionEnd
+%compute_main = OpFunction %void None %14
+         %35 = OpLabel
+         %36 = OpFunctionCall %void %arrayLength_cbd6b5
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/builtins/gen/var/arrayLength/cbd6b5.wgsl.expected.wgsl b/test/tint/builtins/gen/var/arrayLength/cbd6b5.wgsl.expected.wgsl
index f19d93a..c025591 100644
--- a/test/tint/builtins/gen/var/arrayLength/cbd6b5.wgsl.expected.wgsl
+++ b/test/tint/builtins/gen/var/arrayLength/cbd6b5.wgsl.expected.wgsl

@@ -1,17 +1,27 @@
-SKIP: FAILED
+enable f16;
 
-builtins/gen/var/arrayLength/cbd6b5.wgsl:26:10 error: using f16 types in 'storage' address space is not implemented yet
-  arg_0: array<f16>,
-         ^^^^^^^^^^
-
-builtins/gen/var/arrayLength/cbd6b5.wgsl:25:1 note: see layout of struct:
-/*           align(2) size(2) */ struct SB_RW {
-/* offset(0) align(2) size(2) */   arg_0 : array<f16>;
-/*                            */ };
 struct SB_RW {
-^^^^^^
+  arg_0 : array<f16>,
+}
 
-builtins/gen/var/arrayLength/cbd6b5.wgsl:28:48 note: see declaration of variable
 @group(0) @binding(0) var<storage, read_write> sb_rw : SB_RW;
-                                               ^^^^^
 
+fn arrayLength_cbd6b5() {
+  var res : u32 = arrayLength(&(sb_rw.arg_0));
+}
+
+@vertex
+fn vertex_main() -> @builtin(position) vec4<f32> {
+  arrayLength_cbd6b5();
+  return vec4<f32>();
+}
+
+@fragment
+fn fragment_main() {
+  arrayLength_cbd6b5();
+}
+
+@compute @workgroup_size(1)
+fn compute_main() {
+  arrayLength_cbd6b5();
+}

diff --git a/test/tint/expressions/binary/mul/mat3x2-vec3/f16.wgsl.expected.dxc.hlsl b/test/tint/expressions/binary/mul/mat3x2-vec3/f16.wgsl.expected.dxc.hlsl
index ae79b76..523d7b4 100644
--- a/test/tint/expressions/binary/mul/mat3x2-vec3/f16.wgsl.expected.dxc.hlsl
+++ b/test/tint/expressions/binary/mul/mat3x2-vec3/f16.wgsl.expected.dxc.hlsl

@@ -1,20 +1,21 @@
-SKIP: FAILED
+cbuffer cbuffer_data : register(b0, space0) {
+  uint4 data[2];
+};
 
-binary/mul/mat3x2-vec3/f16.wgsl:3:14 error: using f16 types in 'uniform' address space is not implemented yet
-    matrix : mat3x2<f16>,
-             ^^^^^^^^^^^
+matrix<float16_t, 3, 2> tint_symbol_2(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint ubo_load = buffer[scalar_offset / 4][scalar_offset % 4];
+  const uint scalar_offset_1 = ((offset + 4u)) / 4;
+  uint ubo_load_1 = buffer[scalar_offset_1 / 4][scalar_offset_1 % 4];
+  const uint scalar_offset_2 = ((offset + 8u)) / 4;
+  uint ubo_load_2 = buffer[scalar_offset_2 / 4][scalar_offset_2 % 4];
+  return matrix<float16_t, 3, 2>(vector<float16_t, 2>(float16_t(f16tof32(ubo_load & 0xFFFF)), float16_t(f16tof32(ubo_load >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_1 & 0xFFFF)), float16_t(f16tof32(ubo_load_1 >> 16))), vector<float16_t, 2>(float16_t(f16tof32(ubo_load_2 & 0xFFFF)), float16_t(f16tof32(ubo_load_2 >> 16))));
+}
 
-binary/mul/mat3x2-vec3/f16.wgsl:2:1 note: see layout of struct:
-/*            align(8) size(24) */ struct S {
-/* offset( 0) align(4) size(12) */   matrix : mat3x2<f16>;
-/* offset(12) align(1) size( 4) */   // -- implicit field alignment padding --;
-/* offset(16) align(8) size( 6) */   vector : vec3<f16>;
-/* offset(22) align(1) size( 2) */   // -- implicit struct size padding --;
-/*                              */ };
-struct S {
-^^^^^^
-
-binary/mul/mat3x2-vec3/f16.wgsl:6:36 note: see declaration of variable
-@group(0) @binding(0) var<uniform> data: S;
-                                   ^^^^
-
+void main() {
+  uint2 ubo_load_3 = data[1].xy;
+  vector<float16_t, 2> ubo_load_3_xz = vector<float16_t, 2>(f16tof32(ubo_load_3 & 0xFFFF));
+  float16_t ubo_load_3_y = f16tof32(ubo_load_3[0] >> 16);
+  const vector<float16_t, 2> x = mul(vector<float16_t, 3>(ubo_load_3_xz[0], ubo_load_3_y, ubo_load_3_xz[1]), tint_symbol_2(data, 0u));
+  return;
+}

diff --git a/test/tint/expressions/binary/mul/mat3x2-vec3/f16.wgsl.expected.glsl b/test/tint/expressions/binary/mul/mat3x2-vec3/f16.wgsl.expected.glsl
index b949b92..35e00f4 100644
--- a/test/tint/expressions/binary/mul/mat3x2-vec3/f16.wgsl.expected.glsl
+++ b/test/tint/expressions/binary/mul/mat3x2-vec3/f16.wgsl.expected.glsl

@@ -1,20 +1,38 @@
-SKIP: FAILED
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+precision mediump float;
 
-expressions/binary/mul/mat3x2-vec3/f16.wgsl:3:14 error: using f16 types in 'uniform' address space is not implemented yet
-    matrix : mat3x2<f16>,
-             ^^^^^^^^^^^
-
-expressions/binary/mul/mat3x2-vec3/f16.wgsl:2:1 note: see layout of struct:
-/*            align(8) size(24) */ struct S {
-/* offset( 0) align(4) size(12) */   matrix : mat3x2<f16>;
-/* offset(12) align(1) size( 4) */   // -- implicit field alignment padding --;
-/* offset(16) align(8) size( 6) */   vector : vec3<f16>;
-/* offset(22) align(1) size( 2) */   // -- implicit struct size padding --;
-/*                              */ };
 struct S {
-^^^^^^
+  f16mat3x2 matrix;
+  uint pad;
+  f16vec3 vector;
+  uint pad_1;
+  uint pad_2;
+};
 
-expressions/binary/mul/mat3x2-vec3/f16.wgsl:6:36 note: see declaration of variable
-@group(0) @binding(0) var<uniform> data: S;
-                                   ^^^^
+struct S_std140 {
+  f16vec2 matrix_0;
+  f16vec2 matrix_1;
+  f16vec2 matrix_2;
+  uint pad;
+  f16vec3 vector;
+  uint pad_1;
+  uint pad_2;
+};
 
+layout(binding = 0, std140) uniform data_block_std140_ubo {
+  S_std140 inner;
+} data;
+
+f16mat3x2 load_data_inner_matrix() {
+  return f16mat3x2(data.inner.matrix_0, data.inner.matrix_1, data.inner.matrix_2);
+}
+
+void tint_symbol() {
+  f16vec2 x = (load_data_inner_matrix() * data.inner.vector);
+}
+
+void main() {
+  tint_symbol();
+  return;
+}

diff --git a/test/tint/expressions/binary/mul/mat3x2-vec3/f16.wgsl.expected.msl b/test/tint/expressions/binary/mul/mat3x2-vec3/f16.wgsl.expected.msl
index ae79b76..779a708 100644
--- a/test/tint/expressions/binary/mul/mat3x2-vec3/f16.wgsl.expected.msl
+++ b/test/tint/expressions/binary/mul/mat3x2-vec3/f16.wgsl.expected.msl

@@ -1,20 +1,28 @@
-SKIP: FAILED
+#include <metal_stdlib>
 
-binary/mul/mat3x2-vec3/f16.wgsl:3:14 error: using f16 types in 'uniform' address space is not implemented yet
-    matrix : mat3x2<f16>,
-             ^^^^^^^^^^^
+using namespace metal;
 
-binary/mul/mat3x2-vec3/f16.wgsl:2:1 note: see layout of struct:
-/*            align(8) size(24) */ struct S {
-/* offset( 0) align(4) size(12) */   matrix : mat3x2<f16>;
-/* offset(12) align(1) size( 4) */   // -- implicit field alignment padding --;
-/* offset(16) align(8) size( 6) */   vector : vec3<f16>;
-/* offset(22) align(1) size( 2) */   // -- implicit struct size padding --;
-/*                              */ };
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
 struct S {
-^^^^^^
+  /* 0x0000 */ half3x2 tint_symbol;
+  /* 0x000c */ tint_array<int8_t, 4> tint_pad;
+  /* 0x0010 */ packed_half3 vector;
+  /* 0x0016 */ tint_array<int8_t, 2> tint_pad_1;
+};
 
-binary/mul/mat3x2-vec3/f16.wgsl:6:36 note: see declaration of variable
-@group(0) @binding(0) var<uniform> data: S;
-                                   ^^^^
+fragment void tint_symbol_1(const constant S* tint_symbol_2 [[buffer(0)]]) {
+  half2 const x = ((*(tint_symbol_2)).tint_symbol * half3((*(tint_symbol_2)).vector));
+  return;
+}
 

diff --git a/test/tint/expressions/binary/mul/mat3x2-vec3/f16.wgsl.expected.spvasm b/test/tint/expressions/binary/mul/mat3x2-vec3/f16.wgsl.expected.spvasm
index ae79b76..1ff70b3 100644
--- a/test/tint/expressions/binary/mul/mat3x2-vec3/f16.wgsl.expected.spvasm
+++ b/test/tint/expressions/binary/mul/mat3x2-vec3/f16.wgsl.expected.spvasm

@@ -1,20 +1,71 @@
-SKIP: FAILED
-
-binary/mul/mat3x2-vec3/f16.wgsl:3:14 error: using f16 types in 'uniform' address space is not implemented yet
-    matrix : mat3x2<f16>,
-             ^^^^^^^^^^^
-
-binary/mul/mat3x2-vec3/f16.wgsl:2:1 note: see layout of struct:
-/*            align(8) size(24) */ struct S {
-/* offset( 0) align(4) size(12) */   matrix : mat3x2<f16>;
-/* offset(12) align(1) size( 4) */   // -- implicit field alignment padding --;
-/* offset(16) align(8) size( 6) */   vector : vec3<f16>;
-/* offset(22) align(1) size( 2) */   // -- implicit struct size padding --;
-/*                              */ };
-struct S {
-^^^^^^
-
-binary/mul/mat3x2-vec3/f16.wgsl:6:36 note: see declaration of variable
-@group(0) @binding(0) var<uniform> data: S;
-                                   ^^^^
-
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 40
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %main "main"
+               OpExecutionMode %main OriginUpperLeft
+               OpName %data_block_std140 "data_block_std140"
+               OpMemberName %data_block_std140 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "matrix_0"
+               OpMemberName %S_std140 1 "matrix_1"
+               OpMemberName %S_std140 2 "matrix_2"
+               OpMemberName %S_std140 3 "vector"
+               OpName %data "data"
+               OpName %load_data_inner_matrix "load_data_inner_matrix"
+               OpName %main "main"
+               OpDecorate %data_block_std140 Block
+               OpMemberDecorate %data_block_std140 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 4
+               OpMemberDecorate %S_std140 2 Offset 8
+               OpMemberDecorate %S_std140 3 Offset 16
+               OpDecorate %data NonWritable
+               OpDecorate %data DescriptorSet 0
+               OpDecorate %data Binding 0
+       %half = OpTypeFloat 16
+     %v2half = OpTypeVector %half 2
+     %v3half = OpTypeVector %half 3
+   %S_std140 = OpTypeStruct %v2half %v2half %v2half %v3half
+%data_block_std140 = OpTypeStruct %S_std140
+%_ptr_Uniform_data_block_std140 = OpTypePointer Uniform %data_block_std140
+       %data = OpVariable %_ptr_Uniform_data_block_std140 Uniform
+ %mat3v2half = OpTypeMatrix %v2half 3
+          %8 = OpTypeFunction %mat3v2half
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+       %void = OpTypeVoid
+         %30 = OpTypeFunction %void
+     %uint_3 = OpConstant %uint 3
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+%load_data_inner_matrix = OpFunction %mat3v2half None %8
+         %11 = OpLabel
+         %16 = OpAccessChain %_ptr_Uniform_S_std140 %data %uint_0
+         %19 = OpAccessChain %_ptr_Uniform_v2half %16 %uint_0
+         %20 = OpLoad %v2half %19
+         %23 = OpAccessChain %_ptr_Uniform_v2half %16 %uint_1
+         %24 = OpLoad %v2half %23
+         %27 = OpAccessChain %_ptr_Uniform_v2half %16 %uint_2
+         %28 = OpLoad %v2half %27
+         %29 = OpCompositeConstruct %mat3v2half %20 %24 %28
+               OpReturnValue %29
+               OpFunctionEnd
+       %main = OpFunction %void None %30
+         %33 = OpLabel
+         %34 = OpFunctionCall %mat3v2half %load_data_inner_matrix
+         %37 = OpAccessChain %_ptr_Uniform_v3half %data %uint_0 %uint_3
+         %38 = OpLoad %v3half %37
+         %39 = OpMatrixTimesVector %v2half %34 %38
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/expressions/binary/mul/mat3x2-vec3/f16.wgsl.expected.wgsl b/test/tint/expressions/binary/mul/mat3x2-vec3/f16.wgsl.expected.wgsl
index ae79b76..dfa05d4 100644
--- a/test/tint/expressions/binary/mul/mat3x2-vec3/f16.wgsl.expected.wgsl
+++ b/test/tint/expressions/binary/mul/mat3x2-vec3/f16.wgsl.expected.wgsl

@@ -1,20 +1,13 @@
-SKIP: FAILED
+enable f16;
 
-binary/mul/mat3x2-vec3/f16.wgsl:3:14 error: using f16 types in 'uniform' address space is not implemented yet
-    matrix : mat3x2<f16>,
-             ^^^^^^^^^^^
-
-binary/mul/mat3x2-vec3/f16.wgsl:2:1 note: see layout of struct:
-/*            align(8) size(24) */ struct S {
-/* offset( 0) align(4) size(12) */   matrix : mat3x2<f16>;
-/* offset(12) align(1) size( 4) */   // -- implicit field alignment padding --;
-/* offset(16) align(8) size( 6) */   vector : vec3<f16>;
-/* offset(22) align(1) size( 2) */   // -- implicit struct size padding --;
-/*                              */ };
 struct S {
-^^^^^^
+  matrix : mat3x2<f16>,
+  vector : vec3<f16>,
+}
 
-binary/mul/mat3x2-vec3/f16.wgsl:6:36 note: see declaration of variable
-@group(0) @binding(0) var<uniform> data: S;
-                                   ^^^^
+@group(0) @binding(0) var<uniform> data : S;
 
+@fragment
+fn main() {
+  let x = (data.matrix * data.vector);
+}

diff --git a/test/tint/expressions/binary/mul/mat3x3-vec3/f16.wgsl.expected.dxc.hlsl b/test/tint/expressions/binary/mul/mat3x3-vec3/f16.wgsl.expected.dxc.hlsl
index 57a21b5..a60cb20 100644
--- a/test/tint/expressions/binary/mul/mat3x3-vec3/f16.wgsl.expected.dxc.hlsl
+++ b/test/tint/expressions/binary/mul/mat3x3-vec3/f16.wgsl.expected.dxc.hlsl

@@ -1,19 +1,30 @@
-SKIP: FAILED
+cbuffer cbuffer_data : register(b0, space0) {
+  uint4 data[2];
+};
 
-binary/mul/mat3x3-vec3/f16.wgsl:3:14 error: using f16 types in 'uniform' address space is not implemented yet
-    matrix : mat3x3<f16>,
-             ^^^^^^^^^^^
+matrix<float16_t, 3, 3> tint_symbol_2(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  return matrix<float16_t, 3, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]));
+}
 
-binary/mul/mat3x3-vec3/f16.wgsl:2:1 note: see layout of struct:
-/*            align(8) size(32) */ struct S {
-/* offset( 0) align(8) size(24) */   matrix : mat3x3<f16>;
-/* offset(24) align(8) size( 6) */   vector : vec3<f16>;
-/* offset(30) align(1) size( 2) */   // -- implicit struct size padding --;
-/*                              */ };
-struct S {
-^^^^^^
-
-binary/mul/mat3x3-vec3/f16.wgsl:6:36 note: see declaration of variable
-@group(0) @binding(0) var<uniform> data: S;
-                                   ^^^^
-
+void main() {
+  uint2 ubo_load_6 = data[1].zw;
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  const vector<float16_t, 3> x = mul(vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]), tint_symbol_2(data, 0u));
+  return;
+}

diff --git a/test/tint/expressions/binary/mul/mat3x3-vec3/f16.wgsl.expected.glsl b/test/tint/expressions/binary/mul/mat3x3-vec3/f16.wgsl.expected.glsl
index 07caac0..24726cc 100644
--- a/test/tint/expressions/binary/mul/mat3x3-vec3/f16.wgsl.expected.glsl
+++ b/test/tint/expressions/binary/mul/mat3x3-vec3/f16.wgsl.expected.glsl

@@ -1,19 +1,32 @@
-SKIP: FAILED
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+precision mediump float;
 
-expressions/binary/mul/mat3x3-vec3/f16.wgsl:3:14 error: using f16 types in 'uniform' address space is not implemented yet
-    matrix : mat3x3<f16>,
-             ^^^^^^^^^^^
-
-expressions/binary/mul/mat3x3-vec3/f16.wgsl:2:1 note: see layout of struct:
-/*            align(8) size(32) */ struct S {
-/* offset( 0) align(8) size(24) */   matrix : mat3x3<f16>;
-/* offset(24) align(8) size( 6) */   vector : vec3<f16>;
-/* offset(30) align(1) size( 2) */   // -- implicit struct size padding --;
-/*                              */ };
 struct S {
-^^^^^^
+  f16mat3 matrix;
+  f16vec3 vector;
+};
 
-expressions/binary/mul/mat3x3-vec3/f16.wgsl:6:36 note: see declaration of variable
-@group(0) @binding(0) var<uniform> data: S;
-                                   ^^^^
+struct S_std140 {
+  f16vec3 matrix_0;
+  f16vec3 matrix_1;
+  f16vec3 matrix_2;
+  f16vec3 vector;
+};
 
+layout(binding = 0, std140) uniform data_block_std140_ubo {
+  S_std140 inner;
+} data;
+
+f16mat3 load_data_inner_matrix() {
+  return f16mat3(data.inner.matrix_0, data.inner.matrix_1, data.inner.matrix_2);
+}
+
+void tint_symbol() {
+  f16vec3 x = (load_data_inner_matrix() * data.inner.vector);
+}
+
+void main() {
+  tint_symbol();
+  return;
+}

diff --git a/test/tint/expressions/binary/mul/mat3x3-vec3/f16.wgsl.expected.msl b/test/tint/expressions/binary/mul/mat3x3-vec3/f16.wgsl.expected.msl
index 57a21b5..9c67a7c 100644
--- a/test/tint/expressions/binary/mul/mat3x3-vec3/f16.wgsl.expected.msl
+++ b/test/tint/expressions/binary/mul/mat3x3-vec3/f16.wgsl.expected.msl

@@ -1,19 +1,27 @@
-SKIP: FAILED
+#include <metal_stdlib>
 
-binary/mul/mat3x3-vec3/f16.wgsl:3:14 error: using f16 types in 'uniform' address space is not implemented yet
-    matrix : mat3x3<f16>,
-             ^^^^^^^^^^^
+using namespace metal;
 
-binary/mul/mat3x3-vec3/f16.wgsl:2:1 note: see layout of struct:
-/*            align(8) size(32) */ struct S {
-/* offset( 0) align(8) size(24) */   matrix : mat3x3<f16>;
-/* offset(24) align(8) size( 6) */   vector : vec3<f16>;
-/* offset(30) align(1) size( 2) */   // -- implicit struct size padding --;
-/*                              */ };
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
 struct S {
-^^^^^^
+  /* 0x0000 */ half3x3 tint_symbol;
+  /* 0x0018 */ packed_half3 vector;
+  /* 0x001e */ tint_array<int8_t, 2> tint_pad;
+};
 
-binary/mul/mat3x3-vec3/f16.wgsl:6:36 note: see declaration of variable
-@group(0) @binding(0) var<uniform> data: S;
-                                   ^^^^
+fragment void tint_symbol_1(const constant S* tint_symbol_2 [[buffer(0)]]) {
+  half3 const x = ((*(tint_symbol_2)).tint_symbol * half3((*(tint_symbol_2)).vector));
+  return;
+}
 

diff --git a/test/tint/expressions/binary/mul/mat3x3-vec3/f16.wgsl.expected.spvasm b/test/tint/expressions/binary/mul/mat3x3-vec3/f16.wgsl.expected.spvasm
index 57a21b5..09734ff 100644
--- a/test/tint/expressions/binary/mul/mat3x3-vec3/f16.wgsl.expected.spvasm
+++ b/test/tint/expressions/binary/mul/mat3x3-vec3/f16.wgsl.expected.spvasm

@@ -1,19 +1,69 @@
-SKIP: FAILED
-
-binary/mul/mat3x3-vec3/f16.wgsl:3:14 error: using f16 types in 'uniform' address space is not implemented yet
-    matrix : mat3x3<f16>,
-             ^^^^^^^^^^^
-
-binary/mul/mat3x3-vec3/f16.wgsl:2:1 note: see layout of struct:
-/*            align(8) size(32) */ struct S {
-/* offset( 0) align(8) size(24) */   matrix : mat3x3<f16>;
-/* offset(24) align(8) size( 6) */   vector : vec3<f16>;
-/* offset(30) align(1) size( 2) */   // -- implicit struct size padding --;
-/*                              */ };
-struct S {
-^^^^^^
-
-binary/mul/mat3x3-vec3/f16.wgsl:6:36 note: see declaration of variable
-@group(0) @binding(0) var<uniform> data: S;
-                                   ^^^^
-
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 38
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %main "main"
+               OpExecutionMode %main OriginUpperLeft
+               OpName %data_block_std140 "data_block_std140"
+               OpMemberName %data_block_std140 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "matrix_0"
+               OpMemberName %S_std140 1 "matrix_1"
+               OpMemberName %S_std140 2 "matrix_2"
+               OpMemberName %S_std140 3 "vector"
+               OpName %data "data"
+               OpName %load_data_inner_matrix "load_data_inner_matrix"
+               OpName %main "main"
+               OpDecorate %data_block_std140 Block
+               OpMemberDecorate %data_block_std140 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 8
+               OpMemberDecorate %S_std140 2 Offset 16
+               OpMemberDecorate %S_std140 3 Offset 24
+               OpDecorate %data NonWritable
+               OpDecorate %data DescriptorSet 0
+               OpDecorate %data Binding 0
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+   %S_std140 = OpTypeStruct %v3half %v3half %v3half %v3half
+%data_block_std140 = OpTypeStruct %S_std140
+%_ptr_Uniform_data_block_std140 = OpTypePointer Uniform %data_block_std140
+       %data = OpVariable %_ptr_Uniform_data_block_std140 Uniform
+ %mat3v3half = OpTypeMatrix %v3half 3
+          %7 = OpTypeFunction %mat3v3half
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+       %void = OpTypeVoid
+         %29 = OpTypeFunction %void
+     %uint_3 = OpConstant %uint 3
+%load_data_inner_matrix = OpFunction %mat3v3half None %7
+         %10 = OpLabel
+         %15 = OpAccessChain %_ptr_Uniform_S_std140 %data %uint_0
+         %18 = OpAccessChain %_ptr_Uniform_v3half %15 %uint_0
+         %19 = OpLoad %v3half %18
+         %22 = OpAccessChain %_ptr_Uniform_v3half %15 %uint_1
+         %23 = OpLoad %v3half %22
+         %26 = OpAccessChain %_ptr_Uniform_v3half %15 %uint_2
+         %27 = OpLoad %v3half %26
+         %28 = OpCompositeConstruct %mat3v3half %19 %23 %27
+               OpReturnValue %28
+               OpFunctionEnd
+       %main = OpFunction %void None %29
+         %32 = OpLabel
+         %33 = OpFunctionCall %mat3v3half %load_data_inner_matrix
+         %35 = OpAccessChain %_ptr_Uniform_v3half %data %uint_0 %uint_3
+         %36 = OpLoad %v3half %35
+         %37 = OpMatrixTimesVector %v3half %33 %36
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/expressions/binary/mul/mat3x3-vec3/f16.wgsl.expected.wgsl b/test/tint/expressions/binary/mul/mat3x3-vec3/f16.wgsl.expected.wgsl
index 57a21b5..9b6f18c 100644
--- a/test/tint/expressions/binary/mul/mat3x3-vec3/f16.wgsl.expected.wgsl
+++ b/test/tint/expressions/binary/mul/mat3x3-vec3/f16.wgsl.expected.wgsl

@@ -1,19 +1,13 @@
-SKIP: FAILED
+enable f16;
 
-binary/mul/mat3x3-vec3/f16.wgsl:3:14 error: using f16 types in 'uniform' address space is not implemented yet
-    matrix : mat3x3<f16>,
-             ^^^^^^^^^^^
-
-binary/mul/mat3x3-vec3/f16.wgsl:2:1 note: see layout of struct:
-/*            align(8) size(32) */ struct S {
-/* offset( 0) align(8) size(24) */   matrix : mat3x3<f16>;
-/* offset(24) align(8) size( 6) */   vector : vec3<f16>;
-/* offset(30) align(1) size( 2) */   // -- implicit struct size padding --;
-/*                              */ };
 struct S {
-^^^^^^
+  matrix : mat3x3<f16>,
+  vector : vec3<f16>,
+}
 
-binary/mul/mat3x3-vec3/f16.wgsl:6:36 note: see declaration of variable
-@group(0) @binding(0) var<uniform> data: S;
-                                   ^^^^
+@group(0) @binding(0) var<uniform> data : S;
 
+@fragment
+fn main() {
+  let x = (data.matrix * data.vector);
+}

diff --git a/test/tint/expressions/binary/mul/vec3-mat3x3/f16.wgsl.expected.dxc.hlsl b/test/tint/expressions/binary/mul/vec3-mat3x3/f16.wgsl.expected.dxc.hlsl
index df85ef6..4f38587 100644
--- a/test/tint/expressions/binary/mul/vec3-mat3x3/f16.wgsl.expected.dxc.hlsl
+++ b/test/tint/expressions/binary/mul/vec3-mat3x3/f16.wgsl.expected.dxc.hlsl

@@ -1,19 +1,30 @@
-SKIP: FAILED
+cbuffer cbuffer_data : register(b0, space0) {
+  uint4 data[2];
+};
 
-binary/mul/vec3-mat3x3/f16.wgsl:3:14 error: using f16 types in 'uniform' address space is not implemented yet
-    matrix : mat3x3<f16>,
-             ^^^^^^^^^^^
+matrix<float16_t, 3, 3> tint_symbol_3(uint4 buffer[2], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  return matrix<float16_t, 3, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]));
+}
 
-binary/mul/vec3-mat3x3/f16.wgsl:2:1 note: see layout of struct:
-/*            align(8) size(32) */ struct S {
-/* offset( 0) align(8) size(24) */   matrix : mat3x3<f16>;
-/* offset(24) align(8) size( 6) */   vector : vec3<f16>;
-/* offset(30) align(1) size( 2) */   // -- implicit struct size padding --;
-/*                              */ };
-struct S {
-^^^^^^
-
-binary/mul/vec3-mat3x3/f16.wgsl:6:36 note: see declaration of variable
-@group(0) @binding(0) var<uniform> data: S;
-                                   ^^^^
-
+void main() {
+  uint2 ubo_load_6 = data[1].zw;
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  const vector<float16_t, 3> x = mul(tint_symbol_3(data, 0u), vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));
+  return;
+}

diff --git a/test/tint/expressions/binary/mul/vec3-mat3x3/f16.wgsl.expected.glsl b/test/tint/expressions/binary/mul/vec3-mat3x3/f16.wgsl.expected.glsl
index 33e1cf0..3382518 100644
--- a/test/tint/expressions/binary/mul/vec3-mat3x3/f16.wgsl.expected.glsl
+++ b/test/tint/expressions/binary/mul/vec3-mat3x3/f16.wgsl.expected.glsl

@@ -1,19 +1,32 @@
-SKIP: FAILED
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+precision mediump float;
 
-expressions/binary/mul/vec3-mat3x3/f16.wgsl:3:14 error: using f16 types in 'uniform' address space is not implemented yet
-    matrix : mat3x3<f16>,
-             ^^^^^^^^^^^
-
-expressions/binary/mul/vec3-mat3x3/f16.wgsl:2:1 note: see layout of struct:
-/*            align(8) size(32) */ struct S {
-/* offset( 0) align(8) size(24) */   matrix : mat3x3<f16>;
-/* offset(24) align(8) size( 6) */   vector : vec3<f16>;
-/* offset(30) align(1) size( 2) */   // -- implicit struct size padding --;
-/*                              */ };
 struct S {
-^^^^^^
+  f16mat3 matrix;
+  f16vec3 vector;
+};
 
-expressions/binary/mul/vec3-mat3x3/f16.wgsl:6:36 note: see declaration of variable
-@group(0) @binding(0) var<uniform> data: S;
-                                   ^^^^
+struct S_std140 {
+  f16vec3 matrix_0;
+  f16vec3 matrix_1;
+  f16vec3 matrix_2;
+  f16vec3 vector;
+};
 
+layout(binding = 0, std140) uniform data_block_std140_ubo {
+  S_std140 inner;
+} data;
+
+f16mat3 load_data_inner_matrix() {
+  return f16mat3(data.inner.matrix_0, data.inner.matrix_1, data.inner.matrix_2);
+}
+
+void tint_symbol() {
+  f16vec3 x = (data.inner.vector * load_data_inner_matrix());
+}
+
+void main() {
+  tint_symbol();
+  return;
+}

diff --git a/test/tint/expressions/binary/mul/vec3-mat3x3/f16.wgsl.expected.msl b/test/tint/expressions/binary/mul/vec3-mat3x3/f16.wgsl.expected.msl
index df85ef6..1d92d8d 100644
--- a/test/tint/expressions/binary/mul/vec3-mat3x3/f16.wgsl.expected.msl
+++ b/test/tint/expressions/binary/mul/vec3-mat3x3/f16.wgsl.expected.msl

@@ -1,19 +1,27 @@
-SKIP: FAILED
+#include <metal_stdlib>
 
-binary/mul/vec3-mat3x3/f16.wgsl:3:14 error: using f16 types in 'uniform' address space is not implemented yet
-    matrix : mat3x3<f16>,
-             ^^^^^^^^^^^
+using namespace metal;
 
-binary/mul/vec3-mat3x3/f16.wgsl:2:1 note: see layout of struct:
-/*            align(8) size(32) */ struct S {
-/* offset( 0) align(8) size(24) */   matrix : mat3x3<f16>;
-/* offset(24) align(8) size( 6) */   vector : vec3<f16>;
-/* offset(30) align(1) size( 2) */   // -- implicit struct size padding --;
-/*                              */ };
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
 struct S {
-^^^^^^
+  /* 0x0000 */ half3x3 tint_symbol;
+  /* 0x0018 */ packed_half3 vector;
+  /* 0x001e */ tint_array<int8_t, 2> tint_pad;
+};
 
-binary/mul/vec3-mat3x3/f16.wgsl:6:36 note: see declaration of variable
-@group(0) @binding(0) var<uniform> data: S;
-                                   ^^^^
+fragment void tint_symbol_1(const constant S* tint_symbol_2 [[buffer(0)]]) {
+  half3 const x = (half3((*(tint_symbol_2)).vector) * (*(tint_symbol_2)).tint_symbol);
+  return;
+}
 

diff --git a/test/tint/expressions/binary/mul/vec3-mat3x3/f16.wgsl.expected.spvasm b/test/tint/expressions/binary/mul/vec3-mat3x3/f16.wgsl.expected.spvasm
index df85ef6..debae7d 100644
--- a/test/tint/expressions/binary/mul/vec3-mat3x3/f16.wgsl.expected.spvasm
+++ b/test/tint/expressions/binary/mul/vec3-mat3x3/f16.wgsl.expected.spvasm

@@ -1,19 +1,69 @@
-SKIP: FAILED
-
-binary/mul/vec3-mat3x3/f16.wgsl:3:14 error: using f16 types in 'uniform' address space is not implemented yet
-    matrix : mat3x3<f16>,
-             ^^^^^^^^^^^
-
-binary/mul/vec3-mat3x3/f16.wgsl:2:1 note: see layout of struct:
-/*            align(8) size(32) */ struct S {
-/* offset( 0) align(8) size(24) */   matrix : mat3x3<f16>;
-/* offset(24) align(8) size( 6) */   vector : vec3<f16>;
-/* offset(30) align(1) size( 2) */   // -- implicit struct size padding --;
-/*                              */ };
-struct S {
-^^^^^^
-
-binary/mul/vec3-mat3x3/f16.wgsl:6:36 note: see declaration of variable
-@group(0) @binding(0) var<uniform> data: S;
-                                   ^^^^
-
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 38
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %main "main"
+               OpExecutionMode %main OriginUpperLeft
+               OpName %data_block_std140 "data_block_std140"
+               OpMemberName %data_block_std140 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "matrix_0"
+               OpMemberName %S_std140 1 "matrix_1"
+               OpMemberName %S_std140 2 "matrix_2"
+               OpMemberName %S_std140 3 "vector"
+               OpName %data "data"
+               OpName %load_data_inner_matrix "load_data_inner_matrix"
+               OpName %main "main"
+               OpDecorate %data_block_std140 Block
+               OpMemberDecorate %data_block_std140 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 8
+               OpMemberDecorate %S_std140 2 Offset 16
+               OpMemberDecorate %S_std140 3 Offset 24
+               OpDecorate %data NonWritable
+               OpDecorate %data DescriptorSet 0
+               OpDecorate %data Binding 0
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+   %S_std140 = OpTypeStruct %v3half %v3half %v3half %v3half
+%data_block_std140 = OpTypeStruct %S_std140
+%_ptr_Uniform_data_block_std140 = OpTypePointer Uniform %data_block_std140
+       %data = OpVariable %_ptr_Uniform_data_block_std140 Uniform
+ %mat3v3half = OpTypeMatrix %v3half 3
+          %7 = OpTypeFunction %mat3v3half
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+       %void = OpTypeVoid
+         %29 = OpTypeFunction %void
+     %uint_3 = OpConstant %uint 3
+%load_data_inner_matrix = OpFunction %mat3v3half None %7
+         %10 = OpLabel
+         %15 = OpAccessChain %_ptr_Uniform_S_std140 %data %uint_0
+         %18 = OpAccessChain %_ptr_Uniform_v3half %15 %uint_0
+         %19 = OpLoad %v3half %18
+         %22 = OpAccessChain %_ptr_Uniform_v3half %15 %uint_1
+         %23 = OpLoad %v3half %22
+         %26 = OpAccessChain %_ptr_Uniform_v3half %15 %uint_2
+         %27 = OpLoad %v3half %26
+         %28 = OpCompositeConstruct %mat3v3half %19 %23 %27
+               OpReturnValue %28
+               OpFunctionEnd
+       %main = OpFunction %void None %29
+         %32 = OpLabel
+         %34 = OpAccessChain %_ptr_Uniform_v3half %data %uint_0 %uint_3
+         %35 = OpLoad %v3half %34
+         %36 = OpFunctionCall %mat3v3half %load_data_inner_matrix
+         %37 = OpVectorTimesMatrix %v3half %35 %36
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/expressions/binary/mul/vec3-mat3x3/f16.wgsl.expected.wgsl b/test/tint/expressions/binary/mul/vec3-mat3x3/f16.wgsl.expected.wgsl
index df85ef6..9c71b03 100644
--- a/test/tint/expressions/binary/mul/vec3-mat3x3/f16.wgsl.expected.wgsl
+++ b/test/tint/expressions/binary/mul/vec3-mat3x3/f16.wgsl.expected.wgsl

@@ -1,19 +1,13 @@
-SKIP: FAILED
+enable f16;
 
-binary/mul/vec3-mat3x3/f16.wgsl:3:14 error: using f16 types in 'uniform' address space is not implemented yet
-    matrix : mat3x3<f16>,
-             ^^^^^^^^^^^
-
-binary/mul/vec3-mat3x3/f16.wgsl:2:1 note: see layout of struct:
-/*            align(8) size(32) */ struct S {
-/* offset( 0) align(8) size(24) */   matrix : mat3x3<f16>;
-/* offset(24) align(8) size( 6) */   vector : vec3<f16>;
-/* offset(30) align(1) size( 2) */   // -- implicit struct size padding --;
-/*                              */ };
 struct S {
-^^^^^^
+  matrix : mat3x3<f16>,
+  vector : vec3<f16>,
+}
 
-binary/mul/vec3-mat3x3/f16.wgsl:6:36 note: see declaration of variable
-@group(0) @binding(0) var<uniform> data: S;
-                                   ^^^^
+@group(0) @binding(0) var<uniform> data : S;
 
+@fragment
+fn main() {
+  let x = (data.vector * data.matrix);
+}

diff --git a/test/tint/expressions/binary/mul/vec3-mat4x3/f16.wgsl.expected.dxc.hlsl b/test/tint/expressions/binary/mul/vec3-mat4x3/f16.wgsl.expected.dxc.hlsl
index 6f70109..818c96e 100644
--- a/test/tint/expressions/binary/mul/vec3-mat4x3/f16.wgsl.expected.dxc.hlsl
+++ b/test/tint/expressions/binary/mul/vec3-mat4x3/f16.wgsl.expected.dxc.hlsl

@@ -1,19 +1,35 @@
-SKIP: FAILED
+cbuffer cbuffer_data : register(b0, space0) {
+  uint4 data[3];
+};
 
-binary/mul/vec3-mat4x3/f16.wgsl:3:14 error: using f16 types in 'uniform' address space is not implemented yet
-    matrix : mat4x3<f16>,
-             ^^^^^^^^^^^
+matrix<float16_t, 4, 3> tint_symbol_3(uint4 buffer[3], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  return matrix<float16_t, 4, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]), vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));
+}
 
-binary/mul/vec3-mat4x3/f16.wgsl:2:1 note: see layout of struct:
-/*            align(8) size(40) */ struct S {
-/* offset( 0) align(8) size(32) */   matrix : mat4x3<f16>;
-/* offset(32) align(8) size( 6) */   vector : vec3<f16>;
-/* offset(38) align(1) size( 2) */   // -- implicit struct size padding --;
-/*                              */ };
-struct S {
-^^^^^^
-
-binary/mul/vec3-mat4x3/f16.wgsl:6:36 note: see declaration of variable
-@group(0) @binding(0) var<uniform> data: S;
-                                   ^^^^
-
+void main() {
+  uint2 ubo_load_8 = data[2].xy;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  float16_t ubo_load_8_y = f16tof32(ubo_load_8[0] >> 16);
+  const vector<float16_t, 4> x = mul(tint_symbol_3(data, 0u), vector<float16_t, 3>(ubo_load_8_xz[0], ubo_load_8_y, ubo_load_8_xz[1]));
+  return;
+}

diff --git a/test/tint/expressions/binary/mul/vec3-mat4x3/f16.wgsl.expected.glsl b/test/tint/expressions/binary/mul/vec3-mat4x3/f16.wgsl.expected.glsl
index 9f92e39..b684ca9 100644
--- a/test/tint/expressions/binary/mul/vec3-mat4x3/f16.wgsl.expected.glsl
+++ b/test/tint/expressions/binary/mul/vec3-mat4x3/f16.wgsl.expected.glsl

@@ -1,19 +1,37 @@
-SKIP: FAILED
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+precision mediump float;
 
-expressions/binary/mul/vec3-mat4x3/f16.wgsl:3:14 error: using f16 types in 'uniform' address space is not implemented yet
-    matrix : mat4x3<f16>,
-             ^^^^^^^^^^^
-
-expressions/binary/mul/vec3-mat4x3/f16.wgsl:2:1 note: see layout of struct:
-/*            align(8) size(40) */ struct S {
-/* offset( 0) align(8) size(32) */   matrix : mat4x3<f16>;
-/* offset(32) align(8) size( 6) */   vector : vec3<f16>;
-/* offset(38) align(1) size( 2) */   // -- implicit struct size padding --;
-/*                              */ };
 struct S {
-^^^^^^
+  f16mat4x3 matrix;
+  f16vec3 vector;
+  uint pad;
+  uint pad_1;
+};
 
-expressions/binary/mul/vec3-mat4x3/f16.wgsl:6:36 note: see declaration of variable
-@group(0) @binding(0) var<uniform> data: S;
-                                   ^^^^
+struct S_std140 {
+  f16vec3 matrix_0;
+  f16vec3 matrix_1;
+  f16vec3 matrix_2;
+  f16vec3 matrix_3;
+  f16vec3 vector;
+  uint pad;
+  uint pad_1;
+};
 
+layout(binding = 0, std140) uniform data_block_std140_ubo {
+  S_std140 inner;
+} data;
+
+f16mat4x3 load_data_inner_matrix() {
+  return f16mat4x3(data.inner.matrix_0, data.inner.matrix_1, data.inner.matrix_2, data.inner.matrix_3);
+}
+
+void tint_symbol() {
+  f16vec4 x = (data.inner.vector * load_data_inner_matrix());
+}
+
+void main() {
+  tint_symbol();
+  return;
+}

diff --git a/test/tint/expressions/binary/mul/vec3-mat4x3/f16.wgsl.expected.msl b/test/tint/expressions/binary/mul/vec3-mat4x3/f16.wgsl.expected.msl
index 6f70109..d311e88 100644
--- a/test/tint/expressions/binary/mul/vec3-mat4x3/f16.wgsl.expected.msl
+++ b/test/tint/expressions/binary/mul/vec3-mat4x3/f16.wgsl.expected.msl

@@ -1,19 +1,27 @@
-SKIP: FAILED
+#include <metal_stdlib>
 
-binary/mul/vec3-mat4x3/f16.wgsl:3:14 error: using f16 types in 'uniform' address space is not implemented yet
-    matrix : mat4x3<f16>,
-             ^^^^^^^^^^^
+using namespace metal;
 
-binary/mul/vec3-mat4x3/f16.wgsl:2:1 note: see layout of struct:
-/*            align(8) size(40) */ struct S {
-/* offset( 0) align(8) size(32) */   matrix : mat4x3<f16>;
-/* offset(32) align(8) size( 6) */   vector : vec3<f16>;
-/* offset(38) align(1) size( 2) */   // -- implicit struct size padding --;
-/*                              */ };
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
 struct S {
-^^^^^^
+  /* 0x0000 */ half4x3 tint_symbol;
+  /* 0x0020 */ packed_half3 vector;
+  /* 0x0026 */ tint_array<int8_t, 2> tint_pad;
+};
 
-binary/mul/vec3-mat4x3/f16.wgsl:6:36 note: see declaration of variable
-@group(0) @binding(0) var<uniform> data: S;
-                                   ^^^^
+fragment void tint_symbol_1(const constant S* tint_symbol_2 [[buffer(0)]]) {
+  half4 const x = (half3((*(tint_symbol_2)).vector) * (*(tint_symbol_2)).tint_symbol);
+  return;
+}
 

diff --git a/test/tint/expressions/binary/mul/vec3-mat4x3/f16.wgsl.expected.spvasm b/test/tint/expressions/binary/mul/vec3-mat4x3/f16.wgsl.expected.spvasm
index 6f70109..6cb5a70 100644
--- a/test/tint/expressions/binary/mul/vec3-mat4x3/f16.wgsl.expected.spvasm
+++ b/test/tint/expressions/binary/mul/vec3-mat4x3/f16.wgsl.expected.spvasm

@@ -1,19 +1,75 @@
-SKIP: FAILED
-
-binary/mul/vec3-mat4x3/f16.wgsl:3:14 error: using f16 types in 'uniform' address space is not implemented yet
-    matrix : mat4x3<f16>,
-             ^^^^^^^^^^^
-
-binary/mul/vec3-mat4x3/f16.wgsl:2:1 note: see layout of struct:
-/*            align(8) size(40) */ struct S {
-/* offset( 0) align(8) size(32) */   matrix : mat4x3<f16>;
-/* offset(32) align(8) size( 6) */   vector : vec3<f16>;
-/* offset(38) align(1) size( 2) */   // -- implicit struct size padding --;
-/*                              */ };
-struct S {
-^^^^^^
-
-binary/mul/vec3-mat4x3/f16.wgsl:6:36 note: see declaration of variable
-@group(0) @binding(0) var<uniform> data: S;
-                                   ^^^^
-
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 43
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %main "main"
+               OpExecutionMode %main OriginUpperLeft
+               OpName %data_block_std140 "data_block_std140"
+               OpMemberName %data_block_std140 0 "inner"
+               OpName %S_std140 "S_std140"
+               OpMemberName %S_std140 0 "matrix_0"
+               OpMemberName %S_std140 1 "matrix_1"
+               OpMemberName %S_std140 2 "matrix_2"
+               OpMemberName %S_std140 3 "matrix_3"
+               OpMemberName %S_std140 4 "vector"
+               OpName %data "data"
+               OpName %load_data_inner_matrix "load_data_inner_matrix"
+               OpName %main "main"
+               OpDecorate %data_block_std140 Block
+               OpMemberDecorate %data_block_std140 0 Offset 0
+               OpMemberDecorate %S_std140 0 Offset 0
+               OpMemberDecorate %S_std140 1 Offset 8
+               OpMemberDecorate %S_std140 2 Offset 16
+               OpMemberDecorate %S_std140 3 Offset 24
+               OpMemberDecorate %S_std140 4 Offset 32
+               OpDecorate %data NonWritable
+               OpDecorate %data DescriptorSet 0
+               OpDecorate %data Binding 0
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+   %S_std140 = OpTypeStruct %v3half %v3half %v3half %v3half %v3half
+%data_block_std140 = OpTypeStruct %S_std140
+%_ptr_Uniform_data_block_std140 = OpTypePointer Uniform %data_block_std140
+       %data = OpVariable %_ptr_Uniform_data_block_std140 Uniform
+ %mat4v3half = OpTypeMatrix %v3half 4
+          %7 = OpTypeFunction %mat4v3half
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_S_std140 = OpTypePointer Uniform %S_std140
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+     %uint_3 = OpConstant %uint 3
+       %void = OpTypeVoid
+         %33 = OpTypeFunction %void
+     %uint_4 = OpConstant %uint 4
+     %v4half = OpTypeVector %half 4
+%load_data_inner_matrix = OpFunction %mat4v3half None %7
+         %10 = OpLabel
+         %15 = OpAccessChain %_ptr_Uniform_S_std140 %data %uint_0
+         %18 = OpAccessChain %_ptr_Uniform_v3half %15 %uint_0
+         %19 = OpLoad %v3half %18
+         %22 = OpAccessChain %_ptr_Uniform_v3half %15 %uint_1
+         %23 = OpLoad %v3half %22
+         %26 = OpAccessChain %_ptr_Uniform_v3half %15 %uint_2
+         %27 = OpLoad %v3half %26
+         %30 = OpAccessChain %_ptr_Uniform_v3half %15 %uint_3
+         %31 = OpLoad %v3half %30
+         %32 = OpCompositeConstruct %mat4v3half %19 %23 %27 %31
+               OpReturnValue %32
+               OpFunctionEnd
+       %main = OpFunction %void None %33
+         %36 = OpLabel
+         %38 = OpAccessChain %_ptr_Uniform_v3half %data %uint_0 %uint_4
+         %39 = OpLoad %v3half %38
+         %40 = OpFunctionCall %mat4v3half %load_data_inner_matrix
+         %41 = OpVectorTimesMatrix %v4half %39 %40
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/expressions/binary/mul/vec3-mat4x3/f16.wgsl.expected.wgsl b/test/tint/expressions/binary/mul/vec3-mat4x3/f16.wgsl.expected.wgsl
index 6f70109..93ffb9e 100644
--- a/test/tint/expressions/binary/mul/vec3-mat4x3/f16.wgsl.expected.wgsl
+++ b/test/tint/expressions/binary/mul/vec3-mat4x3/f16.wgsl.expected.wgsl

@@ -1,19 +1,13 @@
-SKIP: FAILED
+enable f16;
 
-binary/mul/vec3-mat4x3/f16.wgsl:3:14 error: using f16 types in 'uniform' address space is not implemented yet
-    matrix : mat4x3<f16>,
-             ^^^^^^^^^^^
-
-binary/mul/vec3-mat4x3/f16.wgsl:2:1 note: see layout of struct:
-/*            align(8) size(40) */ struct S {
-/* offset( 0) align(8) size(32) */   matrix : mat4x3<f16>;
-/* offset(32) align(8) size( 6) */   vector : vec3<f16>;
-/* offset(38) align(1) size( 2) */   // -- implicit struct size padding --;
-/*                              */ };
 struct S {
-^^^^^^
+  matrix : mat4x3<f16>,
+  vector : vec3<f16>,
+}
 
-binary/mul/vec3-mat4x3/f16.wgsl:6:36 note: see declaration of variable
-@group(0) @binding(0) var<uniform> data: S;
-                                   ^^^^
+@group(0) @binding(0) var<uniform> data : S;
 
+@fragment
+fn main() {
+  let x = (data.vector * data.matrix);
+}

diff --git a/test/tint/expressions/swizzle/read/packed_vec3/f16.wgsl.expected.dxc.hlsl b/test/tint/expressions/swizzle/read/packed_vec3/f16.wgsl.expected.dxc.hlsl
index 6c8063b..5be5dfc 100644
--- a/test/tint/expressions/swizzle/read/packed_vec3/f16.wgsl.expected.dxc.hlsl
+++ b/test/tint/expressions/swizzle/read/packed_vec3/f16.wgsl.expected.dxc.hlsl

@@ -1,18 +1,486 @@
-SKIP: FAILED
+[numthreads(1, 1, 1)]
+void unused_entry_point() {
+  return;
+}
 
-swizzle/read/packed_vec3/f16.wgsl:3:8 error: using f16 types in 'uniform' address space is not implemented yet
-    v: vec3<f16>,
-       ^^^^^^^^^
+cbuffer cbuffer_U : register(b0, space0) {
+  uint4 U[1];
+};
 
-swizzle/read/packed_vec3/f16.wgsl:2:1 note: see layout of struct:
-/*           align(8) size(8) */ struct S {
-/* offset(0) align(8) size(6) */   v : vec3<f16>;
-/* offset(6) align(1) size(2) */   // -- implicit struct size padding --;
-/*                            */ };
-struct S {
-^^^^^^
-
-swizzle/read/packed_vec3/f16.wgsl:6:36 note: see declaration of variable
-@group(0) @binding(0) var<uniform> U : S;
-                                   ^
-
+void f() {
+  uint2 ubo_load = U[0].xy;
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  vector<float16_t, 3> v = vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]);
+  float16_t x = float16_t(f16tof32(((U[0].x) & 0xFFFF)));
+  float16_t y = float16_t(f16tof32(((U[0].x >> 16) & 0xFFFF)));
+  float16_t z = float16_t(f16tof32(((U[0].y) & 0xFFFF)));
+  uint2 ubo_load_1 = U[0].xy;
+  vector<float16_t, 2> ubo_load_1_xz = vector<float16_t, 2>(f16tof32(ubo_load_1 & 0xFFFF));
+  float16_t ubo_load_1_y = f16tof32(ubo_load_1[0] >> 16);
+  vector<float16_t, 2> xx = vector<float16_t, 3>(ubo_load_1_xz[0], ubo_load_1_y, ubo_load_1_xz[1]).xx;
+  uint2 ubo_load_2 = U[0].xy;
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  vector<float16_t, 2> xy = vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]).xy;
+  uint2 ubo_load_3 = U[0].xy;
+  vector<float16_t, 2> ubo_load_3_xz = vector<float16_t, 2>(f16tof32(ubo_load_3 & 0xFFFF));
+  float16_t ubo_load_3_y = f16tof32(ubo_load_3[0] >> 16);
+  vector<float16_t, 2> xz = vector<float16_t, 3>(ubo_load_3_xz[0], ubo_load_3_y, ubo_load_3_xz[1]).xz;
+  uint2 ubo_load_4 = U[0].xy;
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  vector<float16_t, 2> yx = vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]).yx;
+  uint2 ubo_load_5 = U[0].xy;
+  vector<float16_t, 2> ubo_load_5_xz = vector<float16_t, 2>(f16tof32(ubo_load_5 & 0xFFFF));
+  float16_t ubo_load_5_y = f16tof32(ubo_load_5[0] >> 16);
+  vector<float16_t, 2> yy = vector<float16_t, 3>(ubo_load_5_xz[0], ubo_load_5_y, ubo_load_5_xz[1]).yy;
+  uint2 ubo_load_6 = U[0].xy;
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  vector<float16_t, 2> yz = vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]).yz;
+  uint2 ubo_load_7 = U[0].xy;
+  vector<float16_t, 2> ubo_load_7_xz = vector<float16_t, 2>(f16tof32(ubo_load_7 & 0xFFFF));
+  float16_t ubo_load_7_y = f16tof32(ubo_load_7[0] >> 16);
+  vector<float16_t, 2> zx = vector<float16_t, 3>(ubo_load_7_xz[0], ubo_load_7_y, ubo_load_7_xz[1]).zx;
+  uint2 ubo_load_8 = U[0].xy;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  float16_t ubo_load_8_y = f16tof32(ubo_load_8[0] >> 16);
+  vector<float16_t, 2> zy = vector<float16_t, 3>(ubo_load_8_xz[0], ubo_load_8_y, ubo_load_8_xz[1]).zy;
+  uint2 ubo_load_9 = U[0].xy;
+  vector<float16_t, 2> ubo_load_9_xz = vector<float16_t, 2>(f16tof32(ubo_load_9 & 0xFFFF));
+  float16_t ubo_load_9_y = f16tof32(ubo_load_9[0] >> 16);
+  vector<float16_t, 2> zz = vector<float16_t, 3>(ubo_load_9_xz[0], ubo_load_9_y, ubo_load_9_xz[1]).zz;
+  uint2 ubo_load_10 = U[0].xy;
+  vector<float16_t, 2> ubo_load_10_xz = vector<float16_t, 2>(f16tof32(ubo_load_10 & 0xFFFF));
+  float16_t ubo_load_10_y = f16tof32(ubo_load_10[0] >> 16);
+  vector<float16_t, 3> xxx = vector<float16_t, 3>(ubo_load_10_xz[0], ubo_load_10_y, ubo_load_10_xz[1]).xxx;
+  uint2 ubo_load_11 = U[0].xy;
+  vector<float16_t, 2> ubo_load_11_xz = vector<float16_t, 2>(f16tof32(ubo_load_11 & 0xFFFF));
+  float16_t ubo_load_11_y = f16tof32(ubo_load_11[0] >> 16);
+  vector<float16_t, 3> xxy = vector<float16_t, 3>(ubo_load_11_xz[0], ubo_load_11_y, ubo_load_11_xz[1]).xxy;
+  uint2 ubo_load_12 = U[0].xy;
+  vector<float16_t, 2> ubo_load_12_xz = vector<float16_t, 2>(f16tof32(ubo_load_12 & 0xFFFF));
+  float16_t ubo_load_12_y = f16tof32(ubo_load_12[0] >> 16);
+  vector<float16_t, 3> xxz = vector<float16_t, 3>(ubo_load_12_xz[0], ubo_load_12_y, ubo_load_12_xz[1]).xxz;
+  uint2 ubo_load_13 = U[0].xy;
+  vector<float16_t, 2> ubo_load_13_xz = vector<float16_t, 2>(f16tof32(ubo_load_13 & 0xFFFF));
+  float16_t ubo_load_13_y = f16tof32(ubo_load_13[0] >> 16);
+  vector<float16_t, 3> xyx = vector<float16_t, 3>(ubo_load_13_xz[0], ubo_load_13_y, ubo_load_13_xz[1]).xyx;
+  uint2 ubo_load_14 = U[0].xy;
+  vector<float16_t, 2> ubo_load_14_xz = vector<float16_t, 2>(f16tof32(ubo_load_14 & 0xFFFF));
+  float16_t ubo_load_14_y = f16tof32(ubo_load_14[0] >> 16);
+  vector<float16_t, 3> xyy = vector<float16_t, 3>(ubo_load_14_xz[0], ubo_load_14_y, ubo_load_14_xz[1]).xyy;
+  uint2 ubo_load_15 = U[0].xy;
+  vector<float16_t, 2> ubo_load_15_xz = vector<float16_t, 2>(f16tof32(ubo_load_15 & 0xFFFF));
+  float16_t ubo_load_15_y = f16tof32(ubo_load_15[0] >> 16);
+  vector<float16_t, 3> xyz = vector<float16_t, 3>(ubo_load_15_xz[0], ubo_load_15_y, ubo_load_15_xz[1]).xyz;
+  uint2 ubo_load_16 = U[0].xy;
+  vector<float16_t, 2> ubo_load_16_xz = vector<float16_t, 2>(f16tof32(ubo_load_16 & 0xFFFF));
+  float16_t ubo_load_16_y = f16tof32(ubo_load_16[0] >> 16);
+  vector<float16_t, 3> xzx = vector<float16_t, 3>(ubo_load_16_xz[0], ubo_load_16_y, ubo_load_16_xz[1]).xzx;
+  uint2 ubo_load_17 = U[0].xy;
+  vector<float16_t, 2> ubo_load_17_xz = vector<float16_t, 2>(f16tof32(ubo_load_17 & 0xFFFF));
+  float16_t ubo_load_17_y = f16tof32(ubo_load_17[0] >> 16);
+  vector<float16_t, 3> xzy = vector<float16_t, 3>(ubo_load_17_xz[0], ubo_load_17_y, ubo_load_17_xz[1]).xzy;
+  uint2 ubo_load_18 = U[0].xy;
+  vector<float16_t, 2> ubo_load_18_xz = vector<float16_t, 2>(f16tof32(ubo_load_18 & 0xFFFF));
+  float16_t ubo_load_18_y = f16tof32(ubo_load_18[0] >> 16);
+  vector<float16_t, 3> xzz = vector<float16_t, 3>(ubo_load_18_xz[0], ubo_load_18_y, ubo_load_18_xz[1]).xzz;
+  uint2 ubo_load_19 = U[0].xy;
+  vector<float16_t, 2> ubo_load_19_xz = vector<float16_t, 2>(f16tof32(ubo_load_19 & 0xFFFF));
+  float16_t ubo_load_19_y = f16tof32(ubo_load_19[0] >> 16);
+  vector<float16_t, 3> yxx = vector<float16_t, 3>(ubo_load_19_xz[0], ubo_load_19_y, ubo_load_19_xz[1]).yxx;
+  uint2 ubo_load_20 = U[0].xy;
+  vector<float16_t, 2> ubo_load_20_xz = vector<float16_t, 2>(f16tof32(ubo_load_20 & 0xFFFF));
+  float16_t ubo_load_20_y = f16tof32(ubo_load_20[0] >> 16);
+  vector<float16_t, 3> yxy = vector<float16_t, 3>(ubo_load_20_xz[0], ubo_load_20_y, ubo_load_20_xz[1]).yxy;
+  uint2 ubo_load_21 = U[0].xy;
+  vector<float16_t, 2> ubo_load_21_xz = vector<float16_t, 2>(f16tof32(ubo_load_21 & 0xFFFF));
+  float16_t ubo_load_21_y = f16tof32(ubo_load_21[0] >> 16);
+  vector<float16_t, 3> yxz = vector<float16_t, 3>(ubo_load_21_xz[0], ubo_load_21_y, ubo_load_21_xz[1]).yxz;
+  uint2 ubo_load_22 = U[0].xy;
+  vector<float16_t, 2> ubo_load_22_xz = vector<float16_t, 2>(f16tof32(ubo_load_22 & 0xFFFF));
+  float16_t ubo_load_22_y = f16tof32(ubo_load_22[0] >> 16);
+  vector<float16_t, 3> yyx = vector<float16_t, 3>(ubo_load_22_xz[0], ubo_load_22_y, ubo_load_22_xz[1]).yyx;
+  uint2 ubo_load_23 = U[0].xy;
+  vector<float16_t, 2> ubo_load_23_xz = vector<float16_t, 2>(f16tof32(ubo_load_23 & 0xFFFF));
+  float16_t ubo_load_23_y = f16tof32(ubo_load_23[0] >> 16);
+  vector<float16_t, 3> yyy = vector<float16_t, 3>(ubo_load_23_xz[0], ubo_load_23_y, ubo_load_23_xz[1]).yyy;
+  uint2 ubo_load_24 = U[0].xy;
+  vector<float16_t, 2> ubo_load_24_xz = vector<float16_t, 2>(f16tof32(ubo_load_24 & 0xFFFF));
+  float16_t ubo_load_24_y = f16tof32(ubo_load_24[0] >> 16);
+  vector<float16_t, 3> yyz = vector<float16_t, 3>(ubo_load_24_xz[0], ubo_load_24_y, ubo_load_24_xz[1]).yyz;
+  uint2 ubo_load_25 = U[0].xy;
+  vector<float16_t, 2> ubo_load_25_xz = vector<float16_t, 2>(f16tof32(ubo_load_25 & 0xFFFF));
+  float16_t ubo_load_25_y = f16tof32(ubo_load_25[0] >> 16);
+  vector<float16_t, 3> yzx = vector<float16_t, 3>(ubo_load_25_xz[0], ubo_load_25_y, ubo_load_25_xz[1]).yzx;
+  uint2 ubo_load_26 = U[0].xy;
+  vector<float16_t, 2> ubo_load_26_xz = vector<float16_t, 2>(f16tof32(ubo_load_26 & 0xFFFF));
+  float16_t ubo_load_26_y = f16tof32(ubo_load_26[0] >> 16);
+  vector<float16_t, 3> yzy = vector<float16_t, 3>(ubo_load_26_xz[0], ubo_load_26_y, ubo_load_26_xz[1]).yzy;
+  uint2 ubo_load_27 = U[0].xy;
+  vector<float16_t, 2> ubo_load_27_xz = vector<float16_t, 2>(f16tof32(ubo_load_27 & 0xFFFF));
+  float16_t ubo_load_27_y = f16tof32(ubo_load_27[0] >> 16);
+  vector<float16_t, 3> yzz = vector<float16_t, 3>(ubo_load_27_xz[0], ubo_load_27_y, ubo_load_27_xz[1]).yzz;
+  uint2 ubo_load_28 = U[0].xy;
+  vector<float16_t, 2> ubo_load_28_xz = vector<float16_t, 2>(f16tof32(ubo_load_28 & 0xFFFF));
+  float16_t ubo_load_28_y = f16tof32(ubo_load_28[0] >> 16);
+  vector<float16_t, 3> zxx = vector<float16_t, 3>(ubo_load_28_xz[0], ubo_load_28_y, ubo_load_28_xz[1]).zxx;
+  uint2 ubo_load_29 = U[0].xy;
+  vector<float16_t, 2> ubo_load_29_xz = vector<float16_t, 2>(f16tof32(ubo_load_29 & 0xFFFF));
+  float16_t ubo_load_29_y = f16tof32(ubo_load_29[0] >> 16);
+  vector<float16_t, 3> zxy = vector<float16_t, 3>(ubo_load_29_xz[0], ubo_load_29_y, ubo_load_29_xz[1]).zxy;
+  uint2 ubo_load_30 = U[0].xy;
+  vector<float16_t, 2> ubo_load_30_xz = vector<float16_t, 2>(f16tof32(ubo_load_30 & 0xFFFF));
+  float16_t ubo_load_30_y = f16tof32(ubo_load_30[0] >> 16);
+  vector<float16_t, 3> zxz = vector<float16_t, 3>(ubo_load_30_xz[0], ubo_load_30_y, ubo_load_30_xz[1]).zxz;
+  uint2 ubo_load_31 = U[0].xy;
+  vector<float16_t, 2> ubo_load_31_xz = vector<float16_t, 2>(f16tof32(ubo_load_31 & 0xFFFF));
+  float16_t ubo_load_31_y = f16tof32(ubo_load_31[0] >> 16);
+  vector<float16_t, 3> zyx = vector<float16_t, 3>(ubo_load_31_xz[0], ubo_load_31_y, ubo_load_31_xz[1]).zyx;
+  uint2 ubo_load_32 = U[0].xy;
+  vector<float16_t, 2> ubo_load_32_xz = vector<float16_t, 2>(f16tof32(ubo_load_32 & 0xFFFF));
+  float16_t ubo_load_32_y = f16tof32(ubo_load_32[0] >> 16);
+  vector<float16_t, 3> zyy = vector<float16_t, 3>(ubo_load_32_xz[0], ubo_load_32_y, ubo_load_32_xz[1]).zyy;
+  uint2 ubo_load_33 = U[0].xy;
+  vector<float16_t, 2> ubo_load_33_xz = vector<float16_t, 2>(f16tof32(ubo_load_33 & 0xFFFF));
+  float16_t ubo_load_33_y = f16tof32(ubo_load_33[0] >> 16);
+  vector<float16_t, 3> zyz = vector<float16_t, 3>(ubo_load_33_xz[0], ubo_load_33_y, ubo_load_33_xz[1]).zyz;
+  uint2 ubo_load_34 = U[0].xy;
+  vector<float16_t, 2> ubo_load_34_xz = vector<float16_t, 2>(f16tof32(ubo_load_34 & 0xFFFF));
+  float16_t ubo_load_34_y = f16tof32(ubo_load_34[0] >> 16);
+  vector<float16_t, 3> zzx = vector<float16_t, 3>(ubo_load_34_xz[0], ubo_load_34_y, ubo_load_34_xz[1]).zzx;
+  uint2 ubo_load_35 = U[0].xy;
+  vector<float16_t, 2> ubo_load_35_xz = vector<float16_t, 2>(f16tof32(ubo_load_35 & 0xFFFF));
+  float16_t ubo_load_35_y = f16tof32(ubo_load_35[0] >> 16);
+  vector<float16_t, 3> zzy = vector<float16_t, 3>(ubo_load_35_xz[0], ubo_load_35_y, ubo_load_35_xz[1]).zzy;
+  uint2 ubo_load_36 = U[0].xy;
+  vector<float16_t, 2> ubo_load_36_xz = vector<float16_t, 2>(f16tof32(ubo_load_36 & 0xFFFF));
+  float16_t ubo_load_36_y = f16tof32(ubo_load_36[0] >> 16);
+  vector<float16_t, 3> zzz = vector<float16_t, 3>(ubo_load_36_xz[0], ubo_load_36_y, ubo_load_36_xz[1]).zzz;
+  uint2 ubo_load_37 = U[0].xy;
+  vector<float16_t, 2> ubo_load_37_xz = vector<float16_t, 2>(f16tof32(ubo_load_37 & 0xFFFF));
+  float16_t ubo_load_37_y = f16tof32(ubo_load_37[0] >> 16);
+  vector<float16_t, 4> xxxx = vector<float16_t, 3>(ubo_load_37_xz[0], ubo_load_37_y, ubo_load_37_xz[1]).xxxx;
+  uint2 ubo_load_38 = U[0].xy;
+  vector<float16_t, 2> ubo_load_38_xz = vector<float16_t, 2>(f16tof32(ubo_load_38 & 0xFFFF));
+  float16_t ubo_load_38_y = f16tof32(ubo_load_38[0] >> 16);
+  vector<float16_t, 4> xxxy = vector<float16_t, 3>(ubo_load_38_xz[0], ubo_load_38_y, ubo_load_38_xz[1]).xxxy;
+  uint2 ubo_load_39 = U[0].xy;
+  vector<float16_t, 2> ubo_load_39_xz = vector<float16_t, 2>(f16tof32(ubo_load_39 & 0xFFFF));
+  float16_t ubo_load_39_y = f16tof32(ubo_load_39[0] >> 16);
+  vector<float16_t, 4> xxxz = vector<float16_t, 3>(ubo_load_39_xz[0], ubo_load_39_y, ubo_load_39_xz[1]).xxxz;
+  uint2 ubo_load_40 = U[0].xy;
+  vector<float16_t, 2> ubo_load_40_xz = vector<float16_t, 2>(f16tof32(ubo_load_40 & 0xFFFF));
+  float16_t ubo_load_40_y = f16tof32(ubo_load_40[0] >> 16);
+  vector<float16_t, 4> xxyx = vector<float16_t, 3>(ubo_load_40_xz[0], ubo_load_40_y, ubo_load_40_xz[1]).xxyx;
+  uint2 ubo_load_41 = U[0].xy;
+  vector<float16_t, 2> ubo_load_41_xz = vector<float16_t, 2>(f16tof32(ubo_load_41 & 0xFFFF));
+  float16_t ubo_load_41_y = f16tof32(ubo_load_41[0] >> 16);
+  vector<float16_t, 4> xxyy = vector<float16_t, 3>(ubo_load_41_xz[0], ubo_load_41_y, ubo_load_41_xz[1]).xxyy;
+  uint2 ubo_load_42 = U[0].xy;
+  vector<float16_t, 2> ubo_load_42_xz = vector<float16_t, 2>(f16tof32(ubo_load_42 & 0xFFFF));
+  float16_t ubo_load_42_y = f16tof32(ubo_load_42[0] >> 16);
+  vector<float16_t, 4> xxyz = vector<float16_t, 3>(ubo_load_42_xz[0], ubo_load_42_y, ubo_load_42_xz[1]).xxyz;
+  uint2 ubo_load_43 = U[0].xy;
+  vector<float16_t, 2> ubo_load_43_xz = vector<float16_t, 2>(f16tof32(ubo_load_43 & 0xFFFF));
+  float16_t ubo_load_43_y = f16tof32(ubo_load_43[0] >> 16);
+  vector<float16_t, 4> xxzx = vector<float16_t, 3>(ubo_load_43_xz[0], ubo_load_43_y, ubo_load_43_xz[1]).xxzx;
+  uint2 ubo_load_44 = U[0].xy;
+  vector<float16_t, 2> ubo_load_44_xz = vector<float16_t, 2>(f16tof32(ubo_load_44 & 0xFFFF));
+  float16_t ubo_load_44_y = f16tof32(ubo_load_44[0] >> 16);
+  vector<float16_t, 4> xxzy = vector<float16_t, 3>(ubo_load_44_xz[0], ubo_load_44_y, ubo_load_44_xz[1]).xxzy;
+  uint2 ubo_load_45 = U[0].xy;
+  vector<float16_t, 2> ubo_load_45_xz = vector<float16_t, 2>(f16tof32(ubo_load_45 & 0xFFFF));
+  float16_t ubo_load_45_y = f16tof32(ubo_load_45[0] >> 16);
+  vector<float16_t, 4> xxzz = vector<float16_t, 3>(ubo_load_45_xz[0], ubo_load_45_y, ubo_load_45_xz[1]).xxzz;
+  uint2 ubo_load_46 = U[0].xy;
+  vector<float16_t, 2> ubo_load_46_xz = vector<float16_t, 2>(f16tof32(ubo_load_46 & 0xFFFF));
+  float16_t ubo_load_46_y = f16tof32(ubo_load_46[0] >> 16);
+  vector<float16_t, 4> xyxx = vector<float16_t, 3>(ubo_load_46_xz[0], ubo_load_46_y, ubo_load_46_xz[1]).xyxx;
+  uint2 ubo_load_47 = U[0].xy;
+  vector<float16_t, 2> ubo_load_47_xz = vector<float16_t, 2>(f16tof32(ubo_load_47 & 0xFFFF));
+  float16_t ubo_load_47_y = f16tof32(ubo_load_47[0] >> 16);
+  vector<float16_t, 4> xyxy = vector<float16_t, 3>(ubo_load_47_xz[0], ubo_load_47_y, ubo_load_47_xz[1]).xyxy;
+  uint2 ubo_load_48 = U[0].xy;
+  vector<float16_t, 2> ubo_load_48_xz = vector<float16_t, 2>(f16tof32(ubo_load_48 & 0xFFFF));
+  float16_t ubo_load_48_y = f16tof32(ubo_load_48[0] >> 16);
+  vector<float16_t, 4> xyxz = vector<float16_t, 3>(ubo_load_48_xz[0], ubo_load_48_y, ubo_load_48_xz[1]).xyxz;
+  uint2 ubo_load_49 = U[0].xy;
+  vector<float16_t, 2> ubo_load_49_xz = vector<float16_t, 2>(f16tof32(ubo_load_49 & 0xFFFF));
+  float16_t ubo_load_49_y = f16tof32(ubo_load_49[0] >> 16);
+  vector<float16_t, 4> xyyx = vector<float16_t, 3>(ubo_load_49_xz[0], ubo_load_49_y, ubo_load_49_xz[1]).xyyx;
+  uint2 ubo_load_50 = U[0].xy;
+  vector<float16_t, 2> ubo_load_50_xz = vector<float16_t, 2>(f16tof32(ubo_load_50 & 0xFFFF));
+  float16_t ubo_load_50_y = f16tof32(ubo_load_50[0] >> 16);
+  vector<float16_t, 4> xyyy = vector<float16_t, 3>(ubo_load_50_xz[0], ubo_load_50_y, ubo_load_50_xz[1]).xyyy;
+  uint2 ubo_load_51 = U[0].xy;
+  vector<float16_t, 2> ubo_load_51_xz = vector<float16_t, 2>(f16tof32(ubo_load_51 & 0xFFFF));
+  float16_t ubo_load_51_y = f16tof32(ubo_load_51[0] >> 16);
+  vector<float16_t, 4> xyyz = vector<float16_t, 3>(ubo_load_51_xz[0], ubo_load_51_y, ubo_load_51_xz[1]).xyyz;
+  uint2 ubo_load_52 = U[0].xy;
+  vector<float16_t, 2> ubo_load_52_xz = vector<float16_t, 2>(f16tof32(ubo_load_52 & 0xFFFF));
+  float16_t ubo_load_52_y = f16tof32(ubo_load_52[0] >> 16);
+  vector<float16_t, 4> xyzx = vector<float16_t, 3>(ubo_load_52_xz[0], ubo_load_52_y, ubo_load_52_xz[1]).xyzx;
+  uint2 ubo_load_53 = U[0].xy;
+  vector<float16_t, 2> ubo_load_53_xz = vector<float16_t, 2>(f16tof32(ubo_load_53 & 0xFFFF));
+  float16_t ubo_load_53_y = f16tof32(ubo_load_53[0] >> 16);
+  vector<float16_t, 4> xyzy = vector<float16_t, 3>(ubo_load_53_xz[0], ubo_load_53_y, ubo_load_53_xz[1]).xyzy;
+  uint2 ubo_load_54 = U[0].xy;
+  vector<float16_t, 2> ubo_load_54_xz = vector<float16_t, 2>(f16tof32(ubo_load_54 & 0xFFFF));
+  float16_t ubo_load_54_y = f16tof32(ubo_load_54[0] >> 16);
+  vector<float16_t, 4> xyzz = vector<float16_t, 3>(ubo_load_54_xz[0], ubo_load_54_y, ubo_load_54_xz[1]).xyzz;
+  uint2 ubo_load_55 = U[0].xy;
+  vector<float16_t, 2> ubo_load_55_xz = vector<float16_t, 2>(f16tof32(ubo_load_55 & 0xFFFF));
+  float16_t ubo_load_55_y = f16tof32(ubo_load_55[0] >> 16);
+  vector<float16_t, 4> xzxx = vector<float16_t, 3>(ubo_load_55_xz[0], ubo_load_55_y, ubo_load_55_xz[1]).xzxx;
+  uint2 ubo_load_56 = U[0].xy;
+  vector<float16_t, 2> ubo_load_56_xz = vector<float16_t, 2>(f16tof32(ubo_load_56 & 0xFFFF));
+  float16_t ubo_load_56_y = f16tof32(ubo_load_56[0] >> 16);
+  vector<float16_t, 4> xzxy = vector<float16_t, 3>(ubo_load_56_xz[0], ubo_load_56_y, ubo_load_56_xz[1]).xzxy;
+  uint2 ubo_load_57 = U[0].xy;
+  vector<float16_t, 2> ubo_load_57_xz = vector<float16_t, 2>(f16tof32(ubo_load_57 & 0xFFFF));
+  float16_t ubo_load_57_y = f16tof32(ubo_load_57[0] >> 16);
+  vector<float16_t, 4> xzxz = vector<float16_t, 3>(ubo_load_57_xz[0], ubo_load_57_y, ubo_load_57_xz[1]).xzxz;
+  uint2 ubo_load_58 = U[0].xy;
+  vector<float16_t, 2> ubo_load_58_xz = vector<float16_t, 2>(f16tof32(ubo_load_58 & 0xFFFF));
+  float16_t ubo_load_58_y = f16tof32(ubo_load_58[0] >> 16);
+  vector<float16_t, 4> xzyx = vector<float16_t, 3>(ubo_load_58_xz[0], ubo_load_58_y, ubo_load_58_xz[1]).xzyx;
+  uint2 ubo_load_59 = U[0].xy;
+  vector<float16_t, 2> ubo_load_59_xz = vector<float16_t, 2>(f16tof32(ubo_load_59 & 0xFFFF));
+  float16_t ubo_load_59_y = f16tof32(ubo_load_59[0] >> 16);
+  vector<float16_t, 4> xzyy = vector<float16_t, 3>(ubo_load_59_xz[0], ubo_load_59_y, ubo_load_59_xz[1]).xzyy;
+  uint2 ubo_load_60 = U[0].xy;
+  vector<float16_t, 2> ubo_load_60_xz = vector<float16_t, 2>(f16tof32(ubo_load_60 & 0xFFFF));
+  float16_t ubo_load_60_y = f16tof32(ubo_load_60[0] >> 16);
+  vector<float16_t, 4> xzyz = vector<float16_t, 3>(ubo_load_60_xz[0], ubo_load_60_y, ubo_load_60_xz[1]).xzyz;
+  uint2 ubo_load_61 = U[0].xy;
+  vector<float16_t, 2> ubo_load_61_xz = vector<float16_t, 2>(f16tof32(ubo_load_61 & 0xFFFF));
+  float16_t ubo_load_61_y = f16tof32(ubo_load_61[0] >> 16);
+  vector<float16_t, 4> xzzx = vector<float16_t, 3>(ubo_load_61_xz[0], ubo_load_61_y, ubo_load_61_xz[1]).xzzx;
+  uint2 ubo_load_62 = U[0].xy;
+  vector<float16_t, 2> ubo_load_62_xz = vector<float16_t, 2>(f16tof32(ubo_load_62 & 0xFFFF));
+  float16_t ubo_load_62_y = f16tof32(ubo_load_62[0] >> 16);
+  vector<float16_t, 4> xzzy = vector<float16_t, 3>(ubo_load_62_xz[0], ubo_load_62_y, ubo_load_62_xz[1]).xzzy;
+  uint2 ubo_load_63 = U[0].xy;
+  vector<float16_t, 2> ubo_load_63_xz = vector<float16_t, 2>(f16tof32(ubo_load_63 & 0xFFFF));
+  float16_t ubo_load_63_y = f16tof32(ubo_load_63[0] >> 16);
+  vector<float16_t, 4> xzzz = vector<float16_t, 3>(ubo_load_63_xz[0], ubo_load_63_y, ubo_load_63_xz[1]).xzzz;
+  uint2 ubo_load_64 = U[0].xy;
+  vector<float16_t, 2> ubo_load_64_xz = vector<float16_t, 2>(f16tof32(ubo_load_64 & 0xFFFF));
+  float16_t ubo_load_64_y = f16tof32(ubo_load_64[0] >> 16);
+  vector<float16_t, 4> yxxx = vector<float16_t, 3>(ubo_load_64_xz[0], ubo_load_64_y, ubo_load_64_xz[1]).yxxx;
+  uint2 ubo_load_65 = U[0].xy;
+  vector<float16_t, 2> ubo_load_65_xz = vector<float16_t, 2>(f16tof32(ubo_load_65 & 0xFFFF));
+  float16_t ubo_load_65_y = f16tof32(ubo_load_65[0] >> 16);
+  vector<float16_t, 4> yxxy = vector<float16_t, 3>(ubo_load_65_xz[0], ubo_load_65_y, ubo_load_65_xz[1]).yxxy;
+  uint2 ubo_load_66 = U[0].xy;
+  vector<float16_t, 2> ubo_load_66_xz = vector<float16_t, 2>(f16tof32(ubo_load_66 & 0xFFFF));
+  float16_t ubo_load_66_y = f16tof32(ubo_load_66[0] >> 16);
+  vector<float16_t, 4> yxxz = vector<float16_t, 3>(ubo_load_66_xz[0], ubo_load_66_y, ubo_load_66_xz[1]).yxxz;
+  uint2 ubo_load_67 = U[0].xy;
+  vector<float16_t, 2> ubo_load_67_xz = vector<float16_t, 2>(f16tof32(ubo_load_67 & 0xFFFF));
+  float16_t ubo_load_67_y = f16tof32(ubo_load_67[0] >> 16);
+  vector<float16_t, 4> yxyx = vector<float16_t, 3>(ubo_load_67_xz[0], ubo_load_67_y, ubo_load_67_xz[1]).yxyx;
+  uint2 ubo_load_68 = U[0].xy;
+  vector<float16_t, 2> ubo_load_68_xz = vector<float16_t, 2>(f16tof32(ubo_load_68 & 0xFFFF));
+  float16_t ubo_load_68_y = f16tof32(ubo_load_68[0] >> 16);
+  vector<float16_t, 4> yxyy = vector<float16_t, 3>(ubo_load_68_xz[0], ubo_load_68_y, ubo_load_68_xz[1]).yxyy;
+  uint2 ubo_load_69 = U[0].xy;
+  vector<float16_t, 2> ubo_load_69_xz = vector<float16_t, 2>(f16tof32(ubo_load_69 & 0xFFFF));
+  float16_t ubo_load_69_y = f16tof32(ubo_load_69[0] >> 16);
+  vector<float16_t, 4> yxyz = vector<float16_t, 3>(ubo_load_69_xz[0], ubo_load_69_y, ubo_load_69_xz[1]).yxyz;
+  uint2 ubo_load_70 = U[0].xy;
+  vector<float16_t, 2> ubo_load_70_xz = vector<float16_t, 2>(f16tof32(ubo_load_70 & 0xFFFF));
+  float16_t ubo_load_70_y = f16tof32(ubo_load_70[0] >> 16);
+  vector<float16_t, 4> yxzx = vector<float16_t, 3>(ubo_load_70_xz[0], ubo_load_70_y, ubo_load_70_xz[1]).yxzx;
+  uint2 ubo_load_71 = U[0].xy;
+  vector<float16_t, 2> ubo_load_71_xz = vector<float16_t, 2>(f16tof32(ubo_load_71 & 0xFFFF));
+  float16_t ubo_load_71_y = f16tof32(ubo_load_71[0] >> 16);
+  vector<float16_t, 4> yxzy = vector<float16_t, 3>(ubo_load_71_xz[0], ubo_load_71_y, ubo_load_71_xz[1]).yxzy;
+  uint2 ubo_load_72 = U[0].xy;
+  vector<float16_t, 2> ubo_load_72_xz = vector<float16_t, 2>(f16tof32(ubo_load_72 & 0xFFFF));
+  float16_t ubo_load_72_y = f16tof32(ubo_load_72[0] >> 16);
+  vector<float16_t, 4> yxzz = vector<float16_t, 3>(ubo_load_72_xz[0], ubo_load_72_y, ubo_load_72_xz[1]).yxzz;
+  uint2 ubo_load_73 = U[0].xy;
+  vector<float16_t, 2> ubo_load_73_xz = vector<float16_t, 2>(f16tof32(ubo_load_73 & 0xFFFF));
+  float16_t ubo_load_73_y = f16tof32(ubo_load_73[0] >> 16);
+  vector<float16_t, 4> yyxx = vector<float16_t, 3>(ubo_load_73_xz[0], ubo_load_73_y, ubo_load_73_xz[1]).yyxx;
+  uint2 ubo_load_74 = U[0].xy;
+  vector<float16_t, 2> ubo_load_74_xz = vector<float16_t, 2>(f16tof32(ubo_load_74 & 0xFFFF));
+  float16_t ubo_load_74_y = f16tof32(ubo_load_74[0] >> 16);
+  vector<float16_t, 4> yyxy = vector<float16_t, 3>(ubo_load_74_xz[0], ubo_load_74_y, ubo_load_74_xz[1]).yyxy;
+  uint2 ubo_load_75 = U[0].xy;
+  vector<float16_t, 2> ubo_load_75_xz = vector<float16_t, 2>(f16tof32(ubo_load_75 & 0xFFFF));
+  float16_t ubo_load_75_y = f16tof32(ubo_load_75[0] >> 16);
+  vector<float16_t, 4> yyxz = vector<float16_t, 3>(ubo_load_75_xz[0], ubo_load_75_y, ubo_load_75_xz[1]).yyxz;
+  uint2 ubo_load_76 = U[0].xy;
+  vector<float16_t, 2> ubo_load_76_xz = vector<float16_t, 2>(f16tof32(ubo_load_76 & 0xFFFF));
+  float16_t ubo_load_76_y = f16tof32(ubo_load_76[0] >> 16);
+  vector<float16_t, 4> yyyx = vector<float16_t, 3>(ubo_load_76_xz[0], ubo_load_76_y, ubo_load_76_xz[1]).yyyx;
+  uint2 ubo_load_77 = U[0].xy;
+  vector<float16_t, 2> ubo_load_77_xz = vector<float16_t, 2>(f16tof32(ubo_load_77 & 0xFFFF));
+  float16_t ubo_load_77_y = f16tof32(ubo_load_77[0] >> 16);
+  vector<float16_t, 4> yyyy = vector<float16_t, 3>(ubo_load_77_xz[0], ubo_load_77_y, ubo_load_77_xz[1]).yyyy;
+  uint2 ubo_load_78 = U[0].xy;
+  vector<float16_t, 2> ubo_load_78_xz = vector<float16_t, 2>(f16tof32(ubo_load_78 & 0xFFFF));
+  float16_t ubo_load_78_y = f16tof32(ubo_load_78[0] >> 16);
+  vector<float16_t, 4> yyyz = vector<float16_t, 3>(ubo_load_78_xz[0], ubo_load_78_y, ubo_load_78_xz[1]).yyyz;
+  uint2 ubo_load_79 = U[0].xy;
+  vector<float16_t, 2> ubo_load_79_xz = vector<float16_t, 2>(f16tof32(ubo_load_79 & 0xFFFF));
+  float16_t ubo_load_79_y = f16tof32(ubo_load_79[0] >> 16);
+  vector<float16_t, 4> yyzx = vector<float16_t, 3>(ubo_load_79_xz[0], ubo_load_79_y, ubo_load_79_xz[1]).yyzx;
+  uint2 ubo_load_80 = U[0].xy;
+  vector<float16_t, 2> ubo_load_80_xz = vector<float16_t, 2>(f16tof32(ubo_load_80 & 0xFFFF));
+  float16_t ubo_load_80_y = f16tof32(ubo_load_80[0] >> 16);
+  vector<float16_t, 4> yyzy = vector<float16_t, 3>(ubo_load_80_xz[0], ubo_load_80_y, ubo_load_80_xz[1]).yyzy;
+  uint2 ubo_load_81 = U[0].xy;
+  vector<float16_t, 2> ubo_load_81_xz = vector<float16_t, 2>(f16tof32(ubo_load_81 & 0xFFFF));
+  float16_t ubo_load_81_y = f16tof32(ubo_load_81[0] >> 16);
+  vector<float16_t, 4> yyzz = vector<float16_t, 3>(ubo_load_81_xz[0], ubo_load_81_y, ubo_load_81_xz[1]).yyzz;
+  uint2 ubo_load_82 = U[0].xy;
+  vector<float16_t, 2> ubo_load_82_xz = vector<float16_t, 2>(f16tof32(ubo_load_82 & 0xFFFF));
+  float16_t ubo_load_82_y = f16tof32(ubo_load_82[0] >> 16);
+  vector<float16_t, 4> yzxx = vector<float16_t, 3>(ubo_load_82_xz[0], ubo_load_82_y, ubo_load_82_xz[1]).yzxx;
+  uint2 ubo_load_83 = U[0].xy;
+  vector<float16_t, 2> ubo_load_83_xz = vector<float16_t, 2>(f16tof32(ubo_load_83 & 0xFFFF));
+  float16_t ubo_load_83_y = f16tof32(ubo_load_83[0] >> 16);
+  vector<float16_t, 4> yzxy = vector<float16_t, 3>(ubo_load_83_xz[0], ubo_load_83_y, ubo_load_83_xz[1]).yzxy;
+  uint2 ubo_load_84 = U[0].xy;
+  vector<float16_t, 2> ubo_load_84_xz = vector<float16_t, 2>(f16tof32(ubo_load_84 & 0xFFFF));
+  float16_t ubo_load_84_y = f16tof32(ubo_load_84[0] >> 16);
+  vector<float16_t, 4> yzxz = vector<float16_t, 3>(ubo_load_84_xz[0], ubo_load_84_y, ubo_load_84_xz[1]).yzxz;
+  uint2 ubo_load_85 = U[0].xy;
+  vector<float16_t, 2> ubo_load_85_xz = vector<float16_t, 2>(f16tof32(ubo_load_85 & 0xFFFF));
+  float16_t ubo_load_85_y = f16tof32(ubo_load_85[0] >> 16);
+  vector<float16_t, 4> yzyx = vector<float16_t, 3>(ubo_load_85_xz[0], ubo_load_85_y, ubo_load_85_xz[1]).yzyx;
+  uint2 ubo_load_86 = U[0].xy;
+  vector<float16_t, 2> ubo_load_86_xz = vector<float16_t, 2>(f16tof32(ubo_load_86 & 0xFFFF));
+  float16_t ubo_load_86_y = f16tof32(ubo_load_86[0] >> 16);
+  vector<float16_t, 4> yzyy = vector<float16_t, 3>(ubo_load_86_xz[0], ubo_load_86_y, ubo_load_86_xz[1]).yzyy;
+  uint2 ubo_load_87 = U[0].xy;
+  vector<float16_t, 2> ubo_load_87_xz = vector<float16_t, 2>(f16tof32(ubo_load_87 & 0xFFFF));
+  float16_t ubo_load_87_y = f16tof32(ubo_load_87[0] >> 16);
+  vector<float16_t, 4> yzyz = vector<float16_t, 3>(ubo_load_87_xz[0], ubo_load_87_y, ubo_load_87_xz[1]).yzyz;
+  uint2 ubo_load_88 = U[0].xy;
+  vector<float16_t, 2> ubo_load_88_xz = vector<float16_t, 2>(f16tof32(ubo_load_88 & 0xFFFF));
+  float16_t ubo_load_88_y = f16tof32(ubo_load_88[0] >> 16);
+  vector<float16_t, 4> yzzx = vector<float16_t, 3>(ubo_load_88_xz[0], ubo_load_88_y, ubo_load_88_xz[1]).yzzx;
+  uint2 ubo_load_89 = U[0].xy;
+  vector<float16_t, 2> ubo_load_89_xz = vector<float16_t, 2>(f16tof32(ubo_load_89 & 0xFFFF));
+  float16_t ubo_load_89_y = f16tof32(ubo_load_89[0] >> 16);
+  vector<float16_t, 4> yzzy = vector<float16_t, 3>(ubo_load_89_xz[0], ubo_load_89_y, ubo_load_89_xz[1]).yzzy;
+  uint2 ubo_load_90 = U[0].xy;
+  vector<float16_t, 2> ubo_load_90_xz = vector<float16_t, 2>(f16tof32(ubo_load_90 & 0xFFFF));
+  float16_t ubo_load_90_y = f16tof32(ubo_load_90[0] >> 16);
+  vector<float16_t, 4> yzzz = vector<float16_t, 3>(ubo_load_90_xz[0], ubo_load_90_y, ubo_load_90_xz[1]).yzzz;
+  uint2 ubo_load_91 = U[0].xy;
+  vector<float16_t, 2> ubo_load_91_xz = vector<float16_t, 2>(f16tof32(ubo_load_91 & 0xFFFF));
+  float16_t ubo_load_91_y = f16tof32(ubo_load_91[0] >> 16);
+  vector<float16_t, 4> zxxx = vector<float16_t, 3>(ubo_load_91_xz[0], ubo_load_91_y, ubo_load_91_xz[1]).zxxx;
+  uint2 ubo_load_92 = U[0].xy;
+  vector<float16_t, 2> ubo_load_92_xz = vector<float16_t, 2>(f16tof32(ubo_load_92 & 0xFFFF));
+  float16_t ubo_load_92_y = f16tof32(ubo_load_92[0] >> 16);
+  vector<float16_t, 4> zxxy = vector<float16_t, 3>(ubo_load_92_xz[0], ubo_load_92_y, ubo_load_92_xz[1]).zxxy;
+  uint2 ubo_load_93 = U[0].xy;
+  vector<float16_t, 2> ubo_load_93_xz = vector<float16_t, 2>(f16tof32(ubo_load_93 & 0xFFFF));
+  float16_t ubo_load_93_y = f16tof32(ubo_load_93[0] >> 16);
+  vector<float16_t, 4> zxxz = vector<float16_t, 3>(ubo_load_93_xz[0], ubo_load_93_y, ubo_load_93_xz[1]).zxxz;
+  uint2 ubo_load_94 = U[0].xy;
+  vector<float16_t, 2> ubo_load_94_xz = vector<float16_t, 2>(f16tof32(ubo_load_94 & 0xFFFF));
+  float16_t ubo_load_94_y = f16tof32(ubo_load_94[0] >> 16);
+  vector<float16_t, 4> zxyx = vector<float16_t, 3>(ubo_load_94_xz[0], ubo_load_94_y, ubo_load_94_xz[1]).zxyx;
+  uint2 ubo_load_95 = U[0].xy;
+  vector<float16_t, 2> ubo_load_95_xz = vector<float16_t, 2>(f16tof32(ubo_load_95 & 0xFFFF));
+  float16_t ubo_load_95_y = f16tof32(ubo_load_95[0] >> 16);
+  vector<float16_t, 4> zxyy = vector<float16_t, 3>(ubo_load_95_xz[0], ubo_load_95_y, ubo_load_95_xz[1]).zxyy;
+  uint2 ubo_load_96 = U[0].xy;
+  vector<float16_t, 2> ubo_load_96_xz = vector<float16_t, 2>(f16tof32(ubo_load_96 & 0xFFFF));
+  float16_t ubo_load_96_y = f16tof32(ubo_load_96[0] >> 16);
+  vector<float16_t, 4> zxyz = vector<float16_t, 3>(ubo_load_96_xz[0], ubo_load_96_y, ubo_load_96_xz[1]).zxyz;
+  uint2 ubo_load_97 = U[0].xy;
+  vector<float16_t, 2> ubo_load_97_xz = vector<float16_t, 2>(f16tof32(ubo_load_97 & 0xFFFF));
+  float16_t ubo_load_97_y = f16tof32(ubo_load_97[0] >> 16);
+  vector<float16_t, 4> zxzx = vector<float16_t, 3>(ubo_load_97_xz[0], ubo_load_97_y, ubo_load_97_xz[1]).zxzx;
+  uint2 ubo_load_98 = U[0].xy;
+  vector<float16_t, 2> ubo_load_98_xz = vector<float16_t, 2>(f16tof32(ubo_load_98 & 0xFFFF));
+  float16_t ubo_load_98_y = f16tof32(ubo_load_98[0] >> 16);
+  vector<float16_t, 4> zxzy = vector<float16_t, 3>(ubo_load_98_xz[0], ubo_load_98_y, ubo_load_98_xz[1]).zxzy;
+  uint2 ubo_load_99 = U[0].xy;
+  vector<float16_t, 2> ubo_load_99_xz = vector<float16_t, 2>(f16tof32(ubo_load_99 & 0xFFFF));
+  float16_t ubo_load_99_y = f16tof32(ubo_load_99[0] >> 16);
+  vector<float16_t, 4> zxzz = vector<float16_t, 3>(ubo_load_99_xz[0], ubo_load_99_y, ubo_load_99_xz[1]).zxzz;
+  uint2 ubo_load_100 = U[0].xy;
+  vector<float16_t, 2> ubo_load_100_xz = vector<float16_t, 2>(f16tof32(ubo_load_100 & 0xFFFF));
+  float16_t ubo_load_100_y = f16tof32(ubo_load_100[0] >> 16);
+  vector<float16_t, 4> zyxx = vector<float16_t, 3>(ubo_load_100_xz[0], ubo_load_100_y, ubo_load_100_xz[1]).zyxx;
+  uint2 ubo_load_101 = U[0].xy;
+  vector<float16_t, 2> ubo_load_101_xz = vector<float16_t, 2>(f16tof32(ubo_load_101 & 0xFFFF));
+  float16_t ubo_load_101_y = f16tof32(ubo_load_101[0] >> 16);
+  vector<float16_t, 4> zyxy = vector<float16_t, 3>(ubo_load_101_xz[0], ubo_load_101_y, ubo_load_101_xz[1]).zyxy;
+  uint2 ubo_load_102 = U[0].xy;
+  vector<float16_t, 2> ubo_load_102_xz = vector<float16_t, 2>(f16tof32(ubo_load_102 & 0xFFFF));
+  float16_t ubo_load_102_y = f16tof32(ubo_load_102[0] >> 16);
+  vector<float16_t, 4> zyxz = vector<float16_t, 3>(ubo_load_102_xz[0], ubo_load_102_y, ubo_load_102_xz[1]).zyxz;
+  uint2 ubo_load_103 = U[0].xy;
+  vector<float16_t, 2> ubo_load_103_xz = vector<float16_t, 2>(f16tof32(ubo_load_103 & 0xFFFF));
+  float16_t ubo_load_103_y = f16tof32(ubo_load_103[0] >> 16);
+  vector<float16_t, 4> zyyx = vector<float16_t, 3>(ubo_load_103_xz[0], ubo_load_103_y, ubo_load_103_xz[1]).zyyx;
+  uint2 ubo_load_104 = U[0].xy;
+  vector<float16_t, 2> ubo_load_104_xz = vector<float16_t, 2>(f16tof32(ubo_load_104 & 0xFFFF));
+  float16_t ubo_load_104_y = f16tof32(ubo_load_104[0] >> 16);
+  vector<float16_t, 4> zyyy = vector<float16_t, 3>(ubo_load_104_xz[0], ubo_load_104_y, ubo_load_104_xz[1]).zyyy;
+  uint2 ubo_load_105 = U[0].xy;
+  vector<float16_t, 2> ubo_load_105_xz = vector<float16_t, 2>(f16tof32(ubo_load_105 & 0xFFFF));
+  float16_t ubo_load_105_y = f16tof32(ubo_load_105[0] >> 16);
+  vector<float16_t, 4> zyyz = vector<float16_t, 3>(ubo_load_105_xz[0], ubo_load_105_y, ubo_load_105_xz[1]).zyyz;
+  uint2 ubo_load_106 = U[0].xy;
+  vector<float16_t, 2> ubo_load_106_xz = vector<float16_t, 2>(f16tof32(ubo_load_106 & 0xFFFF));
+  float16_t ubo_load_106_y = f16tof32(ubo_load_106[0] >> 16);
+  vector<float16_t, 4> zyzx = vector<float16_t, 3>(ubo_load_106_xz[0], ubo_load_106_y, ubo_load_106_xz[1]).zyzx;
+  uint2 ubo_load_107 = U[0].xy;
+  vector<float16_t, 2> ubo_load_107_xz = vector<float16_t, 2>(f16tof32(ubo_load_107 & 0xFFFF));
+  float16_t ubo_load_107_y = f16tof32(ubo_load_107[0] >> 16);
+  vector<float16_t, 4> zyzy = vector<float16_t, 3>(ubo_load_107_xz[0], ubo_load_107_y, ubo_load_107_xz[1]).zyzy;
+  uint2 ubo_load_108 = U[0].xy;
+  vector<float16_t, 2> ubo_load_108_xz = vector<float16_t, 2>(f16tof32(ubo_load_108 & 0xFFFF));
+  float16_t ubo_load_108_y = f16tof32(ubo_load_108[0] >> 16);
+  vector<float16_t, 4> zyzz = vector<float16_t, 3>(ubo_load_108_xz[0], ubo_load_108_y, ubo_load_108_xz[1]).zyzz;
+  uint2 ubo_load_109 = U[0].xy;
+  vector<float16_t, 2> ubo_load_109_xz = vector<float16_t, 2>(f16tof32(ubo_load_109 & 0xFFFF));
+  float16_t ubo_load_109_y = f16tof32(ubo_load_109[0] >> 16);
+  vector<float16_t, 4> zzxx = vector<float16_t, 3>(ubo_load_109_xz[0], ubo_load_109_y, ubo_load_109_xz[1]).zzxx;
+  uint2 ubo_load_110 = U[0].xy;
+  vector<float16_t, 2> ubo_load_110_xz = vector<float16_t, 2>(f16tof32(ubo_load_110 & 0xFFFF));
+  float16_t ubo_load_110_y = f16tof32(ubo_load_110[0] >> 16);
+  vector<float16_t, 4> zzxy = vector<float16_t, 3>(ubo_load_110_xz[0], ubo_load_110_y, ubo_load_110_xz[1]).zzxy;
+  uint2 ubo_load_111 = U[0].xy;
+  vector<float16_t, 2> ubo_load_111_xz = vector<float16_t, 2>(f16tof32(ubo_load_111 & 0xFFFF));
+  float16_t ubo_load_111_y = f16tof32(ubo_load_111[0] >> 16);
+  vector<float16_t, 4> zzxz = vector<float16_t, 3>(ubo_load_111_xz[0], ubo_load_111_y, ubo_load_111_xz[1]).zzxz;
+  uint2 ubo_load_112 = U[0].xy;
+  vector<float16_t, 2> ubo_load_112_xz = vector<float16_t, 2>(f16tof32(ubo_load_112 & 0xFFFF));
+  float16_t ubo_load_112_y = f16tof32(ubo_load_112[0] >> 16);
+  vector<float16_t, 4> zzyx = vector<float16_t, 3>(ubo_load_112_xz[0], ubo_load_112_y, ubo_load_112_xz[1]).zzyx;
+  uint2 ubo_load_113 = U[0].xy;
+  vector<float16_t, 2> ubo_load_113_xz = vector<float16_t, 2>(f16tof32(ubo_load_113 & 0xFFFF));
+  float16_t ubo_load_113_y = f16tof32(ubo_load_113[0] >> 16);
+  vector<float16_t, 4> zzyy = vector<float16_t, 3>(ubo_load_113_xz[0], ubo_load_113_y, ubo_load_113_xz[1]).zzyy;
+  uint2 ubo_load_114 = U[0].xy;
+  vector<float16_t, 2> ubo_load_114_xz = vector<float16_t, 2>(f16tof32(ubo_load_114 & 0xFFFF));
+  float16_t ubo_load_114_y = f16tof32(ubo_load_114[0] >> 16);
+  vector<float16_t, 4> zzyz = vector<float16_t, 3>(ubo_load_114_xz[0], ubo_load_114_y, ubo_load_114_xz[1]).zzyz;
+  uint2 ubo_load_115 = U[0].xy;
+  vector<float16_t, 2> ubo_load_115_xz = vector<float16_t, 2>(f16tof32(ubo_load_115 & 0xFFFF));
+  float16_t ubo_load_115_y = f16tof32(ubo_load_115[0] >> 16);
+  vector<float16_t, 4> zzzx = vector<float16_t, 3>(ubo_load_115_xz[0], ubo_load_115_y, ubo_load_115_xz[1]).zzzx;
+  uint2 ubo_load_116 = U[0].xy;
+  vector<float16_t, 2> ubo_load_116_xz = vector<float16_t, 2>(f16tof32(ubo_load_116 & 0xFFFF));
+  float16_t ubo_load_116_y = f16tof32(ubo_load_116[0] >> 16);
+  vector<float16_t, 4> zzzy = vector<float16_t, 3>(ubo_load_116_xz[0], ubo_load_116_y, ubo_load_116_xz[1]).zzzy;
+  uint2 ubo_load_117 = U[0].xy;
+  vector<float16_t, 2> ubo_load_117_xz = vector<float16_t, 2>(f16tof32(ubo_load_117 & 0xFFFF));
+  float16_t ubo_load_117_y = f16tof32(ubo_load_117[0] >> 16);
+  vector<float16_t, 4> zzzz = vector<float16_t, 3>(ubo_load_117_xz[0], ubo_load_117_y, ubo_load_117_xz[1]).zzzz;
+}

diff --git a/test/tint/expressions/swizzle/read/packed_vec3/f16.wgsl.expected.glsl b/test/tint/expressions/swizzle/read/packed_vec3/f16.wgsl.expected.glsl
index a5570c5..7794d48 100644
--- a/test/tint/expressions/swizzle/read/packed_vec3/f16.wgsl.expected.glsl
+++ b/test/tint/expressions/swizzle/read/packed_vec3/f16.wgsl.expected.glsl

@@ -1,18 +1,141 @@
-SKIP: FAILED
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
 
-expressions/swizzle/read/packed_vec3/f16.wgsl:3:8 error: using f16 types in 'uniform' address space is not implemented yet
-    v: vec3<f16>,
-       ^^^^^^^^^
-
-expressions/swizzle/read/packed_vec3/f16.wgsl:2:1 note: see layout of struct:
-/*           align(8) size(8) */ struct S {
-/* offset(0) align(8) size(6) */   v : vec3<f16>;
-/* offset(6) align(1) size(2) */   // -- implicit struct size padding --;
-/*                            */ };
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void unused_entry_point() {
+  return;
+}
 struct S {
-^^^^^^
+  f16vec3 v;
+  uint pad;
+  uint pad_1;
+};
 
-expressions/swizzle/read/packed_vec3/f16.wgsl:6:36 note: see declaration of variable
-@group(0) @binding(0) var<uniform> U : S;
-                                   ^
+layout(binding = 0, std140) uniform U_block_ubo {
+  S inner;
+} U;
+
+void f() {
+  f16vec3 v = U.inner.v;
+  float16_t x = U.inner.v.x;
+  float16_t y = U.inner.v.y;
+  float16_t z = U.inner.v.z;
+  f16vec2 xx = U.inner.v.xx;
+  f16vec2 xy = U.inner.v.xy;
+  f16vec2 xz = U.inner.v.xz;
+  f16vec2 yx = U.inner.v.yx;
+  f16vec2 yy = U.inner.v.yy;
+  f16vec2 yz = U.inner.v.yz;
+  f16vec2 zx = U.inner.v.zx;
+  f16vec2 zy = U.inner.v.zy;
+  f16vec2 zz = U.inner.v.zz;
+  f16vec3 xxx = U.inner.v.xxx;
+  f16vec3 xxy = U.inner.v.xxy;
+  f16vec3 xxz = U.inner.v.xxz;
+  f16vec3 xyx = U.inner.v.xyx;
+  f16vec3 xyy = U.inner.v.xyy;
+  f16vec3 xyz = U.inner.v.xyz;
+  f16vec3 xzx = U.inner.v.xzx;
+  f16vec3 xzy = U.inner.v.xzy;
+  f16vec3 xzz = U.inner.v.xzz;
+  f16vec3 yxx = U.inner.v.yxx;
+  f16vec3 yxy = U.inner.v.yxy;
+  f16vec3 yxz = U.inner.v.yxz;
+  f16vec3 yyx = U.inner.v.yyx;
+  f16vec3 yyy = U.inner.v.yyy;
+  f16vec3 yyz = U.inner.v.yyz;
+  f16vec3 yzx = U.inner.v.yzx;
+  f16vec3 yzy = U.inner.v.yzy;
+  f16vec3 yzz = U.inner.v.yzz;
+  f16vec3 zxx = U.inner.v.zxx;
+  f16vec3 zxy = U.inner.v.zxy;
+  f16vec3 zxz = U.inner.v.zxz;
+  f16vec3 zyx = U.inner.v.zyx;
+  f16vec3 zyy = U.inner.v.zyy;
+  f16vec3 zyz = U.inner.v.zyz;
+  f16vec3 zzx = U.inner.v.zzx;
+  f16vec3 zzy = U.inner.v.zzy;
+  f16vec3 zzz = U.inner.v.zzz;
+  f16vec4 xxxx = U.inner.v.xxxx;
+  f16vec4 xxxy = U.inner.v.xxxy;
+  f16vec4 xxxz = U.inner.v.xxxz;
+  f16vec4 xxyx = U.inner.v.xxyx;
+  f16vec4 xxyy = U.inner.v.xxyy;
+  f16vec4 xxyz = U.inner.v.xxyz;
+  f16vec4 xxzx = U.inner.v.xxzx;
+  f16vec4 xxzy = U.inner.v.xxzy;
+  f16vec4 xxzz = U.inner.v.xxzz;
+  f16vec4 xyxx = U.inner.v.xyxx;
+  f16vec4 xyxy = U.inner.v.xyxy;
+  f16vec4 xyxz = U.inner.v.xyxz;
+  f16vec4 xyyx = U.inner.v.xyyx;
+  f16vec4 xyyy = U.inner.v.xyyy;
+  f16vec4 xyyz = U.inner.v.xyyz;
+  f16vec4 xyzx = U.inner.v.xyzx;
+  f16vec4 xyzy = U.inner.v.xyzy;
+  f16vec4 xyzz = U.inner.v.xyzz;
+  f16vec4 xzxx = U.inner.v.xzxx;
+  f16vec4 xzxy = U.inner.v.xzxy;
+  f16vec4 xzxz = U.inner.v.xzxz;
+  f16vec4 xzyx = U.inner.v.xzyx;
+  f16vec4 xzyy = U.inner.v.xzyy;
+  f16vec4 xzyz = U.inner.v.xzyz;
+  f16vec4 xzzx = U.inner.v.xzzx;
+  f16vec4 xzzy = U.inner.v.xzzy;
+  f16vec4 xzzz = U.inner.v.xzzz;
+  f16vec4 yxxx = U.inner.v.yxxx;
+  f16vec4 yxxy = U.inner.v.yxxy;
+  f16vec4 yxxz = U.inner.v.yxxz;
+  f16vec4 yxyx = U.inner.v.yxyx;
+  f16vec4 yxyy = U.inner.v.yxyy;
+  f16vec4 yxyz = U.inner.v.yxyz;
+  f16vec4 yxzx = U.inner.v.yxzx;
+  f16vec4 yxzy = U.inner.v.yxzy;
+  f16vec4 yxzz = U.inner.v.yxzz;
+  f16vec4 yyxx = U.inner.v.yyxx;
+  f16vec4 yyxy = U.inner.v.yyxy;
+  f16vec4 yyxz = U.inner.v.yyxz;
+  f16vec4 yyyx = U.inner.v.yyyx;
+  f16vec4 yyyy = U.inner.v.yyyy;
+  f16vec4 yyyz = U.inner.v.yyyz;
+  f16vec4 yyzx = U.inner.v.yyzx;
+  f16vec4 yyzy = U.inner.v.yyzy;
+  f16vec4 yyzz = U.inner.v.yyzz;
+  f16vec4 yzxx = U.inner.v.yzxx;
+  f16vec4 yzxy = U.inner.v.yzxy;
+  f16vec4 yzxz = U.inner.v.yzxz;
+  f16vec4 yzyx = U.inner.v.yzyx;
+  f16vec4 yzyy = U.inner.v.yzyy;
+  f16vec4 yzyz = U.inner.v.yzyz;
+  f16vec4 yzzx = U.inner.v.yzzx;
+  f16vec4 yzzy = U.inner.v.yzzy;
+  f16vec4 yzzz = U.inner.v.yzzz;
+  f16vec4 zxxx = U.inner.v.zxxx;
+  f16vec4 zxxy = U.inner.v.zxxy;
+  f16vec4 zxxz = U.inner.v.zxxz;
+  f16vec4 zxyx = U.inner.v.zxyx;
+  f16vec4 zxyy = U.inner.v.zxyy;
+  f16vec4 zxyz = U.inner.v.zxyz;
+  f16vec4 zxzx = U.inner.v.zxzx;
+  f16vec4 zxzy = U.inner.v.zxzy;
+  f16vec4 zxzz = U.inner.v.zxzz;
+  f16vec4 zyxx = U.inner.v.zyxx;
+  f16vec4 zyxy = U.inner.v.zyxy;
+  f16vec4 zyxz = U.inner.v.zyxz;
+  f16vec4 zyyx = U.inner.v.zyyx;
+  f16vec4 zyyy = U.inner.v.zyyy;
+  f16vec4 zyyz = U.inner.v.zyyz;
+  f16vec4 zyzx = U.inner.v.zyzx;
+  f16vec4 zyzy = U.inner.v.zyzy;
+  f16vec4 zyzz = U.inner.v.zyzz;
+  f16vec4 zzxx = U.inner.v.zzxx;
+  f16vec4 zzxy = U.inner.v.zzxy;
+  f16vec4 zzxz = U.inner.v.zzxz;
+  f16vec4 zzyx = U.inner.v.zzyx;
+  f16vec4 zzyy = U.inner.v.zzyy;
+  f16vec4 zzyz = U.inner.v.zzyz;
+  f16vec4 zzzx = U.inner.v.zzzx;
+  f16vec4 zzzy = U.inner.v.zzzy;
+  f16vec4 zzzz = U.inner.v.zzzz;
+}
 

diff --git a/test/tint/expressions/swizzle/read/packed_vec3/f16.wgsl.expected.msl b/test/tint/expressions/swizzle/read/packed_vec3/f16.wgsl.expected.msl
index 6c8063b..ab91c3e 100644
--- a/test/tint/expressions/swizzle/read/packed_vec3/f16.wgsl.expected.msl
+++ b/test/tint/expressions/swizzle/read/packed_vec3/f16.wgsl.expected.msl

@@ -1,18 +1,145 @@
-SKIP: FAILED
+#include <metal_stdlib>
 
-swizzle/read/packed_vec3/f16.wgsl:3:8 error: using f16 types in 'uniform' address space is not implemented yet
-    v: vec3<f16>,
-       ^^^^^^^^^
+using namespace metal;
 
-swizzle/read/packed_vec3/f16.wgsl:2:1 note: see layout of struct:
-/*           align(8) size(8) */ struct S {
-/* offset(0) align(8) size(6) */   v : vec3<f16>;
-/* offset(6) align(1) size(2) */   // -- implicit struct size padding --;
-/*                            */ };
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
 struct S {
-^^^^^^
+  /* 0x0000 */ packed_half3 v;
+  /* 0x0006 */ tint_array<int8_t, 2> tint_pad;
+};
 
-swizzle/read/packed_vec3/f16.wgsl:6:36 note: see declaration of variable
-@group(0) @binding(0) var<uniform> U : S;
-                                   ^
+void f(const constant S* const tint_symbol) {
+  half3 v = half3((*(tint_symbol)).v);
+  half x = (*(tint_symbol)).v[0];
+  half y = (*(tint_symbol)).v[1];
+  half z = (*(tint_symbol)).v[2];
+  half2 xx = half3((*(tint_symbol)).v).xx;
+  half2 xy = half3((*(tint_symbol)).v).xy;
+  half2 xz = half3((*(tint_symbol)).v).xz;
+  half2 yx = half3((*(tint_symbol)).v).yx;
+  half2 yy = half3((*(tint_symbol)).v).yy;
+  half2 yz = half3((*(tint_symbol)).v).yz;
+  half2 zx = half3((*(tint_symbol)).v).zx;
+  half2 zy = half3((*(tint_symbol)).v).zy;
+  half2 zz = half3((*(tint_symbol)).v).zz;
+  half3 xxx = half3((*(tint_symbol)).v).xxx;
+  half3 xxy = half3((*(tint_symbol)).v).xxy;
+  half3 xxz = half3((*(tint_symbol)).v).xxz;
+  half3 xyx = half3((*(tint_symbol)).v).xyx;
+  half3 xyy = half3((*(tint_symbol)).v).xyy;
+  half3 xyz = half3((*(tint_symbol)).v).xyz;
+  half3 xzx = half3((*(tint_symbol)).v).xzx;
+  half3 xzy = half3((*(tint_symbol)).v).xzy;
+  half3 xzz = half3((*(tint_symbol)).v).xzz;
+  half3 yxx = half3((*(tint_symbol)).v).yxx;
+  half3 yxy = half3((*(tint_symbol)).v).yxy;
+  half3 yxz = half3((*(tint_symbol)).v).yxz;
+  half3 yyx = half3((*(tint_symbol)).v).yyx;
+  half3 yyy = half3((*(tint_symbol)).v).yyy;
+  half3 yyz = half3((*(tint_symbol)).v).yyz;
+  half3 yzx = half3((*(tint_symbol)).v).yzx;
+  half3 yzy = half3((*(tint_symbol)).v).yzy;
+  half3 yzz = half3((*(tint_symbol)).v).yzz;
+  half3 zxx = half3((*(tint_symbol)).v).zxx;
+  half3 zxy = half3((*(tint_symbol)).v).zxy;
+  half3 zxz = half3((*(tint_symbol)).v).zxz;
+  half3 zyx = half3((*(tint_symbol)).v).zyx;
+  half3 zyy = half3((*(tint_symbol)).v).zyy;
+  half3 zyz = half3((*(tint_symbol)).v).zyz;
+  half3 zzx = half3((*(tint_symbol)).v).zzx;
+  half3 zzy = half3((*(tint_symbol)).v).zzy;
+  half3 zzz = half3((*(tint_symbol)).v).zzz;
+  half4 xxxx = half3((*(tint_symbol)).v).xxxx;
+  half4 xxxy = half3((*(tint_symbol)).v).xxxy;
+  half4 xxxz = half3((*(tint_symbol)).v).xxxz;
+  half4 xxyx = half3((*(tint_symbol)).v).xxyx;
+  half4 xxyy = half3((*(tint_symbol)).v).xxyy;
+  half4 xxyz = half3((*(tint_symbol)).v).xxyz;
+  half4 xxzx = half3((*(tint_symbol)).v).xxzx;
+  half4 xxzy = half3((*(tint_symbol)).v).xxzy;
+  half4 xxzz = half3((*(tint_symbol)).v).xxzz;
+  half4 xyxx = half3((*(tint_symbol)).v).xyxx;
+  half4 xyxy = half3((*(tint_symbol)).v).xyxy;
+  half4 xyxz = half3((*(tint_symbol)).v).xyxz;
+  half4 xyyx = half3((*(tint_symbol)).v).xyyx;
+  half4 xyyy = half3((*(tint_symbol)).v).xyyy;
+  half4 xyyz = half3((*(tint_symbol)).v).xyyz;
+  half4 xyzx = half3((*(tint_symbol)).v).xyzx;
+  half4 xyzy = half3((*(tint_symbol)).v).xyzy;
+  half4 xyzz = half3((*(tint_symbol)).v).xyzz;
+  half4 xzxx = half3((*(tint_symbol)).v).xzxx;
+  half4 xzxy = half3((*(tint_symbol)).v).xzxy;
+  half4 xzxz = half3((*(tint_symbol)).v).xzxz;
+  half4 xzyx = half3((*(tint_symbol)).v).xzyx;
+  half4 xzyy = half3((*(tint_symbol)).v).xzyy;
+  half4 xzyz = half3((*(tint_symbol)).v).xzyz;
+  half4 xzzx = half3((*(tint_symbol)).v).xzzx;
+  half4 xzzy = half3((*(tint_symbol)).v).xzzy;
+  half4 xzzz = half3((*(tint_symbol)).v).xzzz;
+  half4 yxxx = half3((*(tint_symbol)).v).yxxx;
+  half4 yxxy = half3((*(tint_symbol)).v).yxxy;
+  half4 yxxz = half3((*(tint_symbol)).v).yxxz;
+  half4 yxyx = half3((*(tint_symbol)).v).yxyx;
+  half4 yxyy = half3((*(tint_symbol)).v).yxyy;
+  half4 yxyz = half3((*(tint_symbol)).v).yxyz;
+  half4 yxzx = half3((*(tint_symbol)).v).yxzx;
+  half4 yxzy = half3((*(tint_symbol)).v).yxzy;
+  half4 yxzz = half3((*(tint_symbol)).v).yxzz;
+  half4 yyxx = half3((*(tint_symbol)).v).yyxx;
+  half4 yyxy = half3((*(tint_symbol)).v).yyxy;
+  half4 yyxz = half3((*(tint_symbol)).v).yyxz;
+  half4 yyyx = half3((*(tint_symbol)).v).yyyx;
+  half4 yyyy = half3((*(tint_symbol)).v).yyyy;
+  half4 yyyz = half3((*(tint_symbol)).v).yyyz;
+  half4 yyzx = half3((*(tint_symbol)).v).yyzx;
+  half4 yyzy = half3((*(tint_symbol)).v).yyzy;
+  half4 yyzz = half3((*(tint_symbol)).v).yyzz;
+  half4 yzxx = half3((*(tint_symbol)).v).yzxx;
+  half4 yzxy = half3((*(tint_symbol)).v).yzxy;
+  half4 yzxz = half3((*(tint_symbol)).v).yzxz;
+  half4 yzyx = half3((*(tint_symbol)).v).yzyx;
+  half4 yzyy = half3((*(tint_symbol)).v).yzyy;
+  half4 yzyz = half3((*(tint_symbol)).v).yzyz;
+  half4 yzzx = half3((*(tint_symbol)).v).yzzx;
+  half4 yzzy = half3((*(tint_symbol)).v).yzzy;
+  half4 yzzz = half3((*(tint_symbol)).v).yzzz;
+  half4 zxxx = half3((*(tint_symbol)).v).zxxx;
+  half4 zxxy = half3((*(tint_symbol)).v).zxxy;
+  half4 zxxz = half3((*(tint_symbol)).v).zxxz;
+  half4 zxyx = half3((*(tint_symbol)).v).zxyx;
+  half4 zxyy = half3((*(tint_symbol)).v).zxyy;
+  half4 zxyz = half3((*(tint_symbol)).v).zxyz;
+  half4 zxzx = half3((*(tint_symbol)).v).zxzx;
+  half4 zxzy = half3((*(tint_symbol)).v).zxzy;
+  half4 zxzz = half3((*(tint_symbol)).v).zxzz;
+  half4 zyxx = half3((*(tint_symbol)).v).zyxx;
+  half4 zyxy = half3((*(tint_symbol)).v).zyxy;
+  half4 zyxz = half3((*(tint_symbol)).v).zyxz;
+  half4 zyyx = half3((*(tint_symbol)).v).zyyx;
+  half4 zyyy = half3((*(tint_symbol)).v).zyyy;
+  half4 zyyz = half3((*(tint_symbol)).v).zyyz;
+  half4 zyzx = half3((*(tint_symbol)).v).zyzx;
+  half4 zyzy = half3((*(tint_symbol)).v).zyzy;
+  half4 zyzz = half3((*(tint_symbol)).v).zyzz;
+  half4 zzxx = half3((*(tint_symbol)).v).zzxx;
+  half4 zzxy = half3((*(tint_symbol)).v).zzxy;
+  half4 zzxz = half3((*(tint_symbol)).v).zzxz;
+  half4 zzyx = half3((*(tint_symbol)).v).zzyx;
+  half4 zzyy = half3((*(tint_symbol)).v).zzyy;
+  half4 zzyz = half3((*(tint_symbol)).v).zzyz;
+  half4 zzzx = half3((*(tint_symbol)).v).zzzx;
+  half4 zzzy = half3((*(tint_symbol)).v).zzzy;
+  half4 zzzz = half3((*(tint_symbol)).v).zzzz;
+}
 

diff --git a/test/tint/expressions/swizzle/read/packed_vec3/f16.wgsl.expected.spvasm b/test/tint/expressions/swizzle/read/packed_vec3/f16.wgsl.expected.spvasm
index 6c8063b..d7796c5 100644
--- a/test/tint/expressions/swizzle/read/packed_vec3/f16.wgsl.expected.spvasm
+++ b/test/tint/expressions/swizzle/read/packed_vec3/f16.wgsl.expected.spvasm

@@ -1,18 +1,780 @@
-SKIP: FAILED
-
-swizzle/read/packed_vec3/f16.wgsl:3:8 error: using f16 types in 'uniform' address space is not implemented yet
-    v: vec3<f16>,
-       ^^^^^^^^^
-
-swizzle/read/packed_vec3/f16.wgsl:2:1 note: see layout of struct:
-/*           align(8) size(8) */ struct S {
-/* offset(0) align(8) size(6) */   v : vec3<f16>;
-/* offset(6) align(1) size(2) */   // -- implicit struct size padding --;
-/*                            */ };
-struct S {
-^^^^^^
-
-swizzle/read/packed_vec3/f16.wgsl:6:36 note: see declaration of variable
-@group(0) @binding(0) var<uniform> U : S;
-                                   ^
-
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 509
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %unused_entry_point "unused_entry_point"
+               OpExecutionMode %unused_entry_point LocalSize 1 1 1
+               OpName %U_block "U_block"
+               OpMemberName %U_block 0 "inner"
+               OpName %S "S"
+               OpMemberName %S 0 "v"
+               OpName %U "U"
+               OpName %unused_entry_point "unused_entry_point"
+               OpName %f "f"
+               OpName %v "v"
+               OpName %x "x"
+               OpName %y "y"
+               OpName %z "z"
+               OpName %xx "xx"
+               OpName %xy "xy"
+               OpName %xz "xz"
+               OpName %yx "yx"
+               OpName %yy "yy"
+               OpName %yz "yz"
+               OpName %zx "zx"
+               OpName %zy "zy"
+               OpName %zz "zz"
+               OpName %xxx "xxx"
+               OpName %xxy "xxy"
+               OpName %xxz "xxz"
+               OpName %xyx "xyx"
+               OpName %xyy "xyy"
+               OpName %xyz "xyz"
+               OpName %xzx "xzx"
+               OpName %xzy "xzy"
+               OpName %xzz "xzz"
+               OpName %yxx "yxx"
+               OpName %yxy "yxy"
+               OpName %yxz "yxz"
+               OpName %yyx "yyx"
+               OpName %yyy "yyy"
+               OpName %yyz "yyz"
+               OpName %yzx "yzx"
+               OpName %yzy "yzy"
+               OpName %yzz "yzz"
+               OpName %zxx "zxx"
+               OpName %zxy "zxy"
+               OpName %zxz "zxz"
+               OpName %zyx "zyx"
+               OpName %zyy "zyy"
+               OpName %zyz "zyz"
+               OpName %zzx "zzx"
+               OpName %zzy "zzy"
+               OpName %zzz "zzz"
+               OpName %xxxx "xxxx"
+               OpName %xxxy "xxxy"
+               OpName %xxxz "xxxz"
+               OpName %xxyx "xxyx"
+               OpName %xxyy "xxyy"
+               OpName %xxyz "xxyz"
+               OpName %xxzx "xxzx"
+               OpName %xxzy "xxzy"
+               OpName %xxzz "xxzz"
+               OpName %xyxx "xyxx"
+               OpName %xyxy "xyxy"
+               OpName %xyxz "xyxz"
+               OpName %xyyx "xyyx"
+               OpName %xyyy "xyyy"
+               OpName %xyyz "xyyz"
+               OpName %xyzx "xyzx"
+               OpName %xyzy "xyzy"
+               OpName %xyzz "xyzz"
+               OpName %xzxx "xzxx"
+               OpName %xzxy "xzxy"
+               OpName %xzxz "xzxz"
+               OpName %xzyx "xzyx"
+               OpName %xzyy "xzyy"
+               OpName %xzyz "xzyz"
+               OpName %xzzx "xzzx"
+               OpName %xzzy "xzzy"
+               OpName %xzzz "xzzz"
+               OpName %yxxx "yxxx"
+               OpName %yxxy "yxxy"
+               OpName %yxxz "yxxz"
+               OpName %yxyx "yxyx"
+               OpName %yxyy "yxyy"
+               OpName %yxyz "yxyz"
+               OpName %yxzx "yxzx"
+               OpName %yxzy "yxzy"
+               OpName %yxzz "yxzz"
+               OpName %yyxx "yyxx"
+               OpName %yyxy "yyxy"
+               OpName %yyxz "yyxz"
+               OpName %yyyx "yyyx"
+               OpName %yyyy "yyyy"
+               OpName %yyyz "yyyz"
+               OpName %yyzx "yyzx"
+               OpName %yyzy "yyzy"
+               OpName %yyzz "yyzz"
+               OpName %yzxx "yzxx"
+               OpName %yzxy "yzxy"
+               OpName %yzxz "yzxz"
+               OpName %yzyx "yzyx"
+               OpName %yzyy "yzyy"
+               OpName %yzyz "yzyz"
+               OpName %yzzx "yzzx"
+               OpName %yzzy "yzzy"
+               OpName %yzzz "yzzz"
+               OpName %zxxx "zxxx"
+               OpName %zxxy "zxxy"
+               OpName %zxxz "zxxz"
+               OpName %zxyx "zxyx"
+               OpName %zxyy "zxyy"
+               OpName %zxyz "zxyz"
+               OpName %zxzx "zxzx"
+               OpName %zxzy "zxzy"
+               OpName %zxzz "zxzz"
+               OpName %zyxx "zyxx"
+               OpName %zyxy "zyxy"
+               OpName %zyxz "zyxz"
+               OpName %zyyx "zyyx"
+               OpName %zyyy "zyyy"
+               OpName %zyyz "zyyz"
+               OpName %zyzx "zyzx"
+               OpName %zyzy "zyzy"
+               OpName %zyzz "zyzz"
+               OpName %zzxx "zzxx"
+               OpName %zzxy "zzxy"
+               OpName %zzxz "zzxz"
+               OpName %zzyx "zzyx"
+               OpName %zzyy "zzyy"
+               OpName %zzyz "zzyz"
+               OpName %zzzx "zzzx"
+               OpName %zzzy "zzzy"
+               OpName %zzzz "zzzz"
+               OpDecorate %U_block Block
+               OpMemberDecorate %U_block 0 Offset 0
+               OpMemberDecorate %S 0 Offset 0
+               OpDecorate %U NonWritable
+               OpDecorate %U DescriptorSet 0
+               OpDecorate %U Binding 0
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+          %S = OpTypeStruct %v3half
+    %U_block = OpTypeStruct %S
+%_ptr_Uniform_U_block = OpTypePointer Uniform %U_block
+          %U = OpVariable %_ptr_Uniform_U_block Uniform
+       %void = OpTypeVoid
+          %7 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+%_ptr_Function_v3half = OpTypePointer Function %v3half
+         %20 = OpConstantNull %v3half
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+%_ptr_Function_half = OpTypePointer Function %half
+         %26 = OpConstantNull %half
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+     %v2half = OpTypeVector %half 2
+%_ptr_Function_v2half = OpTypePointer Function %v2half
+         %41 = OpConstantNull %v2half
+     %v4half = OpTypeVector %half 4
+%_ptr_Function_v4half = OpTypePointer Function %v4half
+        %188 = OpConstantNull %v4half
+%unused_entry_point = OpFunction %void None %7
+         %10 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %7
+         %12 = OpLabel
+          %v = OpVariable %_ptr_Function_v3half Function %20
+          %x = OpVariable %_ptr_Function_half Function %26
+          %y = OpVariable %_ptr_Function_half Function %26
+          %z = OpVariable %_ptr_Function_half Function %26
+         %xx = OpVariable %_ptr_Function_v2half Function %41
+         %xy = OpVariable %_ptr_Function_v2half Function %41
+         %xz = OpVariable %_ptr_Function_v2half Function %41
+         %yx = OpVariable %_ptr_Function_v2half Function %41
+         %yy = OpVariable %_ptr_Function_v2half Function %41
+         %yz = OpVariable %_ptr_Function_v2half Function %41
+         %zx = OpVariable %_ptr_Function_v2half Function %41
+         %zy = OpVariable %_ptr_Function_v2half Function %41
+         %zz = OpVariable %_ptr_Function_v2half Function %41
+        %xxx = OpVariable %_ptr_Function_v3half Function %20
+        %xxy = OpVariable %_ptr_Function_v3half Function %20
+        %xxz = OpVariable %_ptr_Function_v3half Function %20
+        %xyx = OpVariable %_ptr_Function_v3half Function %20
+        %xyy = OpVariable %_ptr_Function_v3half Function %20
+        %xyz = OpVariable %_ptr_Function_v3half Function %20
+        %xzx = OpVariable %_ptr_Function_v3half Function %20
+        %xzy = OpVariable %_ptr_Function_v3half Function %20
+        %xzz = OpVariable %_ptr_Function_v3half Function %20
+        %yxx = OpVariable %_ptr_Function_v3half Function %20
+        %yxy = OpVariable %_ptr_Function_v3half Function %20
+        %yxz = OpVariable %_ptr_Function_v3half Function %20
+        %yyx = OpVariable %_ptr_Function_v3half Function %20
+        %yyy = OpVariable %_ptr_Function_v3half Function %20
+        %yyz = OpVariable %_ptr_Function_v3half Function %20
+        %yzx = OpVariable %_ptr_Function_v3half Function %20
+        %yzy = OpVariable %_ptr_Function_v3half Function %20
+        %yzz = OpVariable %_ptr_Function_v3half Function %20
+        %zxx = OpVariable %_ptr_Function_v3half Function %20
+        %zxy = OpVariable %_ptr_Function_v3half Function %20
+        %zxz = OpVariable %_ptr_Function_v3half Function %20
+        %zyx = OpVariable %_ptr_Function_v3half Function %20
+        %zyy = OpVariable %_ptr_Function_v3half Function %20
+        %zyz = OpVariable %_ptr_Function_v3half Function %20
+        %zzx = OpVariable %_ptr_Function_v3half Function %20
+        %zzy = OpVariable %_ptr_Function_v3half Function %20
+        %zzz = OpVariable %_ptr_Function_v3half Function %20
+       %xxxx = OpVariable %_ptr_Function_v4half Function %188
+       %xxxy = OpVariable %_ptr_Function_v4half Function %188
+       %xxxz = OpVariable %_ptr_Function_v4half Function %188
+       %xxyx = OpVariable %_ptr_Function_v4half Function %188
+       %xxyy = OpVariable %_ptr_Function_v4half Function %188
+       %xxyz = OpVariable %_ptr_Function_v4half Function %188
+       %xxzx = OpVariable %_ptr_Function_v4half Function %188
+       %xxzy = OpVariable %_ptr_Function_v4half Function %188
+       %xxzz = OpVariable %_ptr_Function_v4half Function %188
+       %xyxx = OpVariable %_ptr_Function_v4half Function %188
+       %xyxy = OpVariable %_ptr_Function_v4half Function %188
+       %xyxz = OpVariable %_ptr_Function_v4half Function %188
+       %xyyx = OpVariable %_ptr_Function_v4half Function %188
+       %xyyy = OpVariable %_ptr_Function_v4half Function %188
+       %xyyz = OpVariable %_ptr_Function_v4half Function %188
+       %xyzx = OpVariable %_ptr_Function_v4half Function %188
+       %xyzy = OpVariable %_ptr_Function_v4half Function %188
+       %xyzz = OpVariable %_ptr_Function_v4half Function %188
+       %xzxx = OpVariable %_ptr_Function_v4half Function %188
+       %xzxy = OpVariable %_ptr_Function_v4half Function %188
+       %xzxz = OpVariable %_ptr_Function_v4half Function %188
+       %xzyx = OpVariable %_ptr_Function_v4half Function %188
+       %xzyy = OpVariable %_ptr_Function_v4half Function %188
+       %xzyz = OpVariable %_ptr_Function_v4half Function %188
+       %xzzx = OpVariable %_ptr_Function_v4half Function %188
+       %xzzy = OpVariable %_ptr_Function_v4half Function %188
+       %xzzz = OpVariable %_ptr_Function_v4half Function %188
+       %yxxx = OpVariable %_ptr_Function_v4half Function %188
+       %yxxy = OpVariable %_ptr_Function_v4half Function %188
+       %yxxz = OpVariable %_ptr_Function_v4half Function %188
+       %yxyx = OpVariable %_ptr_Function_v4half Function %188
+       %yxyy = OpVariable %_ptr_Function_v4half Function %188
+       %yxyz = OpVariable %_ptr_Function_v4half Function %188
+       %yxzx = OpVariable %_ptr_Function_v4half Function %188
+       %yxzy = OpVariable %_ptr_Function_v4half Function %188
+       %yxzz = OpVariable %_ptr_Function_v4half Function %188
+       %yyxx = OpVariable %_ptr_Function_v4half Function %188
+       %yyxy = OpVariable %_ptr_Function_v4half Function %188
+       %yyxz = OpVariable %_ptr_Function_v4half Function %188
+       %yyyx = OpVariable %_ptr_Function_v4half Function %188
+       %yyyy = OpVariable %_ptr_Function_v4half Function %188
+       %yyyz = OpVariable %_ptr_Function_v4half Function %188
+       %yyzx = OpVariable %_ptr_Function_v4half Function %188
+       %yyzy = OpVariable %_ptr_Function_v4half Function %188
+       %yyzz = OpVariable %_ptr_Function_v4half Function %188
+       %yzxx = OpVariable %_ptr_Function_v4half Function %188
+       %yzxy = OpVariable %_ptr_Function_v4half Function %188
+       %yzxz = OpVariable %_ptr_Function_v4half Function %188
+       %yzyx = OpVariable %_ptr_Function_v4half Function %188
+       %yzyy = OpVariable %_ptr_Function_v4half Function %188
+       %yzyz = OpVariable %_ptr_Function_v4half Function %188
+       %yzzx = OpVariable %_ptr_Function_v4half Function %188
+       %yzzy = OpVariable %_ptr_Function_v4half Function %188
+       %yzzz = OpVariable %_ptr_Function_v4half Function %188
+       %zxxx = OpVariable %_ptr_Function_v4half Function %188
+       %zxxy = OpVariable %_ptr_Function_v4half Function %188
+       %zxxz = OpVariable %_ptr_Function_v4half Function %188
+       %zxyx = OpVariable %_ptr_Function_v4half Function %188
+       %zxyy = OpVariable %_ptr_Function_v4half Function %188
+       %zxyz = OpVariable %_ptr_Function_v4half Function %188
+       %zxzx = OpVariable %_ptr_Function_v4half Function %188
+       %zxzy = OpVariable %_ptr_Function_v4half Function %188
+       %zxzz = OpVariable %_ptr_Function_v4half Function %188
+       %zyxx = OpVariable %_ptr_Function_v4half Function %188
+       %zyxy = OpVariable %_ptr_Function_v4half Function %188
+       %zyxz = OpVariable %_ptr_Function_v4half Function %188
+       %zyyx = OpVariable %_ptr_Function_v4half Function %188
+       %zyyy = OpVariable %_ptr_Function_v4half Function %188
+       %zyyz = OpVariable %_ptr_Function_v4half Function %188
+       %zyzx = OpVariable %_ptr_Function_v4half Function %188
+       %zyzy = OpVariable %_ptr_Function_v4half Function %188
+       %zyzz = OpVariable %_ptr_Function_v4half Function %188
+       %zzxx = OpVariable %_ptr_Function_v4half Function %188
+       %zzxy = OpVariable %_ptr_Function_v4half Function %188
+       %zzxz = OpVariable %_ptr_Function_v4half Function %188
+       %zzyx = OpVariable %_ptr_Function_v4half Function %188
+       %zzyy = OpVariable %_ptr_Function_v4half Function %188
+       %zzyz = OpVariable %_ptr_Function_v4half Function %188
+       %zzzx = OpVariable %_ptr_Function_v4half Function %188
+       %zzzy = OpVariable %_ptr_Function_v4half Function %188
+       %zzzz = OpVariable %_ptr_Function_v4half Function %188
+         %16 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+         %17 = OpLoad %v3half %16
+               OpStore %v %17
+         %22 = OpAccessChain %_ptr_Uniform_half %U %uint_0 %uint_0 %uint_0
+         %23 = OpLoad %half %22
+               OpStore %x %23
+         %28 = OpAccessChain %_ptr_Uniform_half %U %uint_0 %uint_0 %uint_1
+         %29 = OpLoad %half %28
+               OpStore %y %29
+         %32 = OpAccessChain %_ptr_Uniform_half %U %uint_0 %uint_0 %uint_2
+         %33 = OpLoad %half %32
+               OpStore %z %33
+         %35 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+         %37 = OpLoad %v3half %35
+         %38 = OpVectorShuffle %v2half %37 %37 0 0
+               OpStore %xx %38
+         %42 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+         %43 = OpLoad %v3half %42
+         %44 = OpVectorShuffle %v2half %43 %43 0 1
+               OpStore %xy %44
+         %46 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+         %47 = OpLoad %v3half %46
+         %48 = OpVectorShuffle %v2half %47 %47 0 2
+               OpStore %xz %48
+         %50 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+         %51 = OpLoad %v3half %50
+         %52 = OpVectorShuffle %v2half %51 %51 1 0
+               OpStore %yx %52
+         %54 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+         %55 = OpLoad %v3half %54
+         %56 = OpVectorShuffle %v2half %55 %55 1 1
+               OpStore %yy %56
+         %58 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+         %59 = OpLoad %v3half %58
+         %60 = OpVectorShuffle %v2half %59 %59 1 2
+               OpStore %yz %60
+         %62 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+         %63 = OpLoad %v3half %62
+         %64 = OpVectorShuffle %v2half %63 %63 2 0
+               OpStore %zx %64
+         %66 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+         %67 = OpLoad %v3half %66
+         %68 = OpVectorShuffle %v2half %67 %67 2 1
+               OpStore %zy %68
+         %70 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+         %71 = OpLoad %v3half %70
+         %72 = OpVectorShuffle %v2half %71 %71 2 2
+               OpStore %zz %72
+         %74 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+         %75 = OpLoad %v3half %74
+         %76 = OpVectorShuffle %v3half %75 %75 0 0 0
+               OpStore %xxx %76
+         %78 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+         %79 = OpLoad %v3half %78
+         %80 = OpVectorShuffle %v3half %79 %79 0 0 1
+               OpStore %xxy %80
+         %82 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+         %83 = OpLoad %v3half %82
+         %84 = OpVectorShuffle %v3half %83 %83 0 0 2
+               OpStore %xxz %84
+         %86 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+         %87 = OpLoad %v3half %86
+         %88 = OpVectorShuffle %v3half %87 %87 0 1 0
+               OpStore %xyx %88
+         %90 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+         %91 = OpLoad %v3half %90
+         %92 = OpVectorShuffle %v3half %91 %91 0 1 1
+               OpStore %xyy %92
+         %94 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+         %95 = OpLoad %v3half %94
+         %96 = OpVectorShuffle %v3half %95 %95 0 1 2
+               OpStore %xyz %96
+         %98 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+         %99 = OpLoad %v3half %98
+        %100 = OpVectorShuffle %v3half %99 %99 0 2 0
+               OpStore %xzx %100
+        %102 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %103 = OpLoad %v3half %102
+        %104 = OpVectorShuffle %v3half %103 %103 0 2 1
+               OpStore %xzy %104
+        %106 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %107 = OpLoad %v3half %106
+        %108 = OpVectorShuffle %v3half %107 %107 0 2 2
+               OpStore %xzz %108
+        %110 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %111 = OpLoad %v3half %110
+        %112 = OpVectorShuffle %v3half %111 %111 1 0 0
+               OpStore %yxx %112
+        %114 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %115 = OpLoad %v3half %114
+        %116 = OpVectorShuffle %v3half %115 %115 1 0 1
+               OpStore %yxy %116
+        %118 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %119 = OpLoad %v3half %118
+        %120 = OpVectorShuffle %v3half %119 %119 1 0 2
+               OpStore %yxz %120
+        %122 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %123 = OpLoad %v3half %122
+        %124 = OpVectorShuffle %v3half %123 %123 1 1 0
+               OpStore %yyx %124
+        %126 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %127 = OpLoad %v3half %126
+        %128 = OpVectorShuffle %v3half %127 %127 1 1 1
+               OpStore %yyy %128
+        %130 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %131 = OpLoad %v3half %130
+        %132 = OpVectorShuffle %v3half %131 %131 1 1 2
+               OpStore %yyz %132
+        %134 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %135 = OpLoad %v3half %134
+        %136 = OpVectorShuffle %v3half %135 %135 1 2 0
+               OpStore %yzx %136
+        %138 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %139 = OpLoad %v3half %138
+        %140 = OpVectorShuffle %v3half %139 %139 1 2 1
+               OpStore %yzy %140
+        %142 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %143 = OpLoad %v3half %142
+        %144 = OpVectorShuffle %v3half %143 %143 1 2 2
+               OpStore %yzz %144
+        %146 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %147 = OpLoad %v3half %146
+        %148 = OpVectorShuffle %v3half %147 %147 2 0 0
+               OpStore %zxx %148
+        %150 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %151 = OpLoad %v3half %150
+        %152 = OpVectorShuffle %v3half %151 %151 2 0 1
+               OpStore %zxy %152
+        %154 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %155 = OpLoad %v3half %154
+        %156 = OpVectorShuffle %v3half %155 %155 2 0 2
+               OpStore %zxz %156
+        %158 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %159 = OpLoad %v3half %158
+        %160 = OpVectorShuffle %v3half %159 %159 2 1 0
+               OpStore %zyx %160
+        %162 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %163 = OpLoad %v3half %162
+        %164 = OpVectorShuffle %v3half %163 %163 2 1 1
+               OpStore %zyy %164
+        %166 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %167 = OpLoad %v3half %166
+        %168 = OpVectorShuffle %v3half %167 %167 2 1 2
+               OpStore %zyz %168
+        %170 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %171 = OpLoad %v3half %170
+        %172 = OpVectorShuffle %v3half %171 %171 2 2 0
+               OpStore %zzx %172
+        %174 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %175 = OpLoad %v3half %174
+        %176 = OpVectorShuffle %v3half %175 %175 2 2 1
+               OpStore %zzy %176
+        %178 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %179 = OpLoad %v3half %178
+        %180 = OpVectorShuffle %v3half %179 %179 2 2 2
+               OpStore %zzz %180
+        %182 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %184 = OpLoad %v3half %182
+        %185 = OpVectorShuffle %v4half %184 %184 0 0 0 0
+               OpStore %xxxx %185
+        %189 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %190 = OpLoad %v3half %189
+        %191 = OpVectorShuffle %v4half %190 %190 0 0 0 1
+               OpStore %xxxy %191
+        %193 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %194 = OpLoad %v3half %193
+        %195 = OpVectorShuffle %v4half %194 %194 0 0 0 2
+               OpStore %xxxz %195
+        %197 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %198 = OpLoad %v3half %197
+        %199 = OpVectorShuffle %v4half %198 %198 0 0 1 0
+               OpStore %xxyx %199
+        %201 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %202 = OpLoad %v3half %201
+        %203 = OpVectorShuffle %v4half %202 %202 0 0 1 1
+               OpStore %xxyy %203
+        %205 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %206 = OpLoad %v3half %205
+        %207 = OpVectorShuffle %v4half %206 %206 0 0 1 2
+               OpStore %xxyz %207
+        %209 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %210 = OpLoad %v3half %209
+        %211 = OpVectorShuffle %v4half %210 %210 0 0 2 0
+               OpStore %xxzx %211
+        %213 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %214 = OpLoad %v3half %213
+        %215 = OpVectorShuffle %v4half %214 %214 0 0 2 1
+               OpStore %xxzy %215
+        %217 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %218 = OpLoad %v3half %217
+        %219 = OpVectorShuffle %v4half %218 %218 0 0 2 2
+               OpStore %xxzz %219
+        %221 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %222 = OpLoad %v3half %221
+        %223 = OpVectorShuffle %v4half %222 %222 0 1 0 0
+               OpStore %xyxx %223
+        %225 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %226 = OpLoad %v3half %225
+        %227 = OpVectorShuffle %v4half %226 %226 0 1 0 1
+               OpStore %xyxy %227
+        %229 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %230 = OpLoad %v3half %229
+        %231 = OpVectorShuffle %v4half %230 %230 0 1 0 2
+               OpStore %xyxz %231
+        %233 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %234 = OpLoad %v3half %233
+        %235 = OpVectorShuffle %v4half %234 %234 0 1 1 0
+               OpStore %xyyx %235
+        %237 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %238 = OpLoad %v3half %237
+        %239 = OpVectorShuffle %v4half %238 %238 0 1 1 1
+               OpStore %xyyy %239
+        %241 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %242 = OpLoad %v3half %241
+        %243 = OpVectorShuffle %v4half %242 %242 0 1 1 2
+               OpStore %xyyz %243
+        %245 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %246 = OpLoad %v3half %245
+        %247 = OpVectorShuffle %v4half %246 %246 0 1 2 0
+               OpStore %xyzx %247
+        %249 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %250 = OpLoad %v3half %249
+        %251 = OpVectorShuffle %v4half %250 %250 0 1 2 1
+               OpStore %xyzy %251
+        %253 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %254 = OpLoad %v3half %253
+        %255 = OpVectorShuffle %v4half %254 %254 0 1 2 2
+               OpStore %xyzz %255
+        %257 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %258 = OpLoad %v3half %257
+        %259 = OpVectorShuffle %v4half %258 %258 0 2 0 0
+               OpStore %xzxx %259
+        %261 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %262 = OpLoad %v3half %261
+        %263 = OpVectorShuffle %v4half %262 %262 0 2 0 1
+               OpStore %xzxy %263
+        %265 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %266 = OpLoad %v3half %265
+        %267 = OpVectorShuffle %v4half %266 %266 0 2 0 2
+               OpStore %xzxz %267
+        %269 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %270 = OpLoad %v3half %269
+        %271 = OpVectorShuffle %v4half %270 %270 0 2 1 0
+               OpStore %xzyx %271
+        %273 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %274 = OpLoad %v3half %273
+        %275 = OpVectorShuffle %v4half %274 %274 0 2 1 1
+               OpStore %xzyy %275
+        %277 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %278 = OpLoad %v3half %277
+        %279 = OpVectorShuffle %v4half %278 %278 0 2 1 2
+               OpStore %xzyz %279
+        %281 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %282 = OpLoad %v3half %281
+        %283 = OpVectorShuffle %v4half %282 %282 0 2 2 0
+               OpStore %xzzx %283
+        %285 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %286 = OpLoad %v3half %285
+        %287 = OpVectorShuffle %v4half %286 %286 0 2 2 1
+               OpStore %xzzy %287
+        %289 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %290 = OpLoad %v3half %289
+        %291 = OpVectorShuffle %v4half %290 %290 0 2 2 2
+               OpStore %xzzz %291
+        %293 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %294 = OpLoad %v3half %293
+        %295 = OpVectorShuffle %v4half %294 %294 1 0 0 0
+               OpStore %yxxx %295
+        %297 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %298 = OpLoad %v3half %297
+        %299 = OpVectorShuffle %v4half %298 %298 1 0 0 1
+               OpStore %yxxy %299
+        %301 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %302 = OpLoad %v3half %301
+        %303 = OpVectorShuffle %v4half %302 %302 1 0 0 2
+               OpStore %yxxz %303
+        %305 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %306 = OpLoad %v3half %305
+        %307 = OpVectorShuffle %v4half %306 %306 1 0 1 0
+               OpStore %yxyx %307
+        %309 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %310 = OpLoad %v3half %309
+        %311 = OpVectorShuffle %v4half %310 %310 1 0 1 1
+               OpStore %yxyy %311
+        %313 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %314 = OpLoad %v3half %313
+        %315 = OpVectorShuffle %v4half %314 %314 1 0 1 2
+               OpStore %yxyz %315
+        %317 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %318 = OpLoad %v3half %317
+        %319 = OpVectorShuffle %v4half %318 %318 1 0 2 0
+               OpStore %yxzx %319
+        %321 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %322 = OpLoad %v3half %321
+        %323 = OpVectorShuffle %v4half %322 %322 1 0 2 1
+               OpStore %yxzy %323
+        %325 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %326 = OpLoad %v3half %325
+        %327 = OpVectorShuffle %v4half %326 %326 1 0 2 2
+               OpStore %yxzz %327
+        %329 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %330 = OpLoad %v3half %329
+        %331 = OpVectorShuffle %v4half %330 %330 1 1 0 0
+               OpStore %yyxx %331
+        %333 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %334 = OpLoad %v3half %333
+        %335 = OpVectorShuffle %v4half %334 %334 1 1 0 1
+               OpStore %yyxy %335
+        %337 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %338 = OpLoad %v3half %337
+        %339 = OpVectorShuffle %v4half %338 %338 1 1 0 2
+               OpStore %yyxz %339
+        %341 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %342 = OpLoad %v3half %341
+        %343 = OpVectorShuffle %v4half %342 %342 1 1 1 0
+               OpStore %yyyx %343
+        %345 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %346 = OpLoad %v3half %345
+        %347 = OpVectorShuffle %v4half %346 %346 1 1 1 1
+               OpStore %yyyy %347
+        %349 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %350 = OpLoad %v3half %349
+        %351 = OpVectorShuffle %v4half %350 %350 1 1 1 2
+               OpStore %yyyz %351
+        %353 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %354 = OpLoad %v3half %353
+        %355 = OpVectorShuffle %v4half %354 %354 1 1 2 0
+               OpStore %yyzx %355
+        %357 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %358 = OpLoad %v3half %357
+        %359 = OpVectorShuffle %v4half %358 %358 1 1 2 1
+               OpStore %yyzy %359
+        %361 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %362 = OpLoad %v3half %361
+        %363 = OpVectorShuffle %v4half %362 %362 1 1 2 2
+               OpStore %yyzz %363
+        %365 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %366 = OpLoad %v3half %365
+        %367 = OpVectorShuffle %v4half %366 %366 1 2 0 0
+               OpStore %yzxx %367
+        %369 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %370 = OpLoad %v3half %369
+        %371 = OpVectorShuffle %v4half %370 %370 1 2 0 1
+               OpStore %yzxy %371
+        %373 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %374 = OpLoad %v3half %373
+        %375 = OpVectorShuffle %v4half %374 %374 1 2 0 2
+               OpStore %yzxz %375
+        %377 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %378 = OpLoad %v3half %377
+        %379 = OpVectorShuffle %v4half %378 %378 1 2 1 0
+               OpStore %yzyx %379
+        %381 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %382 = OpLoad %v3half %381
+        %383 = OpVectorShuffle %v4half %382 %382 1 2 1 1
+               OpStore %yzyy %383
+        %385 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %386 = OpLoad %v3half %385
+        %387 = OpVectorShuffle %v4half %386 %386 1 2 1 2
+               OpStore %yzyz %387
+        %389 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %390 = OpLoad %v3half %389
+        %391 = OpVectorShuffle %v4half %390 %390 1 2 2 0
+               OpStore %yzzx %391
+        %393 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %394 = OpLoad %v3half %393
+        %395 = OpVectorShuffle %v4half %394 %394 1 2 2 1
+               OpStore %yzzy %395
+        %397 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %398 = OpLoad %v3half %397
+        %399 = OpVectorShuffle %v4half %398 %398 1 2 2 2
+               OpStore %yzzz %399
+        %401 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %402 = OpLoad %v3half %401
+        %403 = OpVectorShuffle %v4half %402 %402 2 0 0 0
+               OpStore %zxxx %403
+        %405 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %406 = OpLoad %v3half %405
+        %407 = OpVectorShuffle %v4half %406 %406 2 0 0 1
+               OpStore %zxxy %407
+        %409 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %410 = OpLoad %v3half %409
+        %411 = OpVectorShuffle %v4half %410 %410 2 0 0 2
+               OpStore %zxxz %411
+        %413 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %414 = OpLoad %v3half %413
+        %415 = OpVectorShuffle %v4half %414 %414 2 0 1 0
+               OpStore %zxyx %415
+        %417 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %418 = OpLoad %v3half %417
+        %419 = OpVectorShuffle %v4half %418 %418 2 0 1 1
+               OpStore %zxyy %419
+        %421 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %422 = OpLoad %v3half %421
+        %423 = OpVectorShuffle %v4half %422 %422 2 0 1 2
+               OpStore %zxyz %423
+        %425 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %426 = OpLoad %v3half %425
+        %427 = OpVectorShuffle %v4half %426 %426 2 0 2 0
+               OpStore %zxzx %427
+        %429 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %430 = OpLoad %v3half %429
+        %431 = OpVectorShuffle %v4half %430 %430 2 0 2 1
+               OpStore %zxzy %431
+        %433 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %434 = OpLoad %v3half %433
+        %435 = OpVectorShuffle %v4half %434 %434 2 0 2 2
+               OpStore %zxzz %435
+        %437 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %438 = OpLoad %v3half %437
+        %439 = OpVectorShuffle %v4half %438 %438 2 1 0 0
+               OpStore %zyxx %439
+        %441 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %442 = OpLoad %v3half %441
+        %443 = OpVectorShuffle %v4half %442 %442 2 1 0 1
+               OpStore %zyxy %443
+        %445 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %446 = OpLoad %v3half %445
+        %447 = OpVectorShuffle %v4half %446 %446 2 1 0 2
+               OpStore %zyxz %447
+        %449 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %450 = OpLoad %v3half %449
+        %451 = OpVectorShuffle %v4half %450 %450 2 1 1 0
+               OpStore %zyyx %451
+        %453 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %454 = OpLoad %v3half %453
+        %455 = OpVectorShuffle %v4half %454 %454 2 1 1 1
+               OpStore %zyyy %455
+        %457 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %458 = OpLoad %v3half %457
+        %459 = OpVectorShuffle %v4half %458 %458 2 1 1 2
+               OpStore %zyyz %459
+        %461 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %462 = OpLoad %v3half %461
+        %463 = OpVectorShuffle %v4half %462 %462 2 1 2 0
+               OpStore %zyzx %463
+        %465 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %466 = OpLoad %v3half %465
+        %467 = OpVectorShuffle %v4half %466 %466 2 1 2 1
+               OpStore %zyzy %467
+        %469 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %470 = OpLoad %v3half %469
+        %471 = OpVectorShuffle %v4half %470 %470 2 1 2 2
+               OpStore %zyzz %471
+        %473 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %474 = OpLoad %v3half %473
+        %475 = OpVectorShuffle %v4half %474 %474 2 2 0 0
+               OpStore %zzxx %475
+        %477 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %478 = OpLoad %v3half %477
+        %479 = OpVectorShuffle %v4half %478 %478 2 2 0 1
+               OpStore %zzxy %479
+        %481 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %482 = OpLoad %v3half %481
+        %483 = OpVectorShuffle %v4half %482 %482 2 2 0 2
+               OpStore %zzxz %483
+        %485 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %486 = OpLoad %v3half %485
+        %487 = OpVectorShuffle %v4half %486 %486 2 2 1 0
+               OpStore %zzyx %487
+        %489 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %490 = OpLoad %v3half %489
+        %491 = OpVectorShuffle %v4half %490 %490 2 2 1 1
+               OpStore %zzyy %491
+        %493 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %494 = OpLoad %v3half %493
+        %495 = OpVectorShuffle %v4half %494 %494 2 2 1 2
+               OpStore %zzyz %495
+        %497 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %498 = OpLoad %v3half %497
+        %499 = OpVectorShuffle %v4half %498 %498 2 2 2 0
+               OpStore %zzzx %499
+        %501 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %502 = OpLoad %v3half %501
+        %503 = OpVectorShuffle %v4half %502 %502 2 2 2 1
+               OpStore %zzzy %503
+        %505 = OpAccessChain %_ptr_Uniform_v3half %U %uint_0 %uint_0
+        %506 = OpLoad %v3half %505
+        %507 = OpVectorShuffle %v4half %506 %506 2 2 2 2
+               OpStore %zzzz %507
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/expressions/swizzle/read/packed_vec3/f16.wgsl.expected.wgsl b/test/tint/expressions/swizzle/read/packed_vec3/f16.wgsl.expected.wgsl
index 6c8063b..ada9f1f 100644
--- a/test/tint/expressions/swizzle/read/packed_vec3/f16.wgsl.expected.wgsl
+++ b/test/tint/expressions/swizzle/read/packed_vec3/f16.wgsl.expected.wgsl

@@ -1,18 +1,131 @@
-SKIP: FAILED
+enable f16;
 
-swizzle/read/packed_vec3/f16.wgsl:3:8 error: using f16 types in 'uniform' address space is not implemented yet
-    v: vec3<f16>,
-       ^^^^^^^^^
-
-swizzle/read/packed_vec3/f16.wgsl:2:1 note: see layout of struct:
-/*           align(8) size(8) */ struct S {
-/* offset(0) align(8) size(6) */   v : vec3<f16>;
-/* offset(6) align(1) size(2) */   // -- implicit struct size padding --;
-/*                            */ };
 struct S {
-^^^^^^
+  v : vec3<f16>,
+}
 
-swizzle/read/packed_vec3/f16.wgsl:6:36 note: see declaration of variable
 @group(0) @binding(0) var<uniform> U : S;
-                                   ^
 
+fn f() {
+  var v = U.v;
+  var x = U.v.x;
+  var y = U.v.y;
+  var z = U.v.z;
+  var xx = U.v.xx;
+  var xy = U.v.xy;
+  var xz = U.v.xz;
+  var yx = U.v.yx;
+  var yy = U.v.yy;
+  var yz = U.v.yz;
+  var zx = U.v.zx;
+  var zy = U.v.zy;
+  var zz = U.v.zz;
+  var xxx = U.v.xxx;
+  var xxy = U.v.xxy;
+  var xxz = U.v.xxz;
+  var xyx = U.v.xyx;
+  var xyy = U.v.xyy;
+  var xyz = U.v.xyz;
+  var xzx = U.v.xzx;
+  var xzy = U.v.xzy;
+  var xzz = U.v.xzz;
+  var yxx = U.v.yxx;
+  var yxy = U.v.yxy;
+  var yxz = U.v.yxz;
+  var yyx = U.v.yyx;
+  var yyy = U.v.yyy;
+  var yyz = U.v.yyz;
+  var yzx = U.v.yzx;
+  var yzy = U.v.yzy;
+  var yzz = U.v.yzz;
+  var zxx = U.v.zxx;
+  var zxy = U.v.zxy;
+  var zxz = U.v.zxz;
+  var zyx = U.v.zyx;
+  var zyy = U.v.zyy;
+  var zyz = U.v.zyz;
+  var zzx = U.v.zzx;
+  var zzy = U.v.zzy;
+  var zzz = U.v.zzz;
+  var xxxx = U.v.xxxx;
+  var xxxy = U.v.xxxy;
+  var xxxz = U.v.xxxz;
+  var xxyx = U.v.xxyx;
+  var xxyy = U.v.xxyy;
+  var xxyz = U.v.xxyz;
+  var xxzx = U.v.xxzx;
+  var xxzy = U.v.xxzy;
+  var xxzz = U.v.xxzz;
+  var xyxx = U.v.xyxx;
+  var xyxy = U.v.xyxy;
+  var xyxz = U.v.xyxz;
+  var xyyx = U.v.xyyx;
+  var xyyy = U.v.xyyy;
+  var xyyz = U.v.xyyz;
+  var xyzx = U.v.xyzx;
+  var xyzy = U.v.xyzy;
+  var xyzz = U.v.xyzz;
+  var xzxx = U.v.xzxx;
+  var xzxy = U.v.xzxy;
+  var xzxz = U.v.xzxz;
+  var xzyx = U.v.xzyx;
+  var xzyy = U.v.xzyy;
+  var xzyz = U.v.xzyz;
+  var xzzx = U.v.xzzx;
+  var xzzy = U.v.xzzy;
+  var xzzz = U.v.xzzz;
+  var yxxx = U.v.yxxx;
+  var yxxy = U.v.yxxy;
+  var yxxz = U.v.yxxz;
+  var yxyx = U.v.yxyx;
+  var yxyy = U.v.yxyy;
+  var yxyz = U.v.yxyz;
+  var yxzx = U.v.yxzx;
+  var yxzy = U.v.yxzy;
+  var yxzz = U.v.yxzz;
+  var yyxx = U.v.yyxx;
+  var yyxy = U.v.yyxy;
+  var yyxz = U.v.yyxz;
+  var yyyx = U.v.yyyx;
+  var yyyy = U.v.yyyy;
+  var yyyz = U.v.yyyz;
+  var yyzx = U.v.yyzx;
+  var yyzy = U.v.yyzy;
+  var yyzz = U.v.yyzz;
+  var yzxx = U.v.yzxx;
+  var yzxy = U.v.yzxy;
+  var yzxz = U.v.yzxz;
+  var yzyx = U.v.yzyx;
+  var yzyy = U.v.yzyy;
+  var yzyz = U.v.yzyz;
+  var yzzx = U.v.yzzx;
+  var yzzy = U.v.yzzy;
+  var yzzz = U.v.yzzz;
+  var zxxx = U.v.zxxx;
+  var zxxy = U.v.zxxy;
+  var zxxz = U.v.zxxz;
+  var zxyx = U.v.zxyx;
+  var zxyy = U.v.zxyy;
+  var zxyz = U.v.zxyz;
+  var zxzx = U.v.zxzx;
+  var zxzy = U.v.zxzy;
+  var zxzz = U.v.zxzz;
+  var zyxx = U.v.zyxx;
+  var zyxy = U.v.zyxy;
+  var zyxz = U.v.zyxz;
+  var zyyx = U.v.zyyx;
+  var zyyy = U.v.zyyy;
+  var zyyz = U.v.zyyz;
+  var zyzx = U.v.zyzx;
+  var zyzy = U.v.zyzy;
+  var zyzz = U.v.zyzz;
+  var zzxx = U.v.zzxx;
+  var zzxy = U.v.zzxy;
+  var zzxz = U.v.zzxz;
+  var zzyx = U.v.zzyx;
+  var zzyy = U.v.zzyy;
+  var zzyz = U.v.zzyz;
+  var zzzx = U.v.zzzx;
+  var zzzy = U.v.zzzy;
+  var zzzz = U.v.zzzz;
+}

diff --git a/test/tint/expressions/swizzle/write/packed_vec3/f16.wgsl.expected.dxc.hlsl b/test/tint/expressions/swizzle/write/packed_vec3/f16.wgsl.expected.dxc.hlsl
index 4053a55..61cc535 100644
--- a/test/tint/expressions/swizzle/write/packed_vec3/f16.wgsl.expected.dxc.hlsl
+++ b/test/tint/expressions/swizzle/write/packed_vec3/f16.wgsl.expected.dxc.hlsl

@@ -1,18 +1,13 @@
-SKIP: FAILED
+[numthreads(1, 1, 1)]
+void unused_entry_point() {
+  return;
+}
 
-swizzle/write/packed_vec3/f16.wgsl:3:8 error: using f16 types in 'storage' address space is not implemented yet
-    v: vec3<f16>,
-       ^^^^^^^^^
+RWByteAddressBuffer U : register(u0, space0);
 
-swizzle/write/packed_vec3/f16.wgsl:2:1 note: see layout of struct:
-/*           align(8) size(8) */ struct S {
-/* offset(0) align(8) size(6) */   v : vec3<f16>;
-/* offset(6) align(1) size(2) */   // -- implicit struct size padding --;
-/*                            */ };
-struct S {
-^^^^^^
-
-swizzle/write/packed_vec3/f16.wgsl:6:48 note: see declaration of variable
-@group(0) @binding(0) var<storage, read_write> U : S;
-                                               ^
-
+void f() {
+  U.Store<vector<float16_t, 3> >(0u, vector<float16_t, 3>(float16_t(1.0h), float16_t(2.0h), float16_t(3.0h)));
+  U.Store<float16_t>(0u, float16_t(1.0h));
+  U.Store<float16_t>(2u, float16_t(2.0h));
+  U.Store<float16_t>(4u, float16_t(3.0h));
+}

diff --git a/test/tint/expressions/swizzle/write/packed_vec3/f16.wgsl.expected.glsl b/test/tint/expressions/swizzle/write/packed_vec3/f16.wgsl.expected.glsl
index cb935a3..a4cf8e2 100644
--- a/test/tint/expressions/swizzle/write/packed_vec3/f16.wgsl.expected.glsl
+++ b/test/tint/expressions/swizzle/write/packed_vec3/f16.wgsl.expected.glsl

@@ -1,18 +1,22 @@
-SKIP: FAILED
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
 
-expressions/swizzle/write/packed_vec3/f16.wgsl:3:8 error: using f16 types in 'storage' address space is not implemented yet
-    v: vec3<f16>,
-       ^^^^^^^^^
-
-expressions/swizzle/write/packed_vec3/f16.wgsl:2:1 note: see layout of struct:
-/*           align(8) size(8) */ struct S {
-/* offset(0) align(8) size(6) */   v : vec3<f16>;
-/* offset(6) align(1) size(2) */   // -- implicit struct size padding --;
-/*                            */ };
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void unused_entry_point() {
+  return;
+}
 struct S {
-^^^^^^
+  f16vec3 v;
+};
 
-expressions/swizzle/write/packed_vec3/f16.wgsl:6:48 note: see declaration of variable
-@group(0) @binding(0) var<storage, read_write> U : S;
-                                               ^
+layout(binding = 0, std430) buffer U_block_ssbo {
+  S inner;
+} U;
+
+void f() {
+  U.inner.v = f16vec3(1.0hf, 2.0hf, 3.0hf);
+  U.inner.v.x = 1.0hf;
+  U.inner.v.y = 2.0hf;
+  U.inner.v.z = 3.0hf;
+}
 

diff --git a/test/tint/expressions/swizzle/write/packed_vec3/f16.wgsl.expected.msl b/test/tint/expressions/swizzle/write/packed_vec3/f16.wgsl.expected.msl
index 4053a55..92500cc 100644
--- a/test/tint/expressions/swizzle/write/packed_vec3/f16.wgsl.expected.msl
+++ b/test/tint/expressions/swizzle/write/packed_vec3/f16.wgsl.expected.msl

@@ -1,18 +1,28 @@
-SKIP: FAILED
+#include <metal_stdlib>
 
-swizzle/write/packed_vec3/f16.wgsl:3:8 error: using f16 types in 'storage' address space is not implemented yet
-    v: vec3<f16>,
-       ^^^^^^^^^
+using namespace metal;
 
-swizzle/write/packed_vec3/f16.wgsl:2:1 note: see layout of struct:
-/*           align(8) size(8) */ struct S {
-/* offset(0) align(8) size(6) */   v : vec3<f16>;
-/* offset(6) align(1) size(2) */   // -- implicit struct size padding --;
-/*                            */ };
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
 struct S {
-^^^^^^
+  /* 0x0000 */ packed_half3 v;
+  /* 0x0006 */ tint_array<int8_t, 2> tint_pad;
+};
 
-swizzle/write/packed_vec3/f16.wgsl:6:48 note: see declaration of variable
-@group(0) @binding(0) var<storage, read_write> U : S;
-                                               ^
+void f(device S* const tint_symbol) {
+  (*(tint_symbol)).v = half3(1.0h, 2.0h, 3.0h);
+  (*(tint_symbol)).v[0] = 1.0h;
+  (*(tint_symbol)).v[1] = 2.0h;
+  (*(tint_symbol)).v[2] = 3.0h;
+}
 

diff --git a/test/tint/expressions/swizzle/write/packed_vec3/f16.wgsl.expected.spvasm b/test/tint/expressions/swizzle/write/packed_vec3/f16.wgsl.expected.spvasm
index 4053a55..0efe146 100644
--- a/test/tint/expressions/swizzle/write/packed_vec3/f16.wgsl.expected.spvasm
+++ b/test/tint/expressions/swizzle/write/packed_vec3/f16.wgsl.expected.spvasm

@@ -1,18 +1,59 @@
-SKIP: FAILED
-
-swizzle/write/packed_vec3/f16.wgsl:3:8 error: using f16 types in 'storage' address space is not implemented yet
-    v: vec3<f16>,
-       ^^^^^^^^^
-
-swizzle/write/packed_vec3/f16.wgsl:2:1 note: see layout of struct:
-/*           align(8) size(8) */ struct S {
-/* offset(0) align(8) size(6) */   v : vec3<f16>;
-/* offset(6) align(1) size(2) */   // -- implicit struct size padding --;
-/*                            */ };
-struct S {
-^^^^^^
-
-swizzle/write/packed_vec3/f16.wgsl:6:48 note: see declaration of variable
-@group(0) @binding(0) var<storage, read_write> U : S;
-                                               ^
-
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 27
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %unused_entry_point "unused_entry_point"
+               OpExecutionMode %unused_entry_point LocalSize 1 1 1
+               OpName %U_block "U_block"
+               OpMemberName %U_block 0 "inner"
+               OpName %S "S"
+               OpMemberName %S 0 "v"
+               OpName %U "U"
+               OpName %unused_entry_point "unused_entry_point"
+               OpName %f "f"
+               OpDecorate %U_block Block
+               OpMemberDecorate %U_block 0 Offset 0
+               OpMemberDecorate %S 0 Offset 0
+               OpDecorate %U DescriptorSet 0
+               OpDecorate %U Binding 0
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+          %S = OpTypeStruct %v3half
+    %U_block = OpTypeStruct %S
+%_ptr_StorageBuffer_U_block = OpTypePointer StorageBuffer %U_block
+          %U = OpVariable %_ptr_StorageBuffer_U_block StorageBuffer
+       %void = OpTypeVoid
+          %7 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_StorageBuffer_v3half = OpTypePointer StorageBuffer %v3half
+%half_0x1p_0 = OpConstant %half 0x1p+0
+%half_0x1p_1 = OpConstant %half 0x1p+1
+%half_0x1_8p_1 = OpConstant %half 0x1.8p+1
+         %20 = OpConstantComposite %v3half %half_0x1p_0 %half_0x1p_1 %half_0x1_8p_1
+%_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+%unused_entry_point = OpFunction %void None %7
+         %10 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %7
+         %12 = OpLabel
+         %16 = OpAccessChain %_ptr_StorageBuffer_v3half %U %uint_0 %uint_0
+               OpStore %16 %20
+         %22 = OpAccessChain %_ptr_StorageBuffer_half %U %uint_0 %uint_0 %uint_0
+               OpStore %22 %half_0x1p_0
+         %24 = OpAccessChain %_ptr_StorageBuffer_half %U %uint_0 %uint_0 %uint_1
+               OpStore %24 %half_0x1p_1
+         %26 = OpAccessChain %_ptr_StorageBuffer_half %U %uint_0 %uint_0 %uint_2
+               OpStore %26 %half_0x1_8p_1
+               OpReturn
+               OpFunctionEnd

diff --git a/test/tint/expressions/swizzle/write/packed_vec3/f16.wgsl.expected.wgsl b/test/tint/expressions/swizzle/write/packed_vec3/f16.wgsl.expected.wgsl
index 4053a55..daa08f6 100644
--- a/test/tint/expressions/swizzle/write/packed_vec3/f16.wgsl.expected.wgsl
+++ b/test/tint/expressions/swizzle/write/packed_vec3/f16.wgsl.expected.wgsl

@@ -1,18 +1,14 @@
-SKIP: FAILED
+enable f16;
 
-swizzle/write/packed_vec3/f16.wgsl:3:8 error: using f16 types in 'storage' address space is not implemented yet
-    v: vec3<f16>,
-       ^^^^^^^^^
-
-swizzle/write/packed_vec3/f16.wgsl:2:1 note: see layout of struct:
-/*           align(8) size(8) */ struct S {
-/* offset(0) align(8) size(6) */   v : vec3<f16>;
-/* offset(6) align(1) size(2) */   // -- implicit struct size padding --;
-/*                            */ };
 struct S {
-^^^^^^
+  v : vec3<f16>,
+}
 
-swizzle/write/packed_vec3/f16.wgsl:6:48 note: see declaration of variable
 @group(0) @binding(0) var<storage, read_write> U : S;
-                                               ^
 
+fn f() {
+  U.v = vec3<f16>(1.0h, 2.0h, 3.0h);
+  U.v.x = 1.0h;
+  U.v.y = 2.0h;
+  U.v.z = 3.0h;
+}
commit	ab9b5f3aa5996054993f01914fe5a6833a2c8e38	[log] [tgz]
author	Zhaoming Jiang <zhaoming.jiang@intel.com>	Thu Nov 24 05:25:35 2022 +0000
committer	Dawn LUCI CQ <dawn-scoped@luci-project-accounts.iam.gserviceaccount.com>	Thu Nov 24 05:25:35 2022 +0000
tree	8192501908f4907c9ed4c31ac57860d5471c1191
parent	ff2b5e441cc7417281f08135c80702c74b91c9ad [diff]